Blob Blame History Raw
diff --git a/breakpoints.c b/breakpoints.c
index 1ea406a..1eff8b0 100644
--- a/breakpoints.c
+++ b/breakpoints.c
@@ -14,15 +14,25 @@
 
 Breakpoint *
 address2bpstruct(Process *proc, void *addr) {
+	assert(proc != NULL);
+	assert(proc->breakpoints != NULL);
+	assert(proc->leader == proc);
 	debug(DEBUG_FUNCTION, "address2bpstruct(pid=%d, addr=%p)", proc->pid, addr);
 	return dict_find_entry(proc->breakpoints, addr);
 }
 
 void
 insert_breakpoint(Process *proc, void *addr,
-		  struct library_symbol *libsym) {
+		  struct library_symbol *libsym, int enable) {
 	Breakpoint *sbp;
 
+	Process * leader = proc->leader;
+
+	/* Only the group leader should be getting the breakpoints and
+	 * thus have ->breakpoints initialized.  */
+	assert(leader != NULL);
+	assert(leader->breakpoints != NULL);
+
 #ifdef __arm__
 	int thumb_mode = (int)addr & 1;
 	if (thumb_mode)
@@ -38,13 +48,13 @@ insert_breakpoint(Process *proc, void *addr,
 	if (libsym)
 		libsym->needs_init = 0;
 
-	sbp = dict_find_entry(proc->breakpoints, addr);
+	sbp = dict_find_entry(leader->breakpoints, addr);
 	if (!sbp) {
 		sbp = calloc(1, sizeof(Breakpoint));
 		if (!sbp) {
 			return;	/* TODO FIXME XXX: error_mem */
 		}
-		dict_enter(proc->breakpoints, addr, sbp);
+		dict_enter(leader->breakpoints, addr, sbp);
 		sbp->addr = addr;
 		sbp->libsym = libsym;
 	}
@@ -53,8 +63,10 @@ insert_breakpoint(Process *proc, void *addr,
 	proc->thumb_mode = 0;
 #endif
 	sbp->enabled++;
-	if (sbp->enabled == 1 && proc->pid)
-		enable_breakpoint(proc->pid, sbp);
+	if (sbp->enabled == 1 && enable) {
+		assert(proc->pid != 0);
+		enable_breakpoint(proc, sbp);
+	}
 }
 
 void
@@ -63,7 +75,10 @@ delete_breakpoint(Process *proc, void *addr) {
 
 	debug(DEBUG_FUNCTION, "delete_breakpoint(pid=%d, addr=%p)", proc->pid, addr);
 
-	sbp = dict_find_entry(proc->breakpoints, addr);
+	Process * leader = proc->leader;
+	assert(leader != NULL);
+
+	sbp = dict_find_entry(leader->breakpoints, addr);
 	assert(sbp);		/* FIXME: remove after debugging has been done. */
 	/* This should only happen on out-of-memory conditions. */
 	if (sbp == NULL)
@@ -71,7 +86,7 @@ delete_breakpoint(Process *proc, void *addr) {
 
 	sbp->enabled--;
 	if (sbp->enabled == 0)
-		disable_breakpoint(proc->pid, sbp);
+		disable_breakpoint(proc, sbp);
 	assert(sbp->enabled >= 0);
 }
 
@@ -79,7 +94,7 @@ static void
 enable_bp_cb(void *addr, void *sbp, void *proc) {
 	debug(DEBUG_FUNCTION, "enable_bp_cb(pid=%d)", ((Process *)proc)->pid);
 	if (((Breakpoint *)sbp)->enabled) {
-		enable_breakpoint(((Process *)proc)->pid, sbp);
+		enable_breakpoint(proc, sbp);
 	}
 }
 
@@ -146,13 +161,14 @@ static void
 disable_bp_cb(void *addr, void *sbp, void *proc) {
 	debug(DEBUG_FUNCTION, "disable_bp_cb(pid=%d)", ((Process *)proc)->pid);
 	if (((Breakpoint *)sbp)->enabled) {
-		disable_breakpoint(((Process *)proc)->pid, sbp);
+		disable_breakpoint(proc, sbp);
 	}
 }
 
 void
 disable_all_breakpoints(Process *proc) {
 	debug(DEBUG_FUNCTION, "disable_all_breakpoints(pid=%d)", proc->pid);
+	assert(proc->leader == proc);
 	if (proc->breakpoints_enabled) {
 		debug(1, "Disabling breakpoints for pid %u...", proc->pid);
 		dict_apply_to_all(proc->breakpoints, disable_bp_cb, proc);
@@ -167,8 +183,9 @@ free_bp_cb(void *addr, void *sbp, void *data) {
 	free(sbp);
 }
 
-void
-breakpoints_init(Process *proc) {
+int
+breakpoints_init(Process *proc, int enable)
+{
 	struct library_symbol *sym;
 
 	debug(DEBUG_FUNCTION, "breakpoints_init(pid=%d)", proc->pid);
@@ -177,19 +194,41 @@ breakpoints_init(Process *proc) {
 		dict_clear(proc->breakpoints);
 		proc->breakpoints = NULL;
 	}
-	proc->breakpoints = dict_init(dict_key2hash_int, dict_key_cmp_int);
+
+	/* Only the thread group leader should hold the breakpoints.
+	 * (N.B. PID may be set to 0 temporarily when called by
+	 * handle_exec).  */
+	assert(proc->leader == proc);
+
+	proc->breakpoints = dict_init(dict_key2hash_int,
+				      dict_key_cmp_int);
+
+	if (proc->list_of_symbols != NULL) {
+		struct library_symbol * sym = proc->list_of_symbols;
+		while (sym != NULL) {
+			struct library_symbol * next = sym->next;
+			free(sym);
+			sym = next;
+		}
+	}
+	proc->list_of_symbols = NULL;
 
 	if (options.libcalls && proc->filename) {
-		/* FIXME: memory leak when called by exec(): */
 		proc->list_of_symbols = read_elf(proc);
+		if (proc->list_of_symbols == NULL) {
+			/* XXX leak breakpoints */
+			return -1;
+		}
+
 		if (opt_e) {
-			struct library_symbol **tmp1 = &(proc->list_of_symbols);
+			struct library_symbol **tmp1 = &proc->list_of_symbols;
 			while (*tmp1) {
 				struct opt_e_t *tmp2 = opt_e;
 				int keep = !opt_e_enable;
 
 				while (tmp2) {
-					if (!strcmp((*tmp1)->name, tmp2->name)) {
+					if (!strcmp((*tmp1)->name,
+						    tmp2->name)) {
 						keep = opt_e_enable;
 					}
 					tmp2 = tmp2->next;
@@ -201,15 +240,14 @@ breakpoints_init(Process *proc) {
 				}
 			}
 		}
-	} else {
-		proc->list_of_symbols = NULL;
-	}
-	for (sym = proc->list_of_symbols; sym; sym = sym->next) {
-		/* proc->pid==0 delays enabling. */
-		insert_breakpoint(proc, sym2addr(proc, sym), sym);
 	}
+
+	for (sym = proc->list_of_symbols; sym; sym = sym->next)
+		insert_breakpoint(proc, sym2addr(proc, sym), sym, enable);
+
 	proc->callstack_depth = 0;
 	proc->breakpoints_enabled = -1;
+	return 0;
 }
 
 void
@@ -222,8 +260,7 @@ reinitialize_breakpoints(Process *proc) {
 
 	while (sym) {
 		if (sym->needs_init) {
-			insert_breakpoint(proc, sym2addr(proc, sym),
-					  sym);
+			insert_breakpoint(proc, sym2addr(proc, sym), sym, 1);
 			if (sym->needs_init && !sym->is_weak) {
 				fprintf(stderr,
 					"could not re-initialize breakpoint for \"%s\" in file \"%s\"\n",
diff --git a/common.h b/common.h
index 70e4a5a..49861cf 100644
--- a/common.h
+++ b/common.h
@@ -1,3 +1,4 @@
+#include <config.h>
 #if defined(HAVE_LIBUNWIND)
 #include <libunwind.h>
 #endif /* defined(HAVE_LIBUNWIND) */
@@ -161,12 +162,32 @@ enum Process_State {
 	STATE_IGNORED  /* ignore this process (it's a fork and no -f was used) */
 };
 
+typedef struct Event_Handler Event_Handler;
+struct Event_Handler {
+	/* Event handler that overrides the default one.  Should
+	 * return NULL if the event was handled, otherwise the
+	 * returned event is passed to the default handler.  */
+	Event * (* on_event)(Event_Handler * self, Event * event);
+
+	/* Called when the event handler removal is requested.  */
+	void (* destroy)(Event_Handler * self);
+};
+
+/* XXX We would rather have this all organized a little differently,
+ * have Process for the whole group and Task for what's there for
+ * per-thread stuff.  But for now this is the less invasive way of
+ * structuring it.  */
 struct Process {
 	Process_State state;
 	Process * parent;         /* needed by STATE_BEING_CREATED */
 	char * filename;
 	pid_t pid;
+
+	/* Dictionary of breakpoints (which is a mapping
+	 * address->Breakpoint).  This is NULL for non-leader
+	 * processes.  */
 	Dict * breakpoints;
+
 	int breakpoints_enabled;  /* -1:not enabled yet, 0:disabled, 1:enabled */
 	int mask_32bit;           /* 1 if 64-bit ltrace is tracing 32-bit process */
 	unsigned int personality;
@@ -183,7 +204,6 @@ struct Process {
 	void * instruction_pointer;
 	void * stack_pointer;      /* To get return addr, args... */
 	void * return_addr;
-	Breakpoint * breakpoint_being_enabled;
 	void * arch_ptr;
 	short e_machine;
 	short need_to_reinitialize_breakpoints;
@@ -191,16 +211,28 @@ struct Process {
 	int thumb_mode;           /* ARM execution mode: 0: ARM, 1: Thumb */
 #endif
 
-	/* output: */
-	enum tof type_being_displayed;
-
 #if defined(HAVE_LIBUNWIND)
 	/* libunwind address space */
 	unw_addr_space_t unwind_as;
 	void *unwind_priv;
 #endif /* defined(HAVE_LIBUNWIND) */
 
+	/* Set in leader.  */
+	Event_Handler * event_handler;
+
+
+	/**
+	 * Process chaining.
+	 **/
 	Process * next;
+
+	/* LEADER points to the leader thread of the POSIX.1 process.
+	   If X->LEADER == X, then X is the leader thread and the
+	   Process structures chained by NEXT represent other threads,
+	   up until, but not including, the next leader thread.
+	   LEADER may be NULL after the leader has already exited.  In
+	   that case this process is waiting to be collected.  */
+	Process * leader;
 };
 
 struct opt_c_struct {
@@ -216,27 +248,64 @@ struct opt_c_struct {
 
 extern Dict * dict_opt_c;
 
-extern Process * list_of_processes;
+enum process_status {
+	ps_invalid,	/* Failure.  */
+	ps_stop,	/* Job-control stop.  */
+	ps_tracing_stop,
+	ps_zombie,
+	ps_other,	/* Necessary other states can be added as needed.  */
+};
 
-extern Event * next_event(void);
+enum pcb_status {
+	pcb_stop, /* The iteration should stop.  */
+	pcb_cont, /* The iteration should continue.  */
+};
+
+/* Process list  */
 extern Process * pid2proc(pid_t pid);
+extern void add_process(Process * proc);
+extern void remove_process(Process * proc);
+extern Process *each_process(Process * start,
+			     enum pcb_status (* cb)(Process * proc, void * data),
+			     void * data);
+extern Process *each_task(Process * start,
+			  enum pcb_status (* cb)(Process * proc, void * data),
+			  void * data);
+
+/* Events  */
+enum ecb_status {
+	ecb_cont, /* The iteration should continue.  */
+	ecb_yield, /* The iteration should stop, yielding this
+		    * event.  */
+	ecb_deque, /* Like ecb_yield, but the event should be removed
+		    * from the queue.  */
+};
+extern Event * next_event(void);
+extern Event * each_qd_event(enum ecb_status (* cb)(Event * event, void * data),
+			     void * data);
+extern void enque_event(Event * event);
 extern void handle_event(Event * event);
-extern void execute_program(Process *, char **);
+
+extern void install_event_handler(Process * proc, Event_Handler * handler);
+extern void destroy_event_handler(Process * proc);
+
+extern pid_t execute_program(const char * command, char ** argv);
 extern int display_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
 extern Breakpoint * address2bpstruct(Process * proc, void * addr);
-extern void breakpoints_init(Process * proc);
-extern void insert_breakpoint(Process * proc, void * addr, struct library_symbol * libsym);
+extern int breakpoints_init(Process * proc, int enable);
+extern void insert_breakpoint(Process * proc, void * addr,
+			      struct library_symbol * libsym, int enable);
 extern void delete_breakpoint(Process * proc, void * addr);
 extern void enable_all_breakpoints(Process * proc);
 extern void disable_all_breakpoints(Process * proc);
 extern void reinitialize_breakpoints(Process *);
 
-extern Process * open_program(char * filename, pid_t pid);
+extern Process * open_program(char * filename, pid_t pid, int init_breakpoints);
 extern void open_pid(pid_t pid);
 extern void show_summary(void);
 extern arg_type_info * lookup_prototype(enum arg_type at);
 
-extern void do_init_elf(struct ltelf *lte, const char *filename);
+extern int do_init_elf(struct ltelf *lte, const char *filename);
 extern void do_close_elf(struct ltelf *lte);
 extern int in_load_libraries(const char *name, struct ltelf *lte, size_t count, GElf_Sym *sym);
 extern struct library_symbol *library_symbols;
@@ -246,6 +315,10 @@ extern void add_library_symbol(GElf_Addr addr, const char *name,
 
 /* Arch-dependent stuff: */
 extern char * pid2name(pid_t pid);
+extern pid_t process_leader(pid_t pid);
+extern int process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n);
+extern int process_stopped(pid_t pid);
+extern enum process_status process_status(pid_t pid);
 extern void trace_set_options(Process * proc, pid_t pid);
 extern void trace_me(void);
 extern int trace_pid(pid_t pid);
@@ -256,13 +329,13 @@ extern void set_instruction_pointer(Process * proc, void * addr);
 extern void * get_stack_pointer(Process * proc);
 extern void * get_return_addr(Process * proc, void * stack_pointer);
 extern void set_return_addr(Process * proc, void * addr);
-extern void enable_breakpoint(pid_t pid, Breakpoint * sbp);
-extern void disable_breakpoint(pid_t pid, const Breakpoint * sbp);
+extern void enable_breakpoint(Process * proc, Breakpoint * sbp);
+extern void disable_breakpoint(Process * proc, Breakpoint * sbp);
 extern int syscall_p(Process * proc, int status, int * sysnum);
 extern void continue_process(pid_t pid);
 extern void continue_after_signal(pid_t pid, int signum);
 extern void continue_after_breakpoint(Process * proc, Breakpoint * sbp);
-extern void continue_enabling_breakpoint(pid_t pid, Breakpoint * sbp);
+extern void ltrace_exiting(void);
 extern long gimme_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
 extern void save_register_args(enum tof type, Process * proc);
 extern int umovestr(Process * proc, void * addr, int len, void * laddr);
@@ -272,5 +345,7 @@ extern int ffcheck(void * maddr);
 extern void * sym2addr(Process *, struct library_symbol *);
 extern int linkmap_init(Process *, struct ltelf *);
 extern void arch_check_dbg(Process *proc);
+extern int task_kill (pid_t pid, int sig);
+
 
 extern struct ltelf main_lte;
diff --git a/execute_program.c b/execute_program.c
index 3651b66..47f514d 100644
--- a/execute_program.c
+++ b/execute_program.c
@@ -17,7 +17,8 @@
 #include "common.h"
 
 static void
-change_uid(Process *proc) {
+change_uid(const char * command)
+{
 	uid_t run_uid, run_euid;
 	gid_t run_gid, run_egid;
 
@@ -49,7 +50,7 @@ change_uid(Process *proc) {
 		run_euid = run_uid;
 		run_egid = run_gid;
 
-		if (!stat(proc->filename, &statbuf)) {
+		if (!stat(command, &statbuf)) {
 			if (statbuf.st_mode & S_ISUID) {
 				run_euid = statbuf.st_uid;
 			}
@@ -68,32 +69,27 @@ change_uid(Process *proc) {
 	}
 }
 
-void
-execute_program(Process *sp, char **argv) {
+pid_t
+execute_program(const char * command, char **argv)
+{
 	pid_t pid;
 
-	debug(1, "Executing `%s'...", sp->filename);
+	debug(1, "Executing `%s'...", command);
 
 	pid = fork();
 	if (pid < 0) {
 		perror("ltrace: fork");
 		exit(1);
 	} else if (!pid) {	/* child */
-		change_uid(sp);
+		change_uid(command);
 		trace_me();
-		execvp(sp->filename, argv);
-		fprintf(stderr, "Can't execute `%s': %s\n", sp->filename,
+		execvp(command, argv);
+		fprintf(stderr, "Can't execute `%s': %s\n", command,
 			strerror(errno));
 		_exit(1);
 	}
 
 	debug(1, "PID=%d", pid);
 
-	sp->pid = pid;
-
-#if defined(HAVE_LIBUNWIND)
-	sp->unwind_priv = _UPT_create(pid);
-#endif /* defined(HAVE_LIBUNWIND) */
-
-	return;
+	return pid;
 }
diff --git a/handle_event.c b/handle_event.c
index 01309ff..0aa40f7 100644
--- a/handle_event.c
+++ b/handle_event.c
@@ -25,7 +25,6 @@ static void handle_clone(Event *event);
 static void handle_exec(Event *event);
 static void handle_breakpoint(Event *event);
 static void handle_new(Event *event);
-static void remove_proc(Process *proc);
 
 static void callstack_push_syscall(Process *proc, int sysnum);
 static void callstack_push_symfunc(Process *proc,
@@ -38,7 +37,26 @@ static char * arch_sysname(Process *proc, int sysnum);
 
 void
 handle_event(Event *event) {
-	debug(DEBUG_FUNCTION, "handle_event(pid=%d, type=%d)", event->proc ? event->proc->pid : -1, event->type);
+	if (exiting == 1) {
+		exiting = 2;
+		debug(1, "ltrace about to exit");
+		ltrace_exiting();
+	}
+	debug(DEBUG_FUNCTION, "handle_event(pid=%d, type=%d)",
+	      event->proc ? event->proc->pid : -1, event->type);
+	/* If the thread group defines an overriding event handler,
+	   give it a chance to kick in.  */
+	if (event->proc != NULL
+	    && event->proc->leader != NULL) {
+		Event_Handler * handler = event->proc->leader->event_handler;
+		if (handler != NULL) {
+			event = (*handler->on_event) (handler, event);
+			if (event == NULL)
+				/* It was handled.  */
+				return;
+		}
+	}
+
 	switch (event->type) {
 	case EVENT_NONE:
 		debug(1, "event: none");
@@ -202,24 +220,24 @@ handle_clone(Event * event) {
 	p->pid = event->e_un.newpid;
 	p->parent = event->proc;
 
+	/* We save register values to the arch pointer, and these need
+	   to be per-thread.  */
+	p->arch_ptr = NULL;
+
 	if (pending_new(p->pid)) {
 		pending_new_remove(p->pid);
-		if (p->breakpoint_being_enabled) {
-			enable_breakpoint(p->pid, p->breakpoint_being_enabled);
-			p->breakpoint_being_enabled = NULL;
-		}
+		if (p->event_handler != NULL)
+			destroy_event_handler(p);
 		if (event->proc->state == STATE_ATTACHED && options.follow) {
 			p->state = STATE_ATTACHED;
 		} else {
 			p->state = STATE_IGNORED;
 		}
 		continue_process(p->pid);
-		p->next = list_of_processes;
-		list_of_processes = p;
+		add_process(p);
 	} else {
 		p->state = STATE_BEING_CREATED;
-		p->next = list_of_processes;
-		list_of_processes = p;
+		add_process(p);
 	}
 	continue_process(event->proc->pid);
 }
@@ -235,10 +253,8 @@ handle_new(Event * event) {
 		pending_new_insert(event->e_un.newpid);
 	} else {
 		assert(proc->state == STATE_BEING_CREATED);
-		if (proc->breakpoint_being_enabled) {
-			enable_breakpoint(proc->pid, proc->breakpoint_being_enabled);
-			proc->breakpoint_being_enabled = NULL;
-		}
+		if (proc->event_handler != NULL)
+			destroy_event_handler(proc);
 		if (options.follow) {
 			proc->state = STATE_ATTACHED;
 		} else {
@@ -323,13 +339,6 @@ arch_sysname(Process *proc, int sysnum) {
 static void
 handle_signal(Event *event) {
 	debug(DEBUG_FUNCTION, "handle_signal(pid=%d, signum=%d)", event->proc->pid, event->e_un.signum);
-	if (exiting && event->e_un.signum == SIGSTOP) {
-		pid_t pid = event->proc->pid;
-		disable_all_breakpoints(event->proc);
-		untrace_pid(pid);
-		remove_proc(event->proc);
-		return;
-	}
 	if (event->proc->state != STATE_IGNORED && !options.no_signals) {
 		output_line(event->proc, "--- %s (%s) ---",
 				shortsignal(event->proc, event->e_un.signum),
@@ -345,7 +354,7 @@ handle_exit(Event *event) {
 		output_line(event->proc, "+++ exited (status %d) +++",
 				event->e_un.ret_val);
 	}
-	remove_proc(event->proc);
+	remove_process(event->proc);
 }
 
 static void
@@ -355,31 +364,7 @@ handle_exit_signal(Event *event) {
 		output_line(event->proc, "+++ killed by %s +++",
 				shortsignal(event->proc, event->e_un.signum));
 	}
-	remove_proc(event->proc);
-}
-
-static void
-remove_proc(Process *proc) {
-	Process *tmp, *tmp2;
-
-	debug(DEBUG_FUNCTION, "remove_proc(pid=%d)", proc->pid);
-
-	if (list_of_processes == proc) {
-		tmp = list_of_processes;
-		list_of_processes = list_of_processes->next;
-		free(tmp);
-		return;
-	}
-	tmp = list_of_processes;
-	while (tmp->next) {
-		if (tmp->next == proc) {
-			tmp2 = tmp->next;
-			tmp->next = tmp->next->next;
-			free(tmp2);
-			continue;
-		}
-		tmp = tmp->next;
-	}
+	remove_process(event->proc);
 }
 
 static void
@@ -389,7 +374,7 @@ handle_syscall(Event *event) {
 		callstack_push_syscall(event->proc, event->e_un.sysnum);
 		if (options.syscalls) {
 			output_left(LT_TOF_SYSCALL, event->proc,
-					sysname(event->proc, event->e_un.sysnum));
+				    sysname(event->proc, event->e_un.sysnum));
 		}
 		if (event->proc->breakpoints_enabled == 0) {
 			enable_all_breakpoints(event->proc);
@@ -406,7 +391,7 @@ handle_exec(Event * event) {
 	debug(DEBUG_FUNCTION, "handle_exec(pid=%d)", proc->pid);
 	if (proc->state == STATE_IGNORED) {
 		untrace_pid(proc->pid);
-		remove_proc(proc);
+		remove_process(proc);
 		return;
 	}
 	output_line(proc, "--- Called exec() ---");
@@ -417,7 +402,7 @@ handle_exec(Event * event) {
 	proc->filename = pid2name(proc->pid);
 	saved_pid = proc->pid;
 	proc->pid = 0;
-	breakpoints_init(proc);
+	breakpoints_init(proc, 0);
 	proc->pid = saved_pid;
 	proc->callstack_depth = 0;
 	continue_process(proc->pid);
@@ -503,6 +488,13 @@ static void
 handle_breakpoint(Event *event) {
 	int i, j;
 	Breakpoint *sbp;
+	Process *leader = event->proc->leader;
+
+	/* The leader has terminated.  */
+	if (leader == NULL) {
+		continue_process(event->proc->pid);
+		return;
+	}
 
 	debug(DEBUG_FUNCTION, "handle_breakpoint(pid=%d, addr=%p)", event->proc->pid, event->e_un.brk_addr);
 	debug(2, "event: breakpoint (%p)", event->e_un.brk_addr);
@@ -513,7 +505,7 @@ handle_breakpoint(Event *event) {
 	Breakpoint *stub_bp = NULL;
 	char nop_instruction[] = PPC_NOP;
 
-	stub_bp = address2bpstruct (event->proc, event->e_un.brk_addr);
+	stub_bp = address2bpstruct(leader, event->e_un.brk_addr);
 
 	if (stub_bp) {
 		unsigned char *bp_instruction = stub_bp->orig_value;
@@ -528,14 +520,6 @@ handle_breakpoint(Event *event) {
 		}
 	}
 #endif
-	if ((sbp = event->proc->breakpoint_being_enabled) != 0) {
-		/* Reinsert breakpoint */
-		continue_enabling_breakpoint(event->proc->pid,
-					     event->proc->
-					     breakpoint_being_enabled);
-		event->proc->breakpoint_being_enabled = NULL;
-		return;
-	}
 
 	for (i = event->proc->callstack_depth - 1; i >= 0; i--) {
 		if (event->e_un.brk_addr ==
@@ -554,7 +538,7 @@ handle_breakpoint(Event *event) {
 			if (libsym->plt_type != LS_TOPLT_POINT) {
 				unsigned char break_insn[] = BREAKPOINT_VALUE;
 
-				sbp = address2bpstruct(event->proc, addr);
+				sbp = address2bpstruct(leader, addr);
 				assert(sbp);
 				a = ptrace(PTRACE_PEEKTEXT, event->proc->pid,
 					   addr);
@@ -562,10 +546,10 @@ handle_breakpoint(Event *event) {
 				if (memcmp(&a, break_insn, BREAKPOINT_LENGTH)) {
 					sbp->enabled--;
 					insert_breakpoint(event->proc, addr,
-							  libsym);
+							  libsym, 1);
 				}
 			} else {
-				sbp = dict_find_entry(event->proc->breakpoints, addr);
+				sbp = dict_find_entry(leader->breakpoints, addr);
 				/* On powerpc, the breakpoint address
 				   may end up being actual entry point
 				   of the library symbol, not the PLT
@@ -573,7 +557,7 @@ handle_breakpoint(Event *event) {
 				   sbp is NULL.  */
 				if (sbp == NULL || addr != sbp->addr) {
 					insert_breakpoint(event->proc, addr,
-							  libsym);
+							  libsym, 1);
 				}
 			}
 #elif defined(__mips__)
@@ -581,18 +565,18 @@ handle_breakpoint(Event *event) {
 			struct library_symbol *sym= event->proc->callstack[i].c_un.libfunc;
 			struct library_symbol *new_sym;
 			assert(sym);
-			addr=sym2addr(event->proc,sym);
-			sbp = dict_find_entry(event->proc->breakpoints, addr);
+			addr = sym2addr(leader, sym);
+			sbp = dict_find_entry(leader->breakpoints, addr);
 			if (sbp) {
 				if (addr != sbp->addr) {
-					insert_breakpoint(event->proc, addr, sym);
+					insert_breakpoint(event->proc, addr, sym, 1);
 				}
 			} else {
 				new_sym=malloc(sizeof(*new_sym) + strlen(sym->name) + 1);
 				memcpy(new_sym,sym,sizeof(*new_sym) + strlen(sym->name) + 1);
-				new_sym->next=event->proc->list_of_symbols;
-				event->proc->list_of_symbols=new_sym;
-				insert_breakpoint(event->proc, addr, new_sym);
+				new_sym->next = leader->list_of_symbols;
+				leader->list_of_symbols = new_sym;
+				insert_breakpoint(event->proc, addr, new_sym, 1);
 			}
 #endif
 			for (j = event->proc->callstack_depth - 1; j > i; j--) {
@@ -609,18 +593,23 @@ handle_breakpoint(Event *event) {
 						event->proc->callstack[i].c_un.libfunc->name);
 			}
 			callstack_pop(event->proc);
-			continue_after_breakpoint(event->proc,
-					address2bpstruct(event->proc,
-						event->e_un.brk_addr));
+			sbp = address2bpstruct(leader, event->e_un.brk_addr);
+			continue_after_breakpoint(event->proc, sbp);
 			return;
 		}
 	}
 
-	if ((sbp = address2bpstruct(event->proc, event->e_un.brk_addr))) {
+	if ((sbp = address2bpstruct(leader, event->e_un.brk_addr))) {
+		if (sbp->libsym == NULL) {
+			continue_after_breakpoint(event->proc, sbp);
+			return;
+		}
+
 		if (strcmp(sbp->libsym->name, "") == 0) {
-			debug(2, "Hit _dl_debug_state breakpoint!\n");
-			arch_check_dbg(event->proc);
+			debug(DEBUG_PROCESS, "Hit _dl_debug_state breakpoint!\n");
+			arch_check_dbg(leader);
 		}
+
 		if (event->proc->state != STATE_IGNORED) {
 			event->proc->stack_pointer = get_stack_pointer(event->proc);
 			event->proc->return_addr =
@@ -632,7 +621,7 @@ handle_breakpoint(Event *event) {
 		if (event->proc->need_to_reinitialize_breakpoints
 		    && (strcmp(sbp->libsym->name, PLTs_initialized_by_here) ==
 			0))
-			reinitialize_breakpoints(event->proc);
+			reinitialize_breakpoints(leader);
 #endif
 
 		continue_after_breakpoint(event->proc, sbp);
@@ -689,7 +678,7 @@ callstack_push_symfunc(Process *proc, struct library_symbol *sym) {
 
 	elem->return_addr = proc->return_addr;
 	if (elem->return_addr) {
-		insert_breakpoint(proc, elem->return_addr, 0);
+		insert_breakpoint(proc, elem->return_addr, NULL, 1);
 	}
 
 	/* handle functions like atexit() on mips which have no return */
@@ -709,6 +698,7 @@ callstack_pop(Process *proc) {
 	debug(DEBUG_FUNCTION, "callstack_pop(pid=%d)", proc->pid);
 	elem = &proc->callstack[proc->callstack_depth - 1];
 	if (!elem->is_syscall && elem->return_addr) {
+		assert(proc->leader != NULL);
 		delete_breakpoint(proc, elem->return_addr);
 	}
 	if (elem->arch_ptr != NULL) {
diff --git a/libltrace.c b/libltrace.c
index 0f48d11..e731fe1 100644
--- a/libltrace.c
+++ b/libltrace.c
@@ -12,32 +12,39 @@
 #include "common.h"
 
 char *command = NULL;
-Process *list_of_processes = NULL;
 
 int exiting = 0;		/* =1 if a SIGINT or SIGTERM has been received */
 
-static void
-signal_alarm(int sig) {
-	Process *tmp = list_of_processes;
+static enum pcb_status
+stop_non_p_processes (Process * proc, void * data)
+{
+	int stop = 1;
 
-	signal(SIGALRM, SIG_DFL);
-	while (tmp) {
-		struct opt_p_t *tmp2 = opt_p;
-		while (tmp2) {
-			if (tmp->pid == tmp2->pid) {
-				tmp = tmp->next;
-				if (!tmp) {
-					return;
-				}
-				tmp2 = opt_p;
-				continue;
-			}
-			tmp2 = tmp2->next;
+	struct opt_p_t *it;
+	for (it = opt_p; it != NULL; it = it->next) {
+		Process * p_proc = pid2proc(it->pid);
+		if (p_proc == NULL) {
+			printf("stop_non_p_processes: %d terminated?\n", it->pid);
+			continue;
+		}
+		if (p_proc == proc || p_proc->leader == proc->leader) {
+			stop = 0;
+			break;
 		}
-		debug(2, "Sending SIGSTOP to process %u\n", tmp->pid);
-		kill(tmp->pid, SIGSTOP);
-		tmp = tmp->next;
 	}
+
+	if (stop) {
+		debug(2, "Sending SIGSTOP to process %u", proc->pid);
+		kill(proc->pid, SIGSTOP);
+	}
+
+	return pcb_cont;
+}
+
+static void
+signal_alarm(int sig) {
+	signal(SIGALRM, SIG_DFL);
+	each_process(NULL, &stop_non_p_processes, NULL);
 }
 
 static void
@@ -47,15 +54,7 @@ signal_exit(int sig) {
 	signal(SIGINT, SIG_IGN);
 	signal(SIGTERM, SIG_IGN);
 	signal(SIGALRM, signal_alarm);
-	if (opt_p) {
-		struct opt_p_t *tmp = opt_p;
-		while (tmp) {
-			debug(2, "Sending SIGSTOP to process %u\n", tmp->pid);
-			kill(tmp->pid, SIGSTOP);
-			tmp = tmp->next;
-		}
-	}
-	alarm(1);
+	//alarm(1);
 }
 
 static void
@@ -108,7 +107,7 @@ ltrace_init(int argc, char **argv) {
 		}
 	}
 	if (command) {
-		execute_program(open_program(command, 0), argv);
+		open_program(command, execute_program(command, argv), 0);
 	}
 	opt_p_tmp = opt_p;
 	while (opt_p_tmp) {
diff --git a/ltrace-elf.c b/ltrace-elf.c
index 1a33ec3..d88d5a6 100644
--- a/ltrace-elf.c
+++ b/ltrace-elf.c
@@ -14,7 +14,6 @@
 
 #include "common.h"
 
-void do_init_elf(struct ltelf *lte, const char *filename);
 void do_close_elf(struct ltelf *lte);
 void add_library_symbol(GElf_Addr addr, const char *name,
 		struct library_symbol **library_symbolspp,
@@ -136,7 +135,7 @@ static GElf_Addr get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot,
 	return 0;
 }
 
-void
+int
 do_init_elf(struct ltelf *lte, const char *filename) {
 	int i;
 	GElf_Addr relplt_addr = 0;
@@ -147,7 +146,7 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 
 	lte->fd = open(filename, O_RDONLY);
 	if (lte->fd == -1)
-		error(EXIT_FAILURE, errno, "Can't open \"%s\"", filename);
+		return 1;
 
 #ifdef HAVE_ELF_C_READ_MMAP
 	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
@@ -454,6 +453,7 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 
 		debug(1, "%s %zd PLT relocations", filename, lte->relplt_count);
 	}
+	return 0;
 }
 
 void
@@ -622,7 +622,8 @@ read_elf(Process *proc) {
 
 	elf_version(EV_CURRENT);
 
-	do_init_elf(lte, proc->filename);
+	if (do_init_elf(lte, proc->filename))
+		return NULL;
 
 	memcpy(&main_lte, lte, sizeof(struct ltelf));
 
@@ -634,7 +635,9 @@ read_elf(Process *proc) {
 	proc->e_machine = lte->ehdr.e_machine;
 
 	for (i = 0; i < library_num; ++i) {
-		do_init_elf(&lte[i + 1], library[i]);
+		if (do_init_elf(&lte[i + 1], library[i]))
+			error(EXIT_FAILURE, errno, "Can't open \"%s\"",
+			      proc->filename);
 	}
 
 	if (!options.no_plt) {
diff --git a/ltrace.h b/ltrace.h
index 5e43ba5..0ff4572 100644
--- a/ltrace.h
+++ b/ltrace.h
@@ -20,6 +20,7 @@ enum Event_type {
 typedef struct Process Process;
 typedef struct Event Event;
 struct Event {
+	struct Event * next;
 	Process * proc;
 	Event_type type;
 	union {
diff --git a/output.c b/output.c
index de2a836..945dd52 100644
--- a/output.c
+++ b/output.c
@@ -96,7 +96,7 @@ begin_of_line(enum tof type, Process *proc) {
 }
 
 static Function *
-name2func(char *name) {
+name2func(char const *name) {
 	Function *tmp;
 	const char *str1, *str2;
 
@@ -153,7 +153,7 @@ tabto(int col) {
 }
 
 void
-output_left(enum tof type, Process *proc, char *function_name) {
+output_left(enum tof type, Process *proc, char const *function_name) {
 	Function *func;
 	static arg_type_info *arg_unknown = NULL;
 	if (arg_unknown == NULL)
@@ -168,7 +168,6 @@ output_left(enum tof type, Process *proc, char *function_name) {
 	}
 	current_proc = proc;
 	current_depth = proc->callstack_depth;
-	proc->type_being_displayed = type;
 	begin_of_line(type, proc);
 #ifdef USE_DEMANGLE
 	current_column +=
diff --git a/output.h b/output.h
index c58577a..fa840c7 100644
--- a/output.h
+++ b/output.h
@@ -1,3 +1,3 @@
 void output_line(Process *proc, char *fmt, ...);
-void output_left(enum tof type, Process *proc, char *function_name);
+void output_left(enum tof type, Process *proc, char const *function_name);
 void output_right(enum tof type, Process *proc, char *function_name);
diff --git a/proc.c b/proc.c
index 1c57532..0425e09 100644
--- a/proc.c
+++ b/proc.c
@@ -10,73 +10,280 @@
 #include <stdio.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <assert.h>
+#include <error.h>
 
 #include "common.h"
 
 Process *
-open_program(char *filename, pid_t pid) {
+open_program(char *filename, pid_t pid, int enable) {
 	Process *proc;
+	assert(pid != 0);
 	proc = calloc(sizeof(Process), 1);
 	if (!proc) {
 		perror("malloc");
 		exit(1);
 	}
+
 	proc->filename = strdup(filename);
 	proc->breakpoints_enabled = -1;
-	if (pid) {
-		proc->pid = pid;
+	proc->pid = pid;
 #if defined(HAVE_LIBUNWIND)
-		proc->unwind_priv = _UPT_create(pid);
-	} else {
-		proc->unwind_priv = NULL;
+	proc->unwind_priv = _UPT_create(pid);
+	proc->unwind_as = unw_create_addr_space(&_UPT_accessors, 0);
 #endif /* defined(HAVE_LIBUNWIND) */
-	}
 
-	breakpoints_init(proc);
+	add_process(proc);
+	if (proc->leader == NULL) {
+		free(proc);
+		return NULL;
+	}
 
-	proc->next = list_of_processes;
-	list_of_processes = proc;
+	if (proc->leader == proc)
+		if (breakpoints_init(proc, enable)) {
+			fprintf(stderr, "failed to init breakpoints %d\n",
+				proc->pid);
+			remove_process(proc);
+			return NULL;
+		}
 
-#if defined(HAVE_LIBUNWIND)
-	proc->unwind_as = unw_create_addr_space(&_UPT_accessors, 0);
-#endif /* defined(HAVE_LIBUNWIND) */
 	return proc;
 }
 
-void
-open_pid(pid_t pid) {
+static int
+open_one_pid(pid_t pid)
+{
 	Process *proc;
 	char *filename;
+	debug(DEBUG_PROCESS, "open_one_pid(pid=%d)", pid);
 
-	if (trace_pid(pid) < 0) {
-		fprintf(stderr, "Cannot attach to pid %u: %s\n", pid,
-			strerror(errno));
-		return;
+	/* Get the filename first.  Should the trace_pid fail, we can
+	 * easily free it, untracing is more work.  */
+	if ((filename = pid2name(pid)) == NULL
+	    || trace_pid(pid) < 0) {
+		free(filename);
+		return -1;
 	}
 
-	filename = pid2name(pid);
+	proc = open_program(filename, pid, 0);
+	free(filename);	/* open_program keeps its own strdup'd copy.  */
+	if (proc == NULL)
+		return -1;
+	trace_set_options(proc, pid);
+	return 0;
+}
+
+enum pcb_status
+start_one_pid(Process * proc, void * data)
+{
+	continue_process(proc->pid);
+	proc->breakpoints_enabled = 1;
+	return pcb_cont;
+}
+
+void
+open_pid(pid_t pid)
+{
+	debug(DEBUG_PROCESS, "open_pid(pid=%d)", pid);
+	/* If we are already tracing this guy, we should be seeing all
+	 * his children via normal tracing route.  */
+	if (pid2proc(pid) != NULL)
+		return;
 
-	if (!filename) {
-		fprintf(stderr, "Cannot trace pid %u: %s\n", pid,
-				strerror(errno));
+	/* First, see if we can attach the requested PID itself.  */
+	if (open_one_pid(pid)) {
+		fprintf(stderr, "Cannot attach to pid %u: %s\n",
+			pid, strerror(errno));
 		return;
 	}
 
-	proc = open_program(filename, pid);
-	continue_process(pid);
-	proc->breakpoints_enabled = 1;
+	/* Now attach to all tasks that belong to that PID.  There's a
+	 * race between process_tasks and open_one_pid.  So when we
+	 * fail in open_one_pid below, we just do another round.
+	 * Chances are that by then that PID will have gone away, and
+	 * that's why we have seen the failure.  The processes that we
+	 * manage to open_one_pid are stopped, so we should eventually
+	 * reach a point where process_tasks doesn't give any new
+	 * processes (because there's nobody left to produce
+	 * them).  */
+	size_t old_ntasks = 0;
+	int have_all;
+	while (1) {
+		pid_t *tasks;
+		size_t ntasks;
+		size_t i;
+
+		if (process_tasks(pid, &tasks, &ntasks) < 0) {
+			fprintf(stderr, "Cannot obtain tasks of pid %u: %s\n",
+				pid, strerror(errno));
+			goto start;
+		}
+
+		have_all = 1;
+		for (i = 0; i < ntasks; ++i)
+			if (pid2proc(tasks[i]) == NULL
+			    && open_one_pid(tasks[i]))
+				have_all = 0;
+
+		free(tasks);
+
+		if (have_all && old_ntasks == ntasks)
+			break;
+		old_ntasks = ntasks;
+	}
+
+	/* Done.  Now initialize breakpoints and then continue
+	 * everyone.  */
+	Process * leader;
+start:
+	leader = pid2proc(pid)->leader;
+	enable_all_breakpoints(leader);
+
+	each_task(pid2proc(pid)->leader, start_one_pid, NULL);
+}
+
+static enum pcb_status
+find_proc(Process * proc, void * data)
+{
+	pid_t pid = (pid_t)(uintptr_t)data;
+	return proc->pid == pid ? pcb_stop : pcb_cont;
 }
 
 Process *
 pid2proc(pid_t pid) {
-	Process *tmp;
+	return each_process(NULL, &find_proc, (void *)(uintptr_t)pid);
+}
 
+
+static Process * list_of_processes = NULL;
+
+Process *
+each_process(Process * proc,
+	     enum pcb_status (* cb)(Process * proc, void * data),
+	     void * data)
+{
+	Process * it = proc ?: list_of_processes;
+	for (; it != NULL; ) {
+		/* Callback might call remove_process.  */
+		Process * next = it->next;
+		if ((*cb) (it, data) == pcb_stop)
+			return it;
+		it = next;
+	}
+	return NULL;
+}
+
+Process *
+each_task(Process * it, enum pcb_status (* cb)(Process * proc, void * data),
+	  void * data)
+{
+	if (it != NULL) {
+		Process * leader = it->leader;
+		for (; it != NULL && it->leader == leader; ) {
+			/* Callback might call remove_process.  */
+			Process * next = it->next;
+			if ((*cb) (it, data) == pcb_stop)
+				return it;
+			it = next;
+		}
+	}
+	return NULL;
+}
+
+void
+add_process(Process * proc)
+{
+	Process ** leaderp = &list_of_processes;
+	if (proc->pid) {
+		pid_t tgid = process_leader(proc->pid);
+		if (tgid == 0)
+			/* Must have been terminated before we managed
+			 * to fully attach.  */
+			return;
+		if (tgid == proc->pid)
+			proc->leader = proc;
+		else {
+			Process * leader = pid2proc(tgid);
+			proc->leader = leader;
+			if (leader != NULL)
+				leaderp = &leader->next;
+		}
+	}
+	proc->next = *leaderp;
+	*leaderp = proc;
+}
+
+static enum pcb_status
+clear_leader(Process * proc, void * data)
+{
+	debug(DEBUG_FUNCTION, "detach_task %d from leader %d",
+	      proc->pid, proc->leader->pid);
+	proc->leader = NULL;
+	return pcb_cont;
+}
+
+static enum ecb_status
+event_for_proc(Event * event, void * data)
+{
+	if (event->proc == data)
+		return ecb_deque;
+	else
+		return ecb_cont;
+}
+
+static void
+delete_events_for(Process * proc)
+{
+	Event * event;
+	while ((event = each_qd_event(&event_for_proc, proc)) != NULL)
+		free(event);
+}
+
+void
+remove_process(Process *proc)
+{
+	Process *tmp, *tmp2;
+
+	debug(DEBUG_FUNCTION, "remove_proc(pid=%d)", proc->pid);
+
+	if (proc->leader == proc)
+		each_task(proc, &clear_leader, NULL);
+
+	if (list_of_processes == proc) {
+		tmp = list_of_processes;
+		list_of_processes = list_of_processes->next;
+		delete_events_for(tmp);
+		free(tmp);
+		return;
+	}
 	tmp = list_of_processes;
-	while (tmp) {
-		if (pid == tmp->pid) {
-			return tmp;
+	while (tmp->next) {
+		if (tmp->next == proc) {
+			tmp2 = tmp->next;
+			tmp->next = tmp->next->next;
+			delete_events_for(tmp2);
+			free(tmp2);
+			return;
 		}
 		tmp = tmp->next;
 	}
-	return NULL;
+}
+
+void
+install_event_handler(Process * proc, Event_Handler * handler)
+{
+	debug(DEBUG_FUNCTION, "install_event_handler(pid=%d, %p)", proc->pid, handler);
+	assert(proc->event_handler == NULL);
+	proc->event_handler = handler;
+}
+
+void
+destroy_event_handler(Process * proc)
+{
+	Event_Handler * handler = proc->event_handler;
+	debug(DEBUG_FUNCTION, "destroy_event_handler(pid=%d, %p)", proc->pid, handler);
+	assert(handler != NULL);
+	handler->destroy(handler);
+	free(handler);
+	proc->event_handler = NULL;
 }
diff --git a/sysdeps/linux-gnu/breakpoint.c b/sysdeps/linux-gnu/breakpoint.c
index 9104189..5a49e9d 100644
--- a/sysdeps/linux-gnu/breakpoint.c
+++ b/sysdeps/linux-gnu/breakpoint.c
@@ -8,21 +8,11 @@
 
 #ifdef ARCH_HAVE_ENABLE_BREAKPOINT
 extern void arch_enable_breakpoint(pid_t, Breakpoint *);
+#else				/* ARCH_HAVE_ENABLE_BREAKPOINT */
 void
-enable_breakpoint(pid_t pid, Breakpoint *sbp) {
-	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p, symbol=%s", pid, sbp->addr, sbp->libsym->name);
-	} else {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p", pid, sbp->addr);
-	}
-	arch_enable_breakpoint(pid, sbp);
-}
-#else
-
-static unsigned char break_insn[] = BREAKPOINT_VALUE;
-
-void
-enable_breakpoint(pid_t pid, Breakpoint *sbp) {
+arch_enable_breakpoint(pid_t pid, Breakpoint *sbp)
+{
+	static unsigned char break_insn[] = BREAKPOINT_VALUE;
 	unsigned int i, j;
 
 	if (sbp->libsym) {
@@ -32,9 +22,8 @@ enable_breakpoint(pid_t pid, Breakpoint *sbp) {
 	}
 
 	for (i = 0; i < 1 + ((BREAKPOINT_LENGTH - 1) / sizeof(long)); i++) {
-		long a =
-		    ptrace(PTRACE_PEEKTEXT, pid, sbp->addr + i * sizeof(long),
-			   0);
+		long a = ptrace(PTRACE_PEEKTEXT, pid,
+				sbp->addr + i * sizeof(long), 0);
 		for (j = 0;
 		     j < sizeof(long)
 		     && i * sizeof(long) + j < BREAKPOINT_LENGTH; j++) {
@@ -48,20 +37,22 @@ enable_breakpoint(pid_t pid, Breakpoint *sbp) {
 }
 #endif				/* ARCH_HAVE_ENABLE_BREAKPOINT */
 
-#ifdef ARCH_HAVE_DISABLE_BREAKPOINT
-extern void arch_disable_breakpoint(pid_t, const Breakpoint *sbp);
 void
-disable_breakpoint(pid_t pid, const Breakpoint *sbp) {
+enable_breakpoint(Process * proc, Breakpoint *sbp) {
 	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p, symbol=%s", pid, sbp->addr, sbp->libsym->name);
+		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p, symbol=%s", proc->pid, sbp->addr, sbp->libsym->name);
 	} else {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p", pid, sbp->addr);
+		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p", proc->pid, sbp->addr);
 	}
-	arch_disable_breakpoint(pid, sbp);
+	arch_enable_breakpoint(proc->pid, sbp);
 }
-#else
+
+#ifdef ARCH_HAVE_DISABLE_BREAKPOINT
+extern void arch_disable_breakpoint(pid_t, const Breakpoint *sbp);
+#else				/* ARCH_HAVE_DISABLE_BREAKPOINT */
 void
-disable_breakpoint(pid_t pid, const Breakpoint *sbp) {
+arch_disable_breakpoint(pid_t pid, const Breakpoint *sbp)
+{
 	unsigned int i, j;
 
 	if (sbp->libsym) {
@@ -85,3 +76,13 @@ disable_breakpoint(pid_t pid, const Breakpoint *sbp) {
 	}
 }
 #endif				/* ARCH_HAVE_DISABLE_BREAKPOINT */
+
+void
+disable_breakpoint(Process * proc, Breakpoint *sbp) {
+	if (sbp->libsym) {
+		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p, symbol=%s", proc->pid, sbp->addr, sbp->libsym->name);
+	} else {
+		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p", proc->pid, sbp->addr);
+	}
+	arch_disable_breakpoint(proc->pid, sbp);
+}
diff --git a/sysdeps/linux-gnu/events.c b/sysdeps/linux-gnu/events.c
index fd19e71..8a79583 100644
--- a/sysdeps/linux-gnu/events.c
+++ b/sysdeps/linux-gnu/events.c
@@ -8,20 +8,118 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/ptrace.h>
+#include <assert.h>
 
 #include "common.h"
 
 static Event event;
 
+/* A queue of events that we missed while enabling the
+ * breakpoint in one of the tasks.  */
+static Event * delayed_events = NULL;
+static Event * end_delayed_events = NULL;
+
+static enum pcb_status
+first (Process * proc, void * data)
+{
+	return pcb_stop;
+}
+
+void
+enque_event(Event * event)
+{
+	debug(DEBUG_FUNCTION, "%d: queuing event %d for later",
+	      event->proc->pid, event->type);
+	Event * ne = malloc(sizeof(*ne));
+	if (ne == NULL) {
+		perror("event will be missed: malloc");
+		return;
+	}
+
+	*ne = *event;
+	ne->next = NULL;
+	if (end_delayed_events == NULL) {
+		assert(delayed_events == NULL);
+		end_delayed_events = delayed_events = ne;
+	}
+	else {
+		assert(delayed_events != NULL);
+		end_delayed_events = end_delayed_events->next = ne;
+	}
+}
+
+Event *
+each_qd_event(enum ecb_status (*pred)(Event *, void *), void * data)
+{
+	Event * prev = delayed_events;
+	Event * event;
+	for (event = prev; event != NULL; ) {
+		switch ((*pred)(event, data)) {
+		case ecb_cont:
+			prev = event;
+			event = event->next;
+			continue;
+
+		case ecb_deque:
+			debug(DEBUG_FUNCTION, "dequeuing event %d for %d",
+			      event->type,
+			      event->proc != NULL ? event->proc->pid : -1);
+			/*
+			printf("dequeuing event %d for %d\n", event->type,
+			       event->proc != NULL ? event->proc->pid : -1) ;
+			*/
+			if (end_delayed_events == event)
+				end_delayed_events = prev;
+			if (delayed_events == event)
+				delayed_events = event->next;
+			else
+				prev->next = event->next;
+			if (delayed_events == NULL)
+				end_delayed_events = NULL;
+			/* fall-through */
+
+		case ecb_yield:
+			return event;
+		}
+	}
+
+	return NULL;
+}
+
+static enum ecb_status
+event_process_not_reenabling(Event * event, void * data)
+{
+	if (event->proc == NULL
+	    || event->proc->leader == NULL
+	    || event->proc->leader->event_handler == NULL)
+		return ecb_deque;
+	else
+		return ecb_cont;
+}
+
+static Event *
+next_qd_event(void)
+{
+	return each_qd_event(&event_process_not_reenabling, NULL);
+}
+
 Event *
-next_event(void) {
+next_event(void)
+{
 	pid_t pid;
 	int status;
 	int tmp;
 	int stop_signal;
 
 	debug(DEBUG_FUNCTION, "next_event()");
-	if (!list_of_processes) {
+	Event * ev;
+	if ((ev = next_qd_event()) != NULL) {
+		event = *ev;
+		free(ev);
+		return &event;
+	}
+
+	if (!each_process(NULL, &first, NULL)) {
 		debug(DEBUG_EVENT, "event: No more traced programs: exiting");
 		exit(0);
 	}
@@ -46,26 +144,76 @@ next_event(void) {
 		return &event;
 	}
 	get_arch_dep(event.proc);
-	event.proc->instruction_pointer = NULL;
 	debug(3, "event from pid %u", pid);
-	if (event.proc->breakpoints_enabled == -1) {
-		event.type = EVENT_NONE;
+	if (event.proc->breakpoints_enabled == -1)
 		trace_set_options(event.proc, event.proc->pid);
-		enable_all_breakpoints(event.proc);
-		continue_process(event.proc->pid);
-		debug(DEBUG_EVENT, "event: NONE: pid=%d (enabling breakpoints)", pid);
-		return &event;
-	} else if (!event.proc->libdl_hooked) {
-		/* debug struct may not have been written yet.. */
-		if (linkmap_init(event.proc, &main_lte) == 0) {
-			event.proc->libdl_hooked = 1;
+	Process *leader = event.proc->leader;
+	if (leader == event.proc) {
+		if (event.proc->breakpoints_enabled == -1) {
+			event.type = EVENT_NONE;
+			enable_all_breakpoints(event.proc);
+			continue_process(event.proc->pid);
+			debug(DEBUG_EVENT,
+			      "event: NONE: pid=%d (enabling breakpoints)",
+			      pid);
+			return &event;
+		} else if (!event.proc->libdl_hooked) {
+			/* debug struct may not have been written yet.. */
+			if (linkmap_init(event.proc, &main_lte) == 0) {
+				event.proc->libdl_hooked = 1;
+			}
 		}
 	}
 
-	if (opt_i) {
-		event.proc->instruction_pointer =
-			get_instruction_pointer(event.proc);
+	/* The process should be stopped after the waitpid call.  But
+	 * when the whole thread group is terminated, we see
+	 * individual tasks spontaneously transitioning from 't' to
+	 * 'R' and 'Z'.  Calls to ptrace fail and /proc/pid/status may
+	 * not even be available anymore, so we can't check in
+	 * advance.  So we just drop the error checking around ptrace
+	 * calls.  We check for termination ex post when it fails,
+	 * suppress the event, and let the event loop collect the
+	 * termination in the next iteration.  */
+#define CHECK_PROCESS_TERMINATED					\
+	do {								\
+		int errno_save = errno;					\
+		switch (process_stopped(pid))				\
+		case 0:							\
+		case -1: {						\
+			debug(DEBUG_EVENT,				\
+			      "process not stopped, is it terminating?"); \
+			event.type = EVENT_NONE;			\
+			continue_process(event.proc->pid);		\
+			return &event;					\
+		}							\
+		errno = errno_save;					\
+	} while (0)
+
+	event.proc->instruction_pointer = (void *)(uintptr_t)-1;
+
+	/* Check for task termination now, before we have a need to
+	 * call CHECK_PROCESS_TERMINATED later.  That would suppress
+	 * the event that we are processing.  */
+	if (WIFSIGNALED(status)) {
+		event.type = EVENT_EXIT_SIGNAL;
+		event.e_un.signum = WTERMSIG(status);
+		debug(DEBUG_EVENT, "event: EXIT_SIGNAL: pid=%d, signum=%d", pid, event.e_un.signum);
+		return &event;
+	}
+	if (WIFEXITED(status)) {
+		event.type = EVENT_EXIT;
+		event.e_un.ret_val = WEXITSTATUS(status);
+		debug(DEBUG_EVENT, "event: EXIT: pid=%d, status=%d", pid, event.e_un.ret_val);
+		return &event;
+	}
+
+	event.proc->instruction_pointer = get_instruction_pointer(event.proc);
+	if (event.proc->instruction_pointer == (void *)(uintptr_t)-1) {
+		CHECK_PROCESS_TERMINATED;
+		if (errno != 0)
+			perror("get_instruction_pointer");
 	}
+
 	switch (syscall_p(event.proc, status, &tmp)) {
 		case 1:
 			event.type = EVENT_SYSCALL;
@@ -88,10 +236,9 @@ next_event(void) {
 			debug(DEBUG_EVENT, "event: ARCH_SYSRET: pid=%d, sysnum=%d", pid, tmp);
 			return &event;
 		case -1:
-			event.type = EVENT_NONE;
-			continue_process(event.proc->pid);
-			debug(DEBUG_EVENT, "event: NONE: pid=%d (syscall_p returned -1)", pid);
-			return &event;
+			CHECK_PROCESS_TERMINATED;
+			if (errno != 0)
+				perror("syscall_p");
 	}
 	if (WIFSTOPPED(status) && ((status>>16 == PTRACE_EVENT_FORK) || (status>>16 == PTRACE_EVENT_VFORK) || (status>>16 == PTRACE_EVENT_CLONE))) {
 		unsigned long data;
@@ -106,18 +253,6 @@ next_event(void) {
 		debug(DEBUG_EVENT, "event: EXEC: pid=%d", pid);
 		return &event;
 	}
-	if (WIFEXITED(status)) {
-		event.type = EVENT_EXIT;
-		event.e_un.ret_val = WEXITSTATUS(status);
-		debug(DEBUG_EVENT, "event: EXIT: pid=%d, status=%d", pid, event.e_un.ret_val);
-		return &event;
-	}
-	if (WIFSIGNALED(status)) {
-		event.type = EVENT_EXIT_SIGNAL;
-		event.e_un.signum = WTERMSIG(status);
-		debug(DEBUG_EVENT, "event: EXIT_SIGNAL: pid=%d, signum=%d", pid, event.e_un.signum);
-		return &event;
-	}
 	if (!WIFSTOPPED(status)) {
 		/* should never happen */
 		event.type = EVENT_NONE;
@@ -128,22 +263,19 @@ next_event(void) {
 	stop_signal = WSTOPSIG(status);
 
 	/* On some targets, breakpoints are signalled not using
-	   SIGTRAP, but also with SIGILL, SIGSEGV or SIGEMT.  Check
-	   for these. (TODO: is this true?) */
-	if (stop_signal == SIGSEGV
-			|| stop_signal == SIGILL
-#ifdef SIGEMT
-			|| stop_signal == SIGEMT
-#endif
-	   ) {
-		if (!event.proc->instruction_pointer) {
-			event.proc->instruction_pointer =
-				get_instruction_pointer(event.proc);
-		}
+	   SIGTRAP, but also with SIGILL, SIGSEGV or SIGEMT.  SIGEMT
+	   is not defined on Linux, but check for the others.
 
-		if (address2bpstruct(event.proc, event.proc->instruction_pointer))
+	   N.B. see comments in GDB's infrun.c for details.  I've
+	   actually seen this on an Itanium machine on RHEL 5, I don't
+	   remember the exact kernel version anymore.  ia64-sigill.s
+	   in the test suite tests this.  Petr Machata 2011-06-08.  */
+	void * break_address
+		= event.proc->instruction_pointer - DECR_PC_AFTER_BREAK;
+	if ((stop_signal == SIGSEGV || stop_signal == SIGILL)
+	    && leader != NULL
+	    && address2bpstruct(leader, break_address))
 			stop_signal = SIGTRAP;
-	}
 
 	if (stop_signal != (SIGTRAP | event.proc->tracesysgood)
 			&& stop_signal != SIGTRAP) {
@@ -156,12 +288,8 @@ next_event(void) {
 	/* last case [by exhaustion] */
 	event.type = EVENT_BREAKPOINT;
 
-	if (!event.proc->instruction_pointer) {
-		event.proc->instruction_pointer =
-			get_instruction_pointer(event.proc);
-	}
-	event.e_un.brk_addr =
-		event.proc->instruction_pointer - DECR_PC_AFTER_BREAK;
+	event.e_un.brk_addr = break_address;
 	debug(DEBUG_EVENT, "event: BREAKPOINT: pid=%d, addr=%p", pid, event.e_un.brk_addr);
+
 	return &event;
 }
diff --git a/sysdeps/linux-gnu/proc.c b/sysdeps/linux-gnu/proc.c
index e1cadf7..e3b71e5 100644
--- a/sysdeps/linux-gnu/proc.c
+++ b/sysdeps/linux-gnu/proc.c
@@ -1,13 +1,22 @@
+#define _GNU_SOURCE /* For getline.  */
 #include "config.h"
 #include "common.h"
 
 #include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include <inttypes.h>
 #include <link.h>
 #include <stdio.h>
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <error.h>
+
 
 /* /proc/pid doesn't exist just after the fork, and sometimes `ltrace'
  * couldn't open it to find the executable.  So it may be necessary to
@@ -16,17 +25,19 @@
 
 #define	MAX_DELAY	100000	/* 100000 microseconds = 0.1 seconds */
 
+#define PROC_PID_FILE(VAR, FORMAT, PID)					\
+	char VAR[strlen(FORMAT) + 3 * sizeof(pid_t)]; /* fits any pid_t */ \
+	snprintf(VAR, sizeof(VAR), FORMAT, PID)	/* bounded, never overruns */
+
 /*
  * Returns a (malloc'd) file name corresponding to a running pid
  */
 char *
 pid2name(pid_t pid) {
-	char proc_exe[1024];
-
 	if (!kill(pid, 0)) {
 		int delay = 0;
 
-		sprintf(proc_exe, "/proc/%d/exe", pid);
+		PROC_PID_FILE(proc_exe, "/proc/%d/exe", pid);
 
 		while (delay < MAX_DELAY) {
 			if (!access(proc_exe, F_OK)) {
@@ -38,6 +49,197 @@ pid2name(pid_t pid) {
 	return NULL;
 }
 
+static FILE *
+open_status_file(pid_t pid)
+{
+	PROC_PID_FILE(fn, "/proc/%d/status", pid);
+	/* Don't complain if we fail.  This would typically happen
+	   when the process is about to terminate, and these files are
+	   not available anymore.  This function is called from the
+	   event loop, and we don't want to clutter the output just
+	   because the process terminates.  */
+	return fopen(fn, "r");
+}
+
+/* Return the next line of FILE that starts with the LEN bytes of
+ * PREFIX (malloc'd, caller frees), or NULL when none is left.  */
+static char *
+find_line_starting(FILE * file, const char * prefix, size_t len)
+{
+	char * line = NULL;
+	size_t line_len = 0;
+	while (getline(&line, &line_len, file) >= 0)
+		if (strncmp(line, prefix, len) == 0)
+			return line;
+	free(line);	/* getline's buffer must be freed even on failure */
+	return NULL;
+}
+
+static void
+each_line_starting(FILE * file, const char *prefix,
+		   enum pcb_status (*cb)(const char * line, const char * prefix,
+					 void * data),
+		   void * data)
+{
+	size_t len = strlen(prefix);
+	char * line;
+	while ((line = find_line_starting(file, prefix, len)) != NULL) {
+		enum pcb_status st = (*cb)(line, prefix, data);
+		free (line);
+		if (st == pcb_stop)
+			return;
+	}
+}
+
+static enum pcb_status
+process_leader_cb(const char * line, const char * prefix, void * data)
+{
+	pid_t * pidp = data;
+	*pidp = atoi(line + strlen(prefix));
+	return pcb_stop;
+}
+
+pid_t
+process_leader(pid_t pid)
+{
+	pid_t tgid = 0;
+	FILE * file = open_status_file(pid);
+	if (file != NULL) {
+		each_line_starting(file, "Tgid:\t", &process_leader_cb, &tgid);
+		fclose(file);
+	}
+
+	return tgid;
+}
+
+static enum pcb_status
+process_stopped_cb(const char * line, const char * prefix, void * data)
+{
+	char c = line[strlen(prefix)];
+	// t:tracing stop, T:job control stop
+	*(int *)data = (c == 't' || c == 'T');
+	return pcb_stop;
+}
+
+int
+process_stopped(pid_t pid)
+{
+	int is_stopped = -1;
+	FILE * file = open_status_file(pid);
+	if (file != NULL) {
+		each_line_starting(file, "State:\t", &process_stopped_cb,
+				   &is_stopped);
+		fclose(file);
+	}
+	return is_stopped;
+}
+
+static enum pcb_status
+process_status_cb(const char * line, const char * prefix, void * data)
+{
+	const char * status = line + strlen(prefix);
+	const char c = *status;
+
+#define RETURN(C) do {					\
+		*(enum process_status *)data = C;	\
+		return pcb_stop;			\
+	} while (0)
+
+	switch (c) {
+	case 'Z': RETURN(ps_zombie);
+	case 't': RETURN(ps_tracing_stop);
+	case 'T': {
+		/* This can be either "T (stopped)" or, for older
+		 * kernels, "T (tracing stop)".  */
+		if (!strcmp(status, "T (stopped)\n"))
+			RETURN(ps_stop);
+		else if (!strcmp(status, "T (tracing stop)\n"))
+			RETURN(ps_tracing_stop);
+		else {
+			fprintf(stderr, "Unknown process status: %s",
+				status);
+			RETURN(ps_stop); /* Some sort of stop
+					  * anyway.  */
+		}
+	}
+	}
+
+	RETURN(ps_other);
+#undef RETURN
+}
+
+enum process_status
+process_status(pid_t pid)
+{
+	enum process_status ret = ps_invalid;
+	FILE * file = open_status_file(pid);
+	if (file != NULL) {
+		each_line_starting(file, "State:\t", &process_status_cb, &ret);
+		fclose(file);
+		if (ret == ps_invalid)
+			error(0, errno, "process_status %d", pid);
+	} else
+		/* If the file is not present, the process presumably
+		 * exited already.  */
+		ret = ps_zombie;
+
+	return ret;
+}
+
+static int
+all_digits(const char *str)
+{
+	while (isdigit((unsigned char)*str)) /* plain char may be negative */
+		str++;
+	return !*str;	/* nonzero iff only digits precede the NUL */
+}
+
+/* Collect the numeric entries of /proc/PID/task into a malloc'd array
+ * (*RET_TASKS, *RET_N entries).  Returns 0, or -1 on failure.  */
+int
+process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n)
+{
+	PROC_PID_FILE(fn, "/proc/%d/task", pid);
+	DIR * d = opendir(fn);
+	if (d == NULL)
+		return -1;
+
+	pid_t *tasks = NULL;
+	size_t n = 0;
+	size_t alloc = 0;
+
+	while (1) {
+		struct dirent entry;
+		struct dirent *result;
+		if (readdir_r(d, &entry, &result) != 0)
+			goto fail;
+		if (result == NULL)
+			break;
+		if (result->d_type == DT_DIR && all_digits(result->d_name)) {
+			pid_t npid = atoi(result->d_name);
+			if (n >= alloc) {
+				alloc = alloc > 0 ? (2 * alloc) : 8;
+				pid_t *ntasks = realloc(tasks,
+							sizeof(*tasks) * alloc);
+				if (ntasks == NULL)
+					goto fail;
+				tasks = ntasks;
+			}
+			if (n >= alloc)
+				abort();
+			tasks[n++] = npid;
+		}
+	}
+	closedir(d);
+	*ret_tasks = tasks;
+	*ret_n = n;
+	return 0;
+fail:
+	free(tasks);
+	closedir(d);	/* error paths must release the directory too */
+	return -1;
+}
+
 static int
 find_dynamic_entry_addr(Process *proc, void *pvAddr, int d_tag, void **addr) {
 	int i = 0, done = 0;
@@ -187,7 +389,10 @@ linkmap_add_cb(void *data) { //const char *lib_name, ElfW(Addr) addr) {
 				addr = sym.st_value;
 				add_library_symbol(addr, xptr->name, &library_symbols, LS_TOPLT_NONE, 0);
 				xptr->found = 1;
-				insert_breakpoint(lm_add->proc, sym2addr(lm_add->proc, library_symbols), library_symbols);
+				insert_breakpoint(lm_add->proc,
+						  sym2addr(lm_add->proc,
+							   library_symbols),
+						  library_symbols, 1);
 			}
 		}
 		do_close_elf(&lte);
@@ -275,10 +480,22 @@ linkmap_init(Process *proc, struct ltelf *lte) {
 	data.lte = lte;
 
 	add_library_symbol(rdbg->r_brk, "", &library_symbols, LS_TOPLT_NONE, 0);
-	insert_breakpoint(proc, sym2addr(proc, library_symbols), library_symbols);
+	insert_breakpoint(proc, sym2addr(proc, library_symbols),
+			  library_symbols, 1);
 
 	crawl_linkmap(proc, rdbg, hook_libdl_cb, &data);
 
 	free(rdbg);
 	return 0;
 }
+
+int
+task_kill (pid_t pid, int sig)
+{
+	// Taken from GDB
+        int ret;
+
+        errno = 0;
+        ret = syscall (__NR_tkill, pid, sig);
+	return ret;
+}
diff --git a/sysdeps/linux-gnu/trace.c b/sysdeps/linux-gnu/trace.c
index e4be465..3800fad 100644
--- a/sysdeps/linux-gnu/trace.c
+++ b/sysdeps/linux-gnu/trace.c
@@ -7,6 +7,7 @@
 #include <sys/wait.h>
 #include "ptrace.h"
 #include <asm/unistd.h>
+#include <assert.h>
 
 #include "common.h"
 
@@ -69,7 +70,7 @@ umovelong (Process *proc, void *addr, long *result, arg_type_info *info) {
 
 void
 trace_me(void) {
-	debug(DEBUG_PROCESS, "trace_me: pid=%d\n", getpid());
+	debug(DEBUG_PROCESS, "trace_me: pid=%d", getpid());
 	if (ptrace(PTRACE_TRACEME, 0, 1, 0) < 0) {
 		perror("PTRACE_TRACEME");
 		exit(1);
@@ -78,7 +79,7 @@ trace_me(void) {
 
 int
 trace_pid(pid_t pid) {
-	debug(DEBUG_PROCESS, "trace_pid: pid=%d\n", pid);
+	debug(DEBUG_PROCESS, "trace_pid: pid=%d", pid);
 	if (ptrace(PTRACE_ATTACH, pid, 1, 0) < 0) {
 		return -1;
 	}
@@ -87,9 +88,9 @@ trace_pid(pid_t pid) {
 	   in pid.  The child is sent a SIGSTOP, but will not
 	   necessarily have stopped by the completion of this call;
 	   use wait() to wait for the child to stop. */
-	if (waitpid (pid, NULL, 0) != pid) {
+	if (waitpid (pid, NULL, __WALL) != pid) {
 		perror ("trace_pid: waitpid");
-		exit (1);
+		return -1;
 	}
 
 	return 0;
@@ -100,7 +101,7 @@ trace_set_options(Process *proc, pid_t pid) {
 	if (proc->tracesysgood & 0x80)
 		return;
 
-	debug(DEBUG_PROCESS, "trace_set_options: pid=%d\n", pid);
+	debug(DEBUG_PROCESS, "trace_set_options: pid=%d", pid);
 
 	long options = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK |
 		PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE |
@@ -115,7 +116,7 @@ trace_set_options(Process *proc, pid_t pid) {
 
 void
 untrace_pid(pid_t pid) {
-	debug(DEBUG_PROCESS, "untrace_pid: pid=%d\n", pid);
+	debug(DEBUG_PROCESS, "untrace_pid: pid=%d", pid);
 	ptrace(PTRACE_DETACH, pid, 1, 0);
 }
 
@@ -126,56 +127,652 @@ continue_after_signal(pid_t pid, int signum) {
 
 void
 continue_after_signal(pid_t pid, int signum) {
-	Process *proc;
-
 	debug(DEBUG_PROCESS, "continue_after_signal: pid=%d, signum=%d", pid, signum);
-
-	proc = pid2proc(pid);
-	if (proc && proc->breakpoint_being_enabled) {
-#if defined __sparc__  || defined __ia64___ || defined __mips__
-		ptrace(PTRACE_SYSCALL, pid, 0, signum);
-#else
-		ptrace(PTRACE_SINGLESTEP, pid, 0, signum);
-#endif
+	ptrace(PTRACE_SYSCALL, pid, 0, signum);
+}
+
+static enum ecb_status
+event_for_pid(Event * event, void * data)
+{
+	if (event->proc != NULL && event->proc->pid == (pid_t)(uintptr_t)data)
+		return ecb_yield;
+	return ecb_cont;
+}
+
+static int
+have_events_for(pid_t pid)
+{
+	return each_qd_event(event_for_pid, (void *)(uintptr_t)pid) != NULL;
+}
+
+void
+continue_process(pid_t pid)
+{
+	debug(DEBUG_PROCESS, "continue_process: pid=%d", pid);
+
+	/* Only really continue the process if there are no events in
+	   the queue for this process.  Otherwise just wait for the
+	   other events to arrive.  */
+	if (!have_events_for(pid))
+		/* We always trace syscalls to control fork(),
+		 * clone(), execve()... */
+		ptrace(PTRACE_SYSCALL, pid, 0, 0);
+	else
+		debug(DEBUG_PROCESS,
+		      "putting off the continue, events in que.");
+}
+
+/**
+ * This is used for bookkeeping related to PIDs that the event
+ * handlers work with.
+ */
+struct pid_task {
+	pid_t pid;	/* This may be 0 for tasks that exited
+			 * mid-handling.  */
+	int sigstopped;
+	int got_event;
+	int delivered;
+} * pids;
+
+struct pid_set {
+	struct pid_task * tasks;
+	size_t count;
+	size_t alloc;
+};
+
+/**
+ * Breakpoint re-enablement.  When we hit a breakpoint, we must
+ * disable it, single-step, and re-enable it.  That single-step can be
+ * done only by one task in a task group, while others are stopped,
+ * otherwise the processes would race for who sees the breakpoint
+ * disabled and who doesn't.  The following is to keep track of it
+ * all.
+ */
+struct process_stopping_handler
+{
+	Event_Handler super;
+
+	/* The task that is doing the re-enablement.  */
+	Process * task_enabling_breakpoint;
+
+	/* The breakpoint being re-enabled.  */
+	Breakpoint * breakpoint_being_enabled;
+
+	enum {
+		/* We are waiting for everyone to land in t/T.  */
+		psh_stopping = 0,
+
+		/* We are doing the PTRACE_SINGLESTEP.  */
+		psh_singlestep,
+
+		/* We are waiting for all the SIGSTOPs to arrive so
+		 * that we can sink them.  */
+		psh_sinking,
+
+		/* This is for tracking the ugly workaround.  */
+		psh_ugly_workaround,
+	} state;
+
+	int exiting;
+
+	struct pid_set pids;
+};
+
+static enum pcb_status
+task_stopped(Process * task, void * data)
+{
+	/* If the task is already stopped, don't worry about it.
+	 * Likewise if it managed to become a zombie or terminate in
+	 * the meantime.  This can happen when the whole thread group
+	 * is terminating.  */
+	switch (process_status(task->pid)) {
+	case ps_invalid:
+	case ps_tracing_stop:
+	case ps_zombie:
+		return pcb_cont;
+	default:
+		return pcb_stop;
+	}
+}
+
+static struct pid_task *
+get_task_info(struct pid_set * pids, pid_t pid)
+{
+	assert(pid != 0);
+	size_t i;
+	for (i = 0; i < pids->count; ++i)
+		if (pids->tasks[i].pid == pid)
+			return &pids->tasks[i];
+
+	return NULL;
+}
+
+static struct pid_task *
+add_task_info(struct pid_set * pids, pid_t pid)
+{
+	if (pids->count == pids->alloc) {
+		size_t ns = (2 * pids->alloc) ?: 4;
+		struct pid_task * n = realloc(pids->tasks,
+					      sizeof(*pids->tasks) * ns);
+		if (n == NULL)
+			return NULL;
+		pids->tasks = n;
+		pids->alloc = ns;
+	}
+	struct pid_task * task_info = &pids->tasks[pids->count++];
+	memset(task_info, 0, sizeof(*task_info));
+	task_info->pid = pid;
+	return task_info;
+}
+
+static enum pcb_status
+send_sigstop(Process * task, void * data)
+{
+	Process * leader = task->leader;
+	struct pid_set * pids = data;
+
+	/* Look for pre-existing task record, or add new.  */
+	struct pid_task * task_info = get_task_info(pids, task->pid);
+	if (task_info == NULL)
+		task_info = add_task_info(pids, task->pid);
+	if (task_info == NULL) {
+		perror("send_sigstop: add_task_info");
+		destroy_event_handler(leader);
+		/* Signal failure upwards.  */
+		return pcb_stop;
+	}
+
+	/* This task still has not been attached to.  It should be
+	   stopped by the kernel.  */
+	if (task->state == STATE_BEING_CREATED)
+		return pcb_cont;
+
+	/* Don't bother sending SIGSTOP if we are already stopped, or
+	 * if we sent the SIGSTOP already, which happens when we
+	 * inherit the handler from breakpoint re-enablement.  */
+	if (task_stopped(task, NULL) == pcb_cont)
+		return pcb_cont;
+	if (task_info->sigstopped) {
+		if (!task_info->delivered)
+			return pcb_cont;
+		task_info->delivered = 0;
+	}
+
+	if (task_kill(task->pid, SIGSTOP) >= 0) {
+		debug(DEBUG_PROCESS, "send SIGSTOP to %d", task->pid);
+		task_info->sigstopped = 1;
+	} else
+		fprintf(stderr,
+			"Warning: couldn't send SIGSTOP to %d\n", task->pid);
+
+	return pcb_cont;
+}
+
+/* On certain kernels, detaching right after a singlestep causes the
+   tracee to be killed with a SIGTRAP (even though the singlestep
+   was properly caught by waitpid).  The ugly workaround is to put a
+   breakpoint where IP points and let the process continue.  After
+   this the breakpoint can be retracted and the process detached.  */
+static void
+ugly_workaround(Process * proc)
+{
+	void * ip = get_instruction_pointer(proc);
+	Breakpoint * sbp = dict_find_entry(proc->leader->breakpoints, ip);
+	if (sbp != NULL)
+		enable_breakpoint(proc, sbp);
+	else
+		insert_breakpoint(proc, ip, NULL, 1);
+	ptrace(PTRACE_CONT, proc->pid, 0, 0);
+}
+
+static void
+process_stopping_done(struct process_stopping_handler * self, Process * leader)
+{
+	debug(DEBUG_PROCESS, "process stopping done %d",
+	      self->task_enabling_breakpoint->pid);
+	size_t i;
+	if (!self->exiting) {
+		for (i = 0; i < self->pids.count; ++i)
+			if (self->pids.tasks[i].pid != 0
+			    && self->pids.tasks[i].delivered)
+				continue_process(self->pids.tasks[i].pid);
+		continue_process(self->task_enabling_breakpoint->pid);
+		destroy_event_handler(leader);
 	} else {
-		ptrace(PTRACE_SYSCALL, pid, 0, signum);
+		self->state = psh_ugly_workaround;
+		ugly_workaround(self->task_enabling_breakpoint);
 	}
 }
 
-void
-continue_process(pid_t pid) {
-	/* We always trace syscalls to control fork(), clone(), execve()... */
+/* Before we detach, we need to make sure that task's IP is on the
+ * edge of an instruction.  So for tasks that have a breakpoint event
+ * in the queue, we adjust the instruction pointer, just like
+ * continue_after_breakpoint does.  */
+static enum ecb_status
+undo_breakpoint(Event * event, void * data)
+{
+	if (event != NULL
+	    && event->proc->leader == data
+	    && event->type == EVENT_BREAKPOINT)
+		set_instruction_pointer(event->proc, event->e_un.brk_addr);
+	return ecb_cont;
+}
 
-	debug(DEBUG_PROCESS, "continue_process: pid=%d", pid);
+static enum pcb_status
+untrace_task(Process * task, void * data)
+{
+	if (task != data)
+		untrace_pid(task->pid);
+	return pcb_cont;
+}
 
-	ptrace(PTRACE_SYSCALL, pid, 0, 0);
+static enum pcb_status
+remove_task(Process * task, void * data)
+{
+	/* Don't remove leader just yet.  */
+	if (task != data)
+		remove_process(task);
+	return pcb_cont;
 }
 
-void
-continue_enabling_breakpoint(pid_t pid, Breakpoint *sbp) {
-	enable_breakpoint(pid, sbp);
-	continue_process(pid);
+static void
+detach_process(Process * leader)
+{
+	each_qd_event(&undo_breakpoint, leader);
+	disable_all_breakpoints(leader);
+
+	/* Now untrace the process, if it was attached to by -p.  */
+	struct opt_p_t * it;
+	for (it = opt_p; it != NULL; it = it->next) {
+		Process * proc = pid2proc(it->pid);
+		if (proc == NULL)
+			continue;
+		if (proc->leader == leader) {
+			each_task(leader, &untrace_task, NULL);
+			break;
+		}
+	}
+	each_task(leader, &remove_task, leader);
+	destroy_event_handler(leader);
+	remove_task(leader, NULL);
+}
+
+static void
+handle_stopping_event(struct pid_task * task_info, Event ** eventp)
+{
+	/* Mark all events, so that we know whom to SIGCONT later.  */
+	if (task_info != NULL)
+		task_info->got_event = 1;
+
+	Event * event = *eventp;
+
+	/* In every state, sink SIGSTOP events for tasks that it was
+	 * sent to.  */
+	if (task_info != NULL
+	    && event->type == EVENT_SIGNAL
+	    && event->e_un.signum == SIGSTOP) {
+		debug(DEBUG_PROCESS, "SIGSTOP delivered to %d", task_info->pid);
+		if (task_info->sigstopped
+		    && !task_info->delivered) {
+			task_info->delivered = 1;
+			*eventp = NULL; // sink the event
+		} else
+			fprintf(stderr, "suspicious: %d got SIGSTOP, but "
+				"sigstopped=%d and delivered=%d\n",
+				task_info->pid, task_info->sigstopped,
+				task_info->delivered);
+	}
+}
+
+/* Some SIGSTOPs may have not been delivered to their respective tasks
+ * yet.  They are still in the queue.  If we have seen an event for
+ * that process, continue it, so that the SIGSTOP can be delivered and
+ * caught by ltrace.  */
+static void
+continue_for_sigstop_delivery(struct pid_set * pids)
+{
+	size_t i;
+	for (i = 0; i < pids->count; ++i) {
+		if (pids->tasks[i].pid != 0
+		    && pids->tasks[i].sigstopped
+		    && !pids->tasks[i].delivered
+		    && pids->tasks[i].got_event) {
+			debug(DEBUG_PROCESS, "continue %d for SIGSTOP delivery",
+			      pids->tasks[i].pid);
+			ptrace(PTRACE_SYSCALL, pids->tasks[i].pid, 0, 0);
+		}
+	}
+}
+
+static int
+event_exit_p(Event * event)
+{
+	return event != NULL && (event->type == EVENT_EXIT
+				 || event->type == EVENT_EXIT_SIGNAL);
+}
+
+static int
+event_exit_or_none_p(Event * event)
+{
+	return event == NULL || event_exit_p(event)
+		|| event->type == EVENT_NONE;
+}
+
+static int
+await_sigstop_delivery(struct pid_set * pids, struct pid_task * task_info,
+		       Event * event)
+{
+	/* If we still didn't get our SIGSTOP, continue the process
+	 * and carry on.  */
+	if (event != NULL && !event_exit_or_none_p(event)
+	    && task_info != NULL && task_info->sigstopped) {
+		debug(DEBUG_PROCESS, "continue %d for SIGSTOP delivery",
+		      task_info->pid);
+		/* We should get the signal the first thing
+		 * after this, so it should be OK to continue
+		 * even if we are over a breakpoint.  */
+		ptrace(PTRACE_SYSCALL, task_info->pid, 0, 0);
+
+	} else {
+		/* If all SIGSTOPs were delivered, uninstall the
+		 * handler and continue everyone.  */
+		/* XXX I suspect that we should check tasks that are
+		 * still around.  As things are now, there should be a
+		 * race between waiting for everyone to stop and one
+		 * of the tasks exiting.  */
+		int all_clear = 1;
+		size_t i;
+		for (i = 0; i < pids->count; ++i)
+			if (pids->tasks[i].pid != 0
+			    && pids->tasks[i].sigstopped
+			    && !pids->tasks[i].delivered) {
+				all_clear = 0;
+				break;
+			}
+		return all_clear;
+	}
+
+	return 0;
+}
+
+static int
+all_stops_accountable(struct pid_set * pids)
+{
+	size_t i;
+	for (i = 0; i < pids->count; ++i)
+		if (pids->tasks[i].pid != 0
+		    && !pids->tasks[i].got_event
+		    && !have_events_for(pids->tasks[i].pid))
+			return 0;
+	return 1;
+}
+
+/* This event handler is installed when we are in the process of
+ * stopping the whole thread group to do the breakpoint re-enablement
+ * for one of the threads.  We pump all events to the queue for later
+ * processing while we wait for all the threads to stop.  When this
+ * happens, we let the re-enabling thread PTRACE_SINGLESTEP,
+ * re-enable, and continue everyone.  */
+static Event *
+process_stopping_on_event(Event_Handler * super, Event * event)
+{
+	struct process_stopping_handler * self = (void *)super;
+	Process * task = event->proc;
+	Process * leader = task->leader;
+	Breakpoint * sbp = self->breakpoint_being_enabled;
+	Process * teb = self->task_enabling_breakpoint;
+
+	debug(DEBUG_PROCESS,
+	      "pid %d; event type %d; state %d",
+	      task->pid, event->type, self->state);
+
+	struct pid_task * task_info = get_task_info(&self->pids, task->pid);
+	if (task_info == NULL)
+		fprintf(stderr, "new task??? %d\n", task->pid);
+	handle_stopping_event(task_info, &event);
+
+	int state = self->state;
+	int event_to_queue = !event_exit_or_none_p(event);
+
+	/* Deactivate the entry if the task exits.  */
+	if (event_exit_p(event) && task_info != NULL)
+		task_info->pid = 0;
+
+	switch (state) {
+	case psh_stopping:
+		/* If everyone is stopped, singlestep.  */
+		if (each_task(leader, &task_stopped, NULL) == NULL) {
+			debug(DEBUG_PROCESS, "all stopped, now SINGLESTEP %d",
+			      teb->pid);
+			if (sbp->enabled)
+				disable_breakpoint(teb, sbp);
+			if (ptrace(PTRACE_SINGLESTEP, teb->pid, 0, 0))
+				perror("PTRACE_SINGLESTEP");
+			self->state = state = psh_singlestep;
+		}
+		break;
+
+	case psh_singlestep: {
+		/* In singlestep state, breakpoint signifies that we
+		 * have now stepped, and can re-enable the breakpoint.  */
+		if (event != NULL && task == teb) {
+			/* Essentially we don't care what event caused
+			 * the thread to stop.  We can do the
+			 * re-enablement now.  */
+			if (sbp->enabled)
+				enable_breakpoint(teb, sbp);
+
+			continue_for_sigstop_delivery(&self->pids);
+
+			self->breakpoint_being_enabled = NULL;
+			self->state = state = psh_sinking;
+
+			if (event->type == EVENT_BREAKPOINT)
+				event = NULL; // handled
+		} else
+			break;
+	}
+
+		/* fall-through */
+
+	case psh_sinking:
+		if (await_sigstop_delivery(&self->pids, task_info, event))
+			process_stopping_done(self, leader);
+		break;
+
+	case psh_ugly_workaround:
+		if (event == NULL)
+			break;
+		if (event->type == EVENT_BREAKPOINT) {
+			undo_breakpoint(event, leader);
+			if (task == teb)
+				self->task_enabling_breakpoint = NULL;
+		}
+		if (self->task_enabling_breakpoint == NULL
+		    && all_stops_accountable(&self->pids)) {
+			undo_breakpoint(event, leader);
+			detach_process(leader);
+			event = NULL; // handled
+		}
+	}
+
+	if (event != NULL && event_to_queue) {
+		enque_event(event);
+		event = NULL; // sink the event
+	}
+
+	return event;
+}
+
+static void
+process_stopping_destroy(Event_Handler * super)
+{
+	struct process_stopping_handler * self = (void *)super;
+	free(self->pids.tasks);
 }
 
 void
-continue_after_breakpoint(Process *proc, Breakpoint *sbp) {
-	if (sbp->enabled)
-		disable_breakpoint(proc->pid, sbp);
+continue_after_breakpoint(Process *proc, Breakpoint *sbp)
+{
 	set_instruction_pointer(proc, sbp->addr);
 	if (sbp->enabled == 0) {
 		continue_process(proc->pid);
 	} else {
-		debug(DEBUG_PROCESS, "continue_after_breakpoint: pid=%d, addr=%p", proc->pid, sbp->addr);
-		proc->breakpoint_being_enabled = sbp;
+		debug(DEBUG_PROCESS,
+		      "continue_after_breakpoint: pid=%d, addr=%p",
+		      proc->pid, sbp->addr);
 #if defined __sparc__  || defined __ia64___ || defined __mips__
 		/* we don't want to singlestep here */
 		continue_process(proc->pid);
 #else
-		ptrace(PTRACE_SINGLESTEP, proc->pid, 0, 0);
+		struct process_stopping_handler * handler
+			= calloc(sizeof(*handler), 1);
+		if (handler == NULL) {
+			perror("malloc breakpoint disable handler");
+		fatal:
+			/* Carry on not bothering to re-enable.  */
+			continue_process(proc->pid);
+			return;
+		}
+
+		handler->super.on_event = process_stopping_on_event;
+		handler->super.destroy = process_stopping_destroy;
+		handler->task_enabling_breakpoint = proc;
+		handler->breakpoint_being_enabled = sbp;
+		install_event_handler(proc->leader, &handler->super);
+
+		if (each_task(proc->leader, &send_sigstop,
+			      &handler->pids) != NULL)
+			goto fatal;
+
+		/* And deliver the first fake event, in case all the
+		 * conditions are already fulfilled.  */
+		Event ev;
+		ev.type = EVENT_NONE;
+		ev.proc = proc;
+		process_stopping_on_event(&handler->super, &ev);
 #endif
 	}
 }
 
+/**
+ * Ltrace exit.  When we are about to exit, we have to go through all
+ * the processes, stop them all, remove all the breakpoints, and then
+ * detach the processes that we attached to using -p.  If we left the
+ * other tasks running, they might hit stray return breakpoints and
+ * produce artifacts, so we better stop everyone, even if it's a bit
+ * of extra work.
+ */
+struct ltrace_exiting_handler
+{
+	Event_Handler super;
+	struct pid_set pids;
+};
+
+static Event *
+ltrace_exiting_on_event(Event_Handler * super, Event * event)
+{
+	struct ltrace_exiting_handler * self = (void *)super;
+	Process * task = event->proc;
+	Process * leader = task->leader;
+
+	debug(DEBUG_PROCESS, "pid %d; event type %d", task->pid, event->type);
+
+	struct pid_task * task_info = get_task_info(&self->pids, task->pid);
+	handle_stopping_event(task_info, &event);
+
+	if (event != NULL && event->type == EVENT_BREAKPOINT)
+		undo_breakpoint(event, leader);
+
+	if (await_sigstop_delivery(&self->pids, task_info, event)
+	    && all_stops_accountable(&self->pids))
+		detach_process(leader);
+
+	/* Sink all non-exit events.  We are about to exit, so we
+	 * don't bother with queuing them. */
+	if (event_exit_or_none_p(event))
+		return event;
+
+	return NULL;
+}
+
+static void
+ltrace_exiting_destroy(Event_Handler * super)
+{
+	struct ltrace_exiting_handler * self = (void *)super;
+	free(self->pids.tasks);
+}
+
+static int
+ltrace_exiting_install_handler(Process * proc)
+{
+	/* Only install to leader.  */
+	if (proc->leader != proc)
+		return 0;
+
+	/* Perhaps we are already installed, if the user passed
+	 * several -p options that are tasks of one process.  */
+	if (proc->event_handler != NULL
+	    && proc->event_handler->on_event == &ltrace_exiting_on_event)
+		return 0;
+
+	/* If stopping handler is already present, let it do the
+	 * work.  */
+	if (proc->event_handler != NULL) {
+		assert(proc->event_handler->on_event
+		       == &process_stopping_on_event);
+		struct process_stopping_handler * other
+			= (void *)proc->event_handler;
+		other->exiting = 1;
+		return 0;
+	}
+
+	struct ltrace_exiting_handler * handler
+		= calloc(sizeof(*handler), 1);
+	if (handler == NULL) {
+		perror("malloc exiting handler");
+	fatal:
+		/* XXXXXXXXXXXXXXXXXXX fixme */
+		return -1;
+	}
+
+	handler->super.on_event = ltrace_exiting_on_event;
+	handler->super.destroy = ltrace_exiting_destroy;
+	install_event_handler(proc->leader, &handler->super);
+
+	if (each_task(proc->leader, &send_sigstop,
+		      &handler->pids) != NULL)
+		goto fatal;
+
+	return 0;
+}
+
+/* If ltrace gets SIGINT, the processes directly or indirectly run by
+ * ltrace get it too.  We just have to wait long enough for the signal
+ * to be delivered and the process terminated, which we notice and
+ * exit ltrace, too.  So there's not much we need to do there.  We
+ * want to keep tracing those processes as usual, in case they just
+ * SIG_IGN the SIGINT to do their shutdown etc.
+ *
+ * For processes run in the background, we want to install an exit
+ * handler that stops all the threads, removes all breakpoints, and
+ * detaches.
+ */
+void
+ltrace_exiting(void)
+{
+	struct opt_p_t * it;
+	for (it = opt_p; it != NULL; it = it->next) {
+		Process * proc = pid2proc(it->pid);
+		if (proc == NULL || proc->leader == NULL)
+			continue;
+		if (ltrace_exiting_install_handler(proc->leader) < 0)
+			fprintf(stderr,
+				"Couldn't install exiting handler for %d.\n",
+				proc->pid);
+	}
+}
+
 size_t
 umovebytes(Process *proc, void *addr, void *laddr, size_t len) {
 
diff --git a/sysdeps/linux-gnu/x86_64/trace.c b/sysdeps/linux-gnu/x86_64/trace.c
index e8581af..d0299d9 100644
--- a/sysdeps/linux-gnu/x86_64/trace.c
+++ b/sysdeps/linux-gnu/x86_64/trace.c
@@ -8,6 +8,7 @@
 #include <sys/reg.h>
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #include "common.h"
 #include "ptrace.h"
@@ -44,8 +45,11 @@ int
 syscall_p(Process *proc, int status, int *sysnum) {
 	if (WIFSTOPPED(status)
 	    && WSTOPSIG(status) == (SIGTRAP | proc->tracesysgood)) {
-		*sysnum = ptrace(PTRACE_PEEKUSER, proc->pid, 8 * ORIG_RAX, 0);
+		long int ret = ptrace(PTRACE_PEEKUSER, proc->pid, 8 * ORIG_RAX, 0);
+		if (ret == -1 && errno)
+			return -1;
 
+		*sysnum = ret;
 		if (proc->callstack_depth > 0 &&
 				proc->callstack[proc->callstack_depth - 1].is_syscall &&
 				proc->callstack[proc->callstack_depth - 1].c_un.syscall == *sysnum) {
diff --git a/testsuite/ltrace.main/main-threaded.c b/testsuite/ltrace.main/main-threaded.c
new file mode 100644
index 0000000..a183966
--- /dev/null
+++ b/testsuite/ltrace.main/main-threaded.c
@@ -0,0 +1,29 @@
+#include <pthread.h>
+
+extern void print (char *);
+
+#define	PRINT_LOOP	10
+
+void *
+th_main (void *arg)
+{
+  int i;
+  for (i=0; i<PRINT_LOOP; i++)
+    print (arg);
+}
+
+int
+main ()
+{
+  pthread_t thread1;
+  pthread_t thread2;
+  pthread_t thread3;
+  pthread_create (&thread1, NULL, th_main, "aaa");
+  pthread_create (&thread2, NULL, th_main, "bbb");
+  pthread_create (&thread3, NULL, th_main, "ccc");
+  pthread_join (thread1, NULL);
+  pthread_join (thread2, NULL);
+  pthread_join (thread3, NULL);
+  return 0;
+}
+
diff --git a/testsuite/ltrace.main/main-threaded.exp b/testsuite/ltrace.main/main-threaded.exp
new file mode 100644
index 0000000..0157797
--- /dev/null
+++ b/testsuite/ltrace.main/main-threaded.exp
@@ -0,0 +1,39 @@
+# This file was written by Yao Qi <qiyao@cn.ibm.com>.
+
+set testfile "main-threaded"
+set srcfile ${testfile}.c
+set binfile ${testfile}
+set libfile "main-lib"
+set libsrc $srcdir/$subdir/$libfile.c
+set lib_sl $objdir/$subdir/lib$testfile.so
+
+
+if [get_compiler_info $binfile] {
+  return -1
+}
+
+verbose "compiling source file now....."
+if { [ltrace_compile_shlib $libsrc $lib_sl debug ] != "" 
+  || [ltrace_compile $srcdir/$subdir/$srcfile $objdir/$subdir/$binfile executable [list debug shlib=$lib_sl ldflags=-pthread] ] != ""} {
+  send_user "Testcase compile failed, so all tests in this file will automatically fail.\n"
+}
+
+# set options for ltrace.
+ltrace_options "-l" "$objdir/$subdir/libmain.so" "-f"
+
+# Run PUT for ltrace.
+set exec_output [ltrace_runtest $objdir/$subdir $objdir/$subdir/$binfile]
+
+# Check the output of this program.
+verbose "ltrace runtest output: $exec_output\n"
+if [regexp {ELF from incompatible architecture} $exec_output] {
+	fail "32-bit ltrace can not perform on 64-bit PUTs and rebuild ltrace in 64 bit mode!"
+	return 
+} elseif [ regexp {Couldn't get .hash data} $exec_output ] {
+	fail "Couldn't get .hash data!"
+	return
+}
+
+# Verify the output by checking numbers of print in main-threaded.ltrace.
+set pattern "print("
+ltrace_verify_output ${objdir}/${subdir}/${testfile}.ltrace $pattern 30