2008-01-09 Jan Kratochvil Support attaching to stopped processes/threads and/or with pending signals. This ChangeLog entry is OBSOLETE: * linux-nat.c (STRINGIFY, STRINGIFY_ARG): New macros. (kill_lwp): New declaration. (linux_ptrace_post_attach, pid_is_stopped): New function. (linux_child_follow_fork): New comment about WAS_STOPPED. (lin_lwp_attach_lwp): Variable PID removed. Part replaced by a call to LINUX_PTRACE_POST_ATTACH. (linux_nat_attach): Likewise. (linux_nat_detach): Optionally stop the detached process. (linux_nat_resume): Clear WAS_STOPPED if appropriate. * NEWS: Document the new behaviour. 2007-06-30 Jan Kratochvil * gdb.texinfo (Attach): Document the ATTACH and DETACH commands for stopped processes. Document the messages on the seen pending signals. 2008-02-24 Jan Kratochvil Port to GDB-6.8pre. Index: gdb-6.8cvs20080219/gdb/NEWS =================================================================== --- gdb-6.8cvs20080219.orig/gdb/NEWS 2008-02-24 19:35:37.000000000 +0100 +++ gdb-6.8cvs20080219/gdb/NEWS 2008-02-24 19:36:26.000000000 +0100 @@ -412,6 +412,9 @@ Renesas M32C/M16C m32c-elf Morpho Technologies ms1 ms1-elf +* On GNU/Linux, stopped processes may get attached to now. Signals being +delivered at the time of the attach command no longer get lost. + * New commands init-if-undefined Initialize a convenience variable, but Index: gdb-6.8cvs20080219/gdb/linux-nat.c =================================================================== --- gdb-6.8cvs20080219.orig/gdb/linux-nat.c 2008-02-24 19:36:26.000000000 +0100 +++ gdb-6.8cvs20080219/gdb/linux-nat.c 2008-02-24 19:45:01.000000000 +0100 @@ -89,6 +89,15 @@ #define PTRACE_GETSIGINFO 0x4202 #endif +#define STRINGIFY_ARG(x) #x +#define STRINGIFY(x) STRINGIFY_ARG (x) + +static int linux_ptrace_post_attach (ptid_t ptid, int *cloned_return); +static int kill_lwp (int lwpid, int signo); + +/* PID of the inferior stopped by SIGSTOP before attaching (or zero). 
*/ +static pid_t pid_was_stopped; + /* The single-threaded native GNU/Linux target_ops. We save a pointer for the use of the multi-threaded target. */ static struct target_ops *linux_ops; @@ -539,6 +548,11 @@ linux_child_follow_fork (struct target_o } else { + /* We should check PID_WAS_STOPPED and detach it stopped accordingly. + In this point of code it cannot be 1 as we would not get FORK + executed without CONTINUE first which resets PID_WAS_STOPPED. + We would have to first TARGET_STOP and WAITPID it as with running + inferior PTRACE_DETACH, SIGSTOP will ignore the signal. */ target_detach (NULL, 0); } @@ -942,9 +956,8 @@ lin_lwp_attach_lwp (ptid_t ptid) to happen. */ if (GET_LWP (ptid) != GET_PID (ptid) && lp == NULL) { - pid_t pid; int status; - int cloned = 0; + int cloned; if (ptrace (PTRACE_ATTACH, GET_LWP (ptid), 0, 0) < 0) { @@ -958,37 +971,20 @@ lin_lwp_attach_lwp (ptid_t ptid) return -1; } - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "LLAL: PTRACE_ATTACH %s, 0, 0 (OK)\n", - target_pid_to_str (ptid)); - - pid = my_waitpid (GET_LWP (ptid), &status, 0); - if (pid == -1 && errno == ECHILD) - { - /* Try again with __WCLONE to check cloned processes. */ - pid = my_waitpid (GET_LWP (ptid), &status, __WCLONE); - cloned = 1; + status = linux_ptrace_post_attach (ptid, &cloned); + if (status != 0) + { + error (_("Thread %s exited: %s"), target_pid_to_str (ptid), + status_to_str (status)); } - gdb_assert (pid == GET_LWP (ptid) - && WIFSTOPPED (status) && WSTOPSIG (status)); - - if (lp == NULL) - lp = add_lwp (ptid); + /* ADD_LWP with TID only already after the WAITPID. */ + lp = add_lwp (ptid); lp->cloned = cloned; - target_post_attach (pid); + target_post_attach (GET_LWP (ptid)); lp->stopped = 1; - - if (debug_linux_nat) - { - fprintf_unfiltered (gdb_stdlog, - "LLAL: waitpid %s received %s\n", - target_pid_to_str (ptid), - status_to_str (status)); - } } else { @@ -998,44 +994,202 @@ lin_lwp_attach_lwp (ptid_t ptid) threads. 
Note that this won't have already been done since the main thread will have, we assume, been stopped by an attach from a different layer. */ - if (lp == NULL) - lp = add_lwp (ptid); lp->stopped = 1; } return 0; } +/* Detect `T (stopped)' in `/proc/PID/status'. + Other states including `T (tracing stop)' are reported as false. */ + +static int +pid_is_stopped (pid_t pid) +{ + FILE *status_file; + char buf[100]; + int retval = 0; + + snprintf (buf, sizeof (buf), "/proc/%d/status", (int) pid); + status_file = fopen (buf, "r"); + if (status_file != NULL) + { + int have_state = 0; + + while (fgets (buf, sizeof (buf), status_file)) + { + if (strncmp (buf, "State:", 6) == 0) + { + have_state = 1; + break; + } + } + if (have_state && strstr (buf, "T (stopped)") != NULL) + retval = 1; + fclose (status_file); + } + return retval; +} + +/* Handle the processing after PTRACE_ATTACH, the first WAITPID -> SIGSTOP. + Returns STATUS if the thread has exited, 0 otherwise. + Sets PID_WAS_STOPPED if the process was originally stopped. + Sets *CLONED_RETURN if the given LWP is not the thread leader. + + Scenario for a standard unstopped inferior: + * `S (sleeping)' or `R (running)' or similar states. + * PTRACE_ATTACH is called. + * `S (sleeping)' (or similar) for some while. + * `T (tracing stop)'. + * WAITPID succeeds here returning SIGSTOP (signalled by PTRACE_ATTACH). + + Scenario for a formerly stopped inferior: + * `T (stopped)'. + * PTRACE_ATTACH is called. + * `T (stopped)' would stay indefinitely + Note since this moment the `TracerPid' field gets filled + (by PTRACE_ATTACH), it is no longer just the common `T (stopped)' state. + * If no one did WAITPID since sending SIGSTOP our WAITPID would return + SIGSTOP. The state still would not turn to `T (tracing stop)'. + * Usually its original parent (before PTRACE_ATTACH was applied) already + did WAITPID. The original parent already received our SIGSTOP + signalled by our PTRACE_ATTACH. + In this case our own WAITPID would hang.
Therefore... + * ... we do artificial: tkill (SIGCONT); + `PTRACE_CONT, SIGSTOP' does not work in 100% cases as sometimes SIGSTOP + gets remembered by kernel during the first PTRACE_CONT later and we get + spurious SIGSTOP event. Expecting the signal may get delivered to + a different task of the thread group. + `kill_lwp (SIGSTOP)' has no effect in this moment (it is already stopped). + * WAITPID returns the artificial SIGCONT. + (The possibly pending SIGSTOP gets vanished by specifically SIGCONT.) + * State turns `T (tracing stop)'. + In this moment everything is almost fine but we need a workaround as final + `PTRACE_DETACH, SIGSTOP' would leave the process unstopped otherwise: + * tkill (SIGSTOP); + * `PTRACE_CONT, 0' + * WAITPID returns the artificial SIGSTOP. + + With the pending (unwaited for) SIGSTOP the artificial signal effects are: + kill (SIGSTOP) + PTRACE_ATTACH + /-tkill (SIGCONT), WAITPID: SIGCONT, WAITPID: hang ! + //-tkill (SIGCONT), WAITPID: SIGCONT, PTRACE_CONT (SIG_0), WAITPID: wait (OK) + \\-tkill (SIGALRM), WAITPID: SIGSTOP, WAITPID: hang ! + \-tkill (SIGALRM), WAITPID: SIGSTOP, PTRACE_CONT (SIG_0), WAITPID: SIGALRM ! + Therefore we signal artificial SIGCONT and stop waiting after its reception. + + For the detection whether the process was formerly stopped we need to + read `/proc/PID/status'. `PTRACE_CONT, SIGSTOP' returns ESRCH + for `S (sleeping)' and succeeds for `T (stopped)' but it unfortunately + succeeds even for `T (tracing stop)'. Depending on PTRACE_CONT, SIGSTOP + success value for formerly stopped processes would mean a race condition + as we would get false stopped processes detection if we get too slow. + + `waitid (..., WSTOPPED)' hangs the same way as WAITPID. + + Signals get queued for WAITPID. PTRACE_ATTACH (or TKILL) enqueues SIGSTOP + there but WAITPID may return an already pending signal. + Redeliver it by PTRACE_CONT, SIGxxx as otherwise it would get lost. + Similar processing is being done in this file by WAIT_LWP.
*/ + +static int +linux_ptrace_post_attach (ptid_t ptid, int *cloned_return) +{ + unsigned long sig; + int this_thread_was_stopped; + int cloned = 0; + + if (debug_linux_nat) + fprintf_unfiltered (gdb_stdlog, + "LLAL: PTRACE_ATTACH %s, 0, 0 (OK)\n", + target_pid_to_str (ptid)); + + /* This code should not be run for the threads being attached after the first + thread (process) as we would get later spurious SIGCONT otherwise. */ + + this_thread_was_stopped = pid_is_stopped (GET_LWP (ptid)); + if (this_thread_was_stopped) + { + pid_was_stopped = GET_PID (ptid); + if (kill_lwp (GET_LWP (ptid), SIGCONT) != 0) + perror_with_name (("kill_lwp (SIGCONT)")); + } + + for (;;) + { + pid_t pid; + int status; + + pid = my_waitpid (GET_LWP (ptid), &status, 0); + if (pid == -1 && errno == ECHILD) + { + /* Try again with __WCLONE to check cloned processes. */ + pid = my_waitpid (GET_LWP (ptid), &status, __WCLONE); + cloned = 1; + } + + gdb_assert (pid == GET_LWP (ptid)); + + if (debug_linux_nat) + { + fprintf_unfiltered (gdb_stdlog, + "LLAL: waitpid %s received %s\n", + target_pid_to_str (ptid), + status_to_str (status)); + } + + /* Check if the thread has exited. */ + if (WIFEXITED (status) || WIFSIGNALED (status)) + return status; + gdb_assert (WIFSTOPPED (status)); + sig = WSTOPSIG (status); + gdb_assert (sig != 0); + if (sig == SIGSTOP) + break; + + /* As the second signal for stopped processes we send SIGSTOP. */ + if (this_thread_was_stopped && sig == SIGCONT) + sig = SIGSTOP; + + printf_unfiltered (_("Redelivering pending %s.\n"), + target_signal_to_string (target_signal_from_host (sig))); + if (sig == SIGSTOP) + { + if (kill_lwp (GET_LWP (ptid), sig) != 0) + perror_with_name (("kill_lwp")); + /* We now must resume the inferior to get SIGSTOP delivered. 
*/ + sig = 0; + } + if (ptrace (PTRACE_CONT, GET_LWP (ptid), NULL, (void *) sig) != 0) + perror_with_name (("ptrace")); + } + *cloned_return = cloned; + return 0; +} + static void linux_nat_attach (char *args, int from_tty) { struct lwp_info *lp; - pid_t pid; int status; - int cloned = 0; + int cloned; /* FIXME: We should probably accept a list of process id's, and attach all of them. */ linux_ops->to_attach (args, from_tty); - /* Make sure the initial process is stopped. The user-level threads - layer might want to poke around in the inferior, and that won't - work if things haven't stabilized yet. */ - pid = my_waitpid (GET_PID (inferior_ptid), &status, 0); - if (pid == -1 && errno == ECHILD) - { - warning (_("%s is a cloned process"), target_pid_to_str (inferior_ptid)); - - /* Try again with __WCLONE to check cloned processes. */ - pid = my_waitpid (GET_PID (inferior_ptid), &status, __WCLONE); - cloned = 1; - } - - gdb_assert (pid == GET_PID (inferior_ptid) - && WIFSTOPPED (status) && WSTOPSIG (status) == SIGSTOP); - /* Add the initial process as the first LWP to the list. */ inferior_ptid = BUILD_LWP (GET_PID (inferior_ptid), GET_PID (inferior_ptid)); + + status = linux_ptrace_post_attach (inferior_ptid, &cloned); + if (status != 0) + error (_("Program %s exited: %s\n"), target_pid_to_str (inferior_ptid), + status_to_str (status)); + if (cloned) + warning (_("%s is a cloned process"), target_pid_to_str (inferior_ptid)); + + /* ADD_LWP with TID only already after the WAITPID. */ lp = add_lwp (inferior_ptid); lp->cloned = cloned; @@ -1046,8 +1200,8 @@ linux_nat_attach (char *args, int from_t lp->resumed = 1; if (debug_linux_nat) { - fprintf_unfiltered (gdb_stdlog, - "LLA: waitpid %ld, faking SIGSTOP\n", (long) pid); + fprintf_unfiltered (gdb_stdlog, "LLA: waitpid %d, faking SIGSTOP\n", + GET_PID (inferior_ptid)); } } @@ -1092,6 +1246,9 @@ detach_callback (struct lwp_info *lp, vo overall process id just yet. 
*/ if (GET_LWP (lp->ptid) != GET_PID (lp->ptid)) { + /* None of these threads should be stopped for PID_WAS_STOPPED. Only the + last thread (process) will be stopped by LINUX_NAT_DETACH. */ + errno = 0; if (ptrace (PTRACE_DETACH, GET_LWP (lp->ptid), 0, WSTOPSIG (lp->status)) < 0) @@ -1120,6 +1277,9 @@ linux_nat_detach (char *args, int from_t trap_ptid = null_ptid; + if (pid_was_stopped == GET_PID (lwp_list->ptid)) + args = STRINGIFY (SIGSTOP); + /* Destroy LWP info; it's no longer valid. */ init_lwp_list (); @@ -1255,6 +1415,14 @@ linux_nat_resume (ptid_t ptid, int step_ resume_callback. */ lp->stopped = 0; + /* At this point, we are going to resume the inferior and if we + have attached to a stopped process, we no longer should leave + it as stopped if the user detaches. PTID variable has PID set to LWP + while we need to check the real PID here. */ + + if (!step && lp && pid_was_stopped == GET_PID (lp->ptid)) + pid_was_stopped = 0; + if (resume_all) iterate_over_lwps (resume_callback, NULL); Index: gdb-6.8cvs20080219/gdb/doc/gdb.texinfo =================================================================== --- gdb-6.8cvs20080219.orig/gdb/doc/gdb.texinfo 2008-02-24 19:36:24.000000000 +0100 +++ gdb-6.8cvs20080219/gdb/doc/gdb.texinfo 2008-02-24 19:36:26.000000000 +0100 @@ -2176,16 +2176,29 @@ can step and continue; you can modify st process continue running, you may use the @code{continue} command after attaching @value{GDBN} to the process. +For a process already being stopped before the @code{attach} command executed +you get the informational message below. Other signals may be occasionally +shown if they were being delivered right at the time the @code{attach} command +executed. Such process is left still stopped after the @code{detach} command +as long as you have not used the @code{continue} command (or similar one) +during your debugging session. + +@smallexample +Attaching to program: /bin/sleep, process 16289 +Redelivering pending Stopped (signal).
+@end smallexample + @table @code @kindex detach @item detach When you have finished debugging the attached process, you can use the -@code{detach} command to release it from @value{GDBN} control. Detaching -the process continues its execution. After the @code{detach} command, -that process and @value{GDBN} become completely independent once more, and you -are ready to @code{attach} another process or start one with @code{run}. -@code{detach} does not repeat if you press @key{RET} again after -executing the command. +@code{detach} command to release it from @value{GDBN} control. Detaching the +process continues its execution unless it was already stopped before the +attachment and a @code{continue} type command has not been executed. After the +@code{detach} command, that process and @value{GDBN} become completely +independent once more, and you are ready to @code{attach} another process or +start one with @code{run}. @code{detach} does not repeat if you press +@key{RET} again after executing the command. @end table If you exit @value{GDBN} while you have an attached process, you detach