4f94566
From 0ab7a0e4655fd1108fa3abc994f816a22073abc6 Mon Sep 17 00:00:00 2001
4f94566
From: Yuxuan Shui <yshuiv7@gmail.com>
4f94566
Date: Sat, 15 Feb 2014 02:38:50 +0800
4f94566
Subject: [PATCH] core: fix detection of dead processes
4f94566
4f94566
Commit 5ba6985b moves the UNIT_VTABLE(u)->sigchld_event before systemd
4f94566
actually reaps the zombie, which leads to service_load_pid_file accepting
4f94566
a zombie as a valid pid.
4f94566
4f94566
This fixes timeouts like:
4f94566
[ 2746.602243] systemd[1]: chronyd.service stop-sigterm timed out. Killing.
4f94566
[ 2836.852545] systemd[1]: chronyd.service still around after SIGKILL. Ignoring.
4f94566
[ 2927.102187] systemd[1]: chronyd.service stop-final-sigterm timed out. Killing.
4f94566
[ 3017.352560] systemd[1]: chronyd.service still around after final SIGKILL. Entering failed mode.
4f94566
---
4f94566
 src/core/service.c |  8 ++++++++
4f94566
 src/shared/util.c  | 25 +++++++++++++++++++++++++
4f94566
 src/shared/util.h  |  1 +
4f94566
 3 files changed, 34 insertions(+)
4f94566
4f94566
diff --git a/src/core/service.c b/src/core/service.c
4f94566
index 41e5cb5..d9bc021 100644
4f94566
--- a/src/core/service.c
4f94566
+++ b/src/core/service.c
4f94566
@@ -1429,6 +1429,14 @@ static int service_load_pid_file(Service *s, bool may_warn) {
4f94566
                 return -ESRCH;
4f94566
         }
4f94566
 
4f94566
+        if (get_process_state(pid) == 'Z') {
4f94566
+                if (may_warn)
4f94566
+                        log_info_unit(UNIT(s)->id,
4f94566
+                                      "PID "PID_FMT" read from file %s is a zombie.",
4f94566
+                                      pid, s->pid_file);
4f94566
+                return -ESRCH;
4f94566
+        }
4f94566
+
4f94566
         if (s->main_pid_known) {
4f94566
                 if (pid == s->main_pid)
4f94566
                         return 0;
4f94566
diff --git a/src/shared/util.c b/src/shared/util.c
4f94566
index e754747..1329854 100644
4f94566
--- a/src/shared/util.c
4f94566
+++ b/src/shared/util.c
4f94566
@@ -558,6 +558,31 @@ char *truncate_nl(char *s) {
4f94566
         return s;
4f94566
 }
4f94566
 
4f94566
+int get_process_state(pid_t pid) { /* Returns the state character parsed from the pid's "stat" line, or a negative error code. */
4f94566
+        const char *p;
4f94566
+        char state;
4f94566
+        int r;
4f94566
+        _cleanup_free_ char *line = NULL;
4f94566
+
4f94566
+        assert(pid >= 0);
4f94566
+
4f94566
+        p = procfs_file_alloca(pid, "stat"); /* presumably the procfs path of this pid's "stat" entry - confirm against procfs_file_alloca() */
4f94566
+        r = read_one_line_file(p, &line);
4f94566
+        if (r < 0)
4f94566
+                return r; /* propagate the read failure unchanged */
4f94566
+
4f94566
+        p = strrchr(line, ')'); /* search from the end; NOTE(review): per proc(5) the comm field is parenthesised and may itself contain ')' */
4f94566
+        if (!p)
4f94566
+                return -EIO; /* no ')' at all: the line is not in the expected format */
4f94566
+
4f94566
+        p++; /* step past the ')' */
4f94566
+
4f94566
+        if (sscanf(p, " %c", &state) != 1) /* the leading space in the format skips whitespace before the state character */
4f94566
+                return -EIO;
4f94566
+
4f94566
+        return (unsigned char) state; /* the cast keeps the result non-negative, distinct from the negative error returns */
4f94566
+}
4f94566
+
4f94566
 int get_process_comm(pid_t pid, char **name) {
4f94566
         const char *p;
4f94566
 
4f94566
diff --git a/src/shared/util.h b/src/shared/util.h
4f94566
index bdbdca3..ca38336 100644
4f94566
--- a/src/shared/util.h
4f94566
+++ b/src/shared/util.h
4f94566
@@ -216,6 +216,7 @@ char *file_in_same_dir(const char *path, const char *filename);
4f94566
 
4f94566
 int rmdir_parents(const char *path, const char *stop);
4f94566
 
4f94566
+char get_process_state(pid_t pid);
4f94566
 int get_process_comm(pid_t pid, char **name);
4f94566
 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line);
4f94566
 int get_process_exe(pid_t pid, char **name);