|
|
e43452d |
From 1b2395f867d283bb08862e1ce421ab3ac1b02968 Mon Sep 17 00:00:00 2001
|
|
|
e43452d |
From: Michal Schmidt <mschmidt@redhat.com>
|
|
|
e43452d |
Date: Sat, 3 Dec 2011 02:13:30 +0100
|
|
|
f1996ec |
Subject: [PATCH] service: handle services with racy daemonization gracefully
|
|
|
e43452d |
|
|
|
e43452d |
There are a lot of forking daemons that do not exactly follow the
|
|
|
e43452d |
initialization steps as described in daemon(7). It is common that they
|
|
|
e43452d |
do not bother waiting in the parent process for the child to write the
|
|
|
e43452d |
PID file before exiting. The daemons' developers often do not perceive
|
|
|
e43452d |
this as a bug and they're unwilling to change.
|
|
|
e43452d |
|
|
|
e43452d |
Currently systemd warns about the missing PID file and falls back to
|
|
|
e43452d |
guessing the main PID. Being not quite deterministic, the guess can be
|
|
|
e43452d |
wrong with bad consequences. If the guessing is disabled, determinism is
|
|
|
e43452d |
achieved at the cost of losing the ability of noticing when the main
|
|
|
e43452d |
process of the service dies.
|
|
|
e43452d |
|
|
|
e43452d |
As long as it does not negatively affect properly written services,
|
|
|
e43452d |
systemd should strive for compatibility even with services with racy
|
|
|
e43452d |
daemonization. It is possible to provide determinism _and_ main process
|
|
|
e43452d |
supervision to them.
|
|
|
e43452d |
|
|
|
e43452d |
If the PID file is not there, rather than guessing and considering the
|
|
|
e43452d |
service running immediately after getting the SIGCHLD from the ExecStart
|
|
|
e43452d |
(or ExecStartPost) process, we can keep the service in the activating
|
|
|
e43452d |
state for a bit longer. We can use inotify to wait for the PID file to
|
|
|
e43452d |
appear. Only when it finally does appear and we read a valid PID from
|
|
|
e43452d |
it, we'll move the service to the running state. If the PID file never
|
|
|
e43452d |
appears, the usual timeout kicks in and the service fails.
|
|
|
e43452d |
(cherry picked from commit 3a11183858af30bc9b4e9dac430dd7541deec19b)
|
|
|
e43452d |
---
|
|
|
e43452d |
src/service.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
|
|
|
e43452d |
src/service.h | 3 +
|
|
|
e43452d |
2 files changed, 157 insertions(+), 21 deletions(-)
|
|
|
e43452d |
|
|
|
e43452d |
diff --git a/src/service.c b/src/service.c
|
|
|
e43452d |
index d51445e..0b03a8d 100644
|
|
|
e43452d |
--- a/src/service.c
|
|
|
e43452d |
+++ b/src/service.c
|
|
|
e43452d |
@@ -1294,8 +1294,8 @@ static int service_load_pid_file(Service *s, bool may_warn) {
|
|
|
e43452d |
|
|
|
e43452d |
if ((r = read_one_line_file(s->pid_file, &k)) < 0) {
|
|
|
e43452d |
if (may_warn)
|
|
|
e43452d |
- log_warning("Failed to read PID file %s after %s. The service might be broken.",
|
|
|
e43452d |
- s->pid_file, service_state_to_string(s->state));
|
|
|
e43452d |
+ log_info("PID file %s not readable (yet?) after %s.",
|
|
|
e43452d |
+ s->pid_file, service_state_to_string(s->state));
|
|
|
e43452d |
return r;
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
@@ -1307,8 +1307,8 @@ static int service_load_pid_file(Service *s, bool may_warn) {
|
|
|
e43452d |
|
|
|
e43452d |
if (kill(pid, 0) < 0 && errno != EPERM) {
|
|
|
e43452d |
if (may_warn)
|
|
|
e43452d |
- log_warning("PID %lu read from file %s does not exist. Your service or init script might be broken.",
|
|
|
e43452d |
- (unsigned long) pid, s->pid_file);
|
|
|
e43452d |
+ log_info("PID %lu read from file %s does not exist.",
|
|
|
e43452d |
+ (unsigned long) pid, s->pid_file);
|
|
|
e43452d |
return -ESRCH;
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
@@ -1320,7 +1320,8 @@ static int service_load_pid_file(Service *s, bool may_warn) {
|
|
|
e43452d |
(unsigned long) s->main_pid, (unsigned long) pid);
|
|
|
e43452d |
service_unwatch_main_pid(s);
|
|
|
e43452d |
s->main_pid_known = false;
|
|
|
e43452d |
- }
|
|
|
e43452d |
+ } else
|
|
|
e43452d |
+ log_debug("Main PID loaded: %lu", (unsigned long) pid);
|
|
|
e43452d |
|
|
|
e43452d |
if ((r = service_set_main_pid(s, pid)) < 0)
|
|
|
e43452d |
return r;
|
|
|
e43452d |
@@ -1351,6 +1352,7 @@ static int service_search_main_pid(Service *s) {
|
|
|
e43452d |
if ((pid = cgroup_bonding_search_main_pid_list(s->meta.cgroup_bondings)) <= 0)
|
|
|
e43452d |
return -ENOENT;
|
|
|
e43452d |
|
|
|
e43452d |
+ log_debug("Main PID guessed: %lu", (unsigned long) pid);
|
|
|
e43452d |
if ((r = service_set_main_pid(s, pid)) < 0)
|
|
|
e43452d |
return r;
|
|
|
e43452d |
|
|
|
e43452d |
@@ -1443,6 +1445,17 @@ static int service_notify_sockets_dead(Service *s) {
|
|
|
e43452d |
return 0;
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
+static void service_unwatch_pid_file(Service *s) {
|
|
|
e43452d |
+ if (!s->pid_file_pathspec)
|
|
|
e43452d |
+ return;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ log_debug("Stopping watch for %s's PID file %s", s->meta.id, s->pid_file_pathspec->path);
|
|
|
e43452d |
+ pathspec_unwatch(s->pid_file_pathspec, UNIT(s));
|
|
|
e43452d |
+ pathspec_done(s->pid_file_pathspec);
|
|
|
e43452d |
+ free(s->pid_file_pathspec);
|
|
|
e43452d |
+ s->pid_file_pathspec = NULL;
|
|
|
e43452d |
+}
|
|
|
e43452d |
+
|
|
|
e43452d |
static void service_set_state(Service *s, ServiceState state) {
|
|
|
e43452d |
ServiceState old_state;
|
|
|
e43452d |
assert(s);
|
|
|
e43452d |
@@ -1450,6 +1463,8 @@ static void service_set_state(Service *s, ServiceState state) {
|
|
|
e43452d |
old_state = s->state;
|
|
|
e43452d |
s->state = state;
|
|
|
e43452d |
|
|
|
e43452d |
+ service_unwatch_pid_file(s);
|
|
|
e43452d |
+
|
|
|
e43452d |
if (state != SERVICE_START_PRE &&
|
|
|
e43452d |
state != SERVICE_START &&
|
|
|
e43452d |
state != SERVICE_START_POST &&
|
|
|
e43452d |
@@ -2594,6 +2609,95 @@ static bool service_check_snapshot(Unit *u) {
|
|
|
e43452d |
return !s->got_socket_fd;
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
+static int service_retry_pid_file(Service *s) {
|
|
|
e43452d |
+ int r;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ assert(s->pid_file);
|
|
|
e43452d |
+ assert(s->state == SERVICE_START || s->state == SERVICE_START_POST);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ r = service_load_pid_file(s, false);
|
|
|
e43452d |
+ if (r < 0)
|
|
|
e43452d |
+ return r;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ service_unwatch_pid_file(s);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ service_enter_running(s, true);
|
|
|
e43452d |
+ return 0;
|
|
|
e43452d |
+}
|
|
|
e43452d |
+
|
|
|
e43452d |
+static int service_watch_pid_file(Service *s) {
|
|
|
e43452d |
+ int r;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ log_debug("Setting watch for %s's PID file %s", s->meta.id, s->pid_file_pathspec->path);
|
|
|
e43452d |
+ r = pathspec_watch(s->pid_file_pathspec, UNIT(s));
|
|
|
e43452d |
+ if (r < 0)
|
|
|
e43452d |
+ goto fail;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ /* the pidfile might have appeared just before we set the watch */
|
|
|
e43452d |
+ service_retry_pid_file(s);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ return 0;
|
|
|
e43452d |
+fail:
|
|
|
e43452d |
+ log_error("Failed to set a watch for %s's PID file %s: %s",
|
|
|
e43452d |
+ s->meta.id, s->pid_file_pathspec->path, strerror(-r));
|
|
|
e43452d |
+ service_unwatch_pid_file(s);
|
|
|
e43452d |
+ return r;
|
|
|
e43452d |
+}
|
|
|
e43452d |
+
|
|
|
e43452d |
+static int service_demand_pid_file(Service *s) {
|
|
|
e43452d |
+ PathSpec *ps;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ assert(s->pid_file);
|
|
|
e43452d |
+ assert(!s->pid_file_pathspec);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ ps = new0(PathSpec, 1);
|
|
|
e43452d |
+ if (!ps)
|
|
|
e43452d |
+ return -ENOMEM;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ ps->path = strdup(s->pid_file);
|
|
|
e43452d |
+ if (!ps->path) {
|
|
|
e43452d |
+ free(ps);
|
|
|
e43452d |
+ return -ENOMEM;
|
|
|
e43452d |
+ }
|
|
|
e43452d |
+
|
|
|
e43452d |
+ path_kill_slashes(ps->path);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ /* PATH_CHANGED would not be enough. There are daemons (sendmail) that
|
|
|
e43452d |
+ * keep their PID file open all the time. */
|
|
|
e43452d |
+ ps->type = PATH_MODIFIED;
|
|
|
e43452d |
+ ps->inotify_fd = -1;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ s->pid_file_pathspec = ps;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ return service_watch_pid_file(s);
|
|
|
e43452d |
+}
|
|
|
e43452d |
+
|
|
|
e43452d |
+static void service_fd_event(Unit *u, int fd, uint32_t events, Watch *w) {
|
|
|
e43452d |
+ Service *s = SERVICE(u);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ assert(s);
|
|
|
e43452d |
+ assert(fd >= 0);
|
|
|
e43452d |
+ assert(s->state == SERVICE_START || s->state == SERVICE_START_POST);
|
|
|
e43452d |
+ assert(s->pid_file_pathspec);
|
|
|
e43452d |
+ assert(pathspec_owns_inotify_fd(s->pid_file_pathspec, fd));
|
|
|
e43452d |
+
|
|
|
e43452d |
+ log_debug("inotify event for %s", u->meta.id);
|
|
|
e43452d |
+
|
|
|
e43452d |
+ if (pathspec_fd_event(s->pid_file_pathspec, events) < 0)
|
|
|
e43452d |
+ goto fail;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ if (service_retry_pid_file(s) == 0)
|
|
|
e43452d |
+ return;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ if (service_watch_pid_file(s) < 0)
|
|
|
e43452d |
+ goto fail;
|
|
|
e43452d |
+
|
|
|
e43452d |
+ return;
|
|
|
e43452d |
+fail:
|
|
|
e43452d |
+ service_unwatch_pid_file(s);
|
|
|
e43452d |
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, false);
|
|
|
e43452d |
+}
|
|
|
e43452d |
+
|
|
|
e43452d |
static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|
|
e43452d |
Service *s = SERVICE(u);
|
|
|
e43452d |
bool success;
|
|
|
e43452d |
@@ -2699,7 +2803,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|
|
e43452d |
success = true;
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
- log_full(success ? LOG_DEBUG : LOG_NOTICE,
|
|
|
e43452d |
+ log_full(success ? LOG_DEBUG : LOG_NOTICE,
|
|
|
e43452d |
"%s: control process exited, code=%s status=%i", u->meta.id, sigchld_code_to_string(code), status);
|
|
|
e43452d |
s->failure = s->failure || !success;
|
|
|
e43452d |
|
|
|
e43452d |
@@ -2734,27 +2838,41 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|
|
e43452d |
case SERVICE_START:
|
|
|
e43452d |
assert(s->type == SERVICE_FORKING);
|
|
|
e43452d |
|
|
|
e43452d |
- /* Let's try to load the pid
|
|
|
e43452d |
- * file here if we can. We
|
|
|
e43452d |
- * ignore the return value,
|
|
|
e43452d |
- * since the PID file might
|
|
|
e43452d |
- * actually be created by a
|
|
|
e43452d |
- * START_POST script */
|
|
|
e43452d |
-
|
|
|
e43452d |
- if (success) {
|
|
|
e43452d |
- service_load_pid_file(s, !s->exec_command[SERVICE_EXEC_START_POST]);
|
|
|
e43452d |
- service_search_main_pid(s);
|
|
|
e43452d |
+ if (!success) {
|
|
|
e43452d |
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, false);
|
|
|
e43452d |
+ break;
|
|
|
e43452d |
+ }
|
|
|
e43452d |
|
|
|
e43452d |
- service_enter_start_post(s);
|
|
|
e43452d |
+ if (s->pid_file) {
|
|
|
e43452d |
+ /* Let's try to load the pid file here if we can.
|
|
|
e43452d |
+ * The PID file might actually be created by a START_POST
|
|
|
e43452d |
+ * script. In that case don't worry if the loading fails. */
|
|
|
e43452d |
+ bool has_start_post = !!s->exec_command[SERVICE_EXEC_START_POST];
|
|
|
e43452d |
+ int r = service_load_pid_file(s, !has_start_post);
|
|
|
e43452d |
+ if (!has_start_post && r < 0) {
|
|
|
e43452d |
+ r = service_demand_pid_file(s);
|
|
|
e43452d |
+ if (r < 0 || !cgroup_good(s))
|
|
|
e43452d |
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, false);
|
|
|
e43452d |
+ break;
|
|
|
e43452d |
+ }
|
|
|
e43452d |
} else
|
|
|
e43452d |
- service_enter_signal(s, SERVICE_FINAL_SIGTERM, false);
|
|
|
e43452d |
+ service_search_main_pid(s);
|
|
|
e43452d |
|
|
|
e43452d |
+ service_enter_start_post(s);
|
|
|
e43452d |
break;
|
|
|
e43452d |
|
|
|
e43452d |
case SERVICE_START_POST:
|
|
|
e43452d |
if (success) {
|
|
|
e43452d |
- service_load_pid_file(s, true);
|
|
|
e43452d |
- service_search_main_pid(s);
|
|
|
e43452d |
+ if (s->pid_file) {
|
|
|
e43452d |
+ int r = service_load_pid_file(s, true);
|
|
|
e43452d |
+ if (r < 0) {
|
|
|
e43452d |
+ r = service_demand_pid_file(s);
|
|
|
e43452d |
+ if (r < 0 || !cgroup_good(s))
|
|
|
e43452d |
+ service_enter_stop(s, false);
|
|
|
e43452d |
+ break;
|
|
|
e43452d |
+ }
|
|
|
e43452d |
+ } else
|
|
|
e43452d |
+ service_search_main_pid(s);
|
|
|
e43452d |
}
|
|
|
e43452d |
|
|
|
e43452d |
s->reload_failure = !success;
|
|
|
e43452d |
@@ -2899,6 +3017,20 @@ static void service_cgroup_notify_event(Unit *u) {
|
|
|
e43452d |
* except when we don't know pid which to expect the
|
|
|
e43452d |
* SIGCHLD for. */
|
|
|
e43452d |
|
|
|
e43452d |
+ case SERVICE_START:
|
|
|
e43452d |
+ case SERVICE_START_POST:
|
|
|
e43452d |
+ /* If we were hoping for the daemon to write its PID file,
|
|
|
e43452d |
+ * we can give up now. */
|
|
|
e43452d |
+ if (s->pid_file_pathspec) {
|
|
|
e43452d |
+ log_warning("%s never wrote its PID file. Failing.", s->meta.id);
|
|
|
e43452d |
+ service_unwatch_pid_file(s);
|
|
|
e43452d |
+ if (s->state == SERVICE_START)
|
|
|
e43452d |
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, false);
|
|
|
e43452d |
+ else
|
|
|
e43452d |
+ service_enter_stop(s, false);
|
|
|
e43452d |
+ }
|
|
|
e43452d |
+ break;
|
|
|
e43452d |
+
|
|
|
e43452d |
case SERVICE_RUNNING:
|
|
|
e43452d |
service_enter_running(s, true);
|
|
|
e43452d |
break;
|
|
|
e43452d |
@@ -3208,7 +3340,7 @@ static int service_enumerate(Manager *m) {
|
|
|
e43452d |
r = 0;
|
|
|
e43452d |
|
|
|
e43452d |
#ifdef TARGET_SUSE
|
|
|
e43452d |
- sysv_facility_in_insserv_conf (m);
|
|
|
e43452d |
+ sysv_facility_in_insserv_conf (m);
|
|
|
e43452d |
#endif
|
|
|
e43452d |
|
|
|
e43452d |
finish:
|
|
|
e43452d |
@@ -3503,6 +3635,7 @@ const UnitVTable service_vtable = {
|
|
|
e43452d |
|
|
|
e43452d |
.sigchld_event = service_sigchld_event,
|
|
|
e43452d |
.timer_event = service_timer_event,
|
|
|
e43452d |
+ .fd_event = service_fd_event,
|
|
|
e43452d |
|
|
|
e43452d |
.reset_failed = service_reset_failed,
|
|
|
e43452d |
|
|
|
e43452d |
diff --git a/src/service.h b/src/service.h
|
|
|
e43452d |
index e28f74b..15d58cc 100644
|
|
|
e43452d |
--- a/src/service.h
|
|
|
e43452d |
+++ b/src/service.h
|
|
|
e43452d |
@@ -86,6 +86,8 @@ typedef enum NotifyAccess {
|
|
|
e43452d |
_NOTIFY_ACCESS_INVALID = -1
|
|
|
e43452d |
} NotifyAccess;
|
|
|
e43452d |
|
|
|
e43452d |
+typedef struct PathSpec PathSpec;
|
|
|
e43452d |
+
|
|
|
e43452d |
struct Service {
|
|
|
e43452d |
Meta meta;
|
|
|
e43452d |
|
|
|
e43452d |
@@ -157,6 +159,7 @@ struct Service {
|
|
|
e43452d |
Set *configured_sockets;
|
|
|
e43452d |
|
|
|
e43452d |
Watch timer_watch;
|
|
|
e43452d |
+ PathSpec *pid_file_pathspec;
|
|
|
e43452d |
|
|
|
e43452d |
NotifyAccess notify_access;
|
|
|
e43452d |
};
|
|
|
e43452d |
--
|
|
|
e43452d |
1.7.7.5
|
|
|
e43452d |
|