From fe55ab80cc20ed77f99bb39b292ddf91161b052a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 24 May 2012 04:00:56 +0200 Subject: [PATCH] main: add configuration option to alter capability bounding set for PID 1 This also ensures that caps dropped from the bounding set are also dropped from the inheritable set, to be extra-secure. Usually that should change very little though as the inheritable set is empty for all our uses anyway. (cherry picked from commit ec8927ca5940e809f0b72f530582c76f1db4f065) Conflicts: TODO --- man/systemd.conf.xml | 45 +++++++++++++++-- man/systemd.exec.xml | 16 +++--- src/core/execute.c | 64 +----------------------- src/core/load-fragment-gperf.gperf.m4 | 2 +- src/core/load-fragment.c | 13 ++--- src/core/load-fragment.h | 2 +- src/core/main.c | 11 ++++ src/core/system.conf | 17 +++++++ src/nspawn/nspawn.c | 70 ++++++++++---------------- src/shared/capability.c | 89 ++++++++++++++++++++++++++++++++- src/shared/capability.h | 5 ++ 11 files changed, 208 insertions(+), 126 deletions(-) diff --git a/man/systemd.conf.xml b/man/systemd.conf.xml index d37c574..8e288eb 100644 --- a/man/systemd.conf.xml +++ b/man/systemd.conf.xml @@ -196,6 +196,38 @@ + CapabilityBoundingSet= + + Controls which + capabilities to include in the + capability bounding set for PID 1 and + its children. See + capabilities7 + for details. Takes a whitespace + separated list of capability names as + read by + cap_from_name3. + Capabilities listed will be included + in the bounding set, all others are + removed. If the list of capabilities + is prefixed with ~ all but the listed + capabilities will be included, the + effect of the assignment + inverted. Note that this option also + effects the respective capabilities in + the effective, permitted and + inheritable capability sets. The + capability bounding set may also be + individually configured for units + using the + CapabilityBoundingSet= + directive for units, but note that + capabilities dropped for PID 1 cannot + be regained in individual units, they + are lost for good. + + + DefaultLimitCPU= DefaultLimitFSIZE= DefaultLimitDATA= @@ -212,14 +244,21 @@ DefaultLimitNICE= DefaultLimitRTPRIO= DefaultLimitRTTIME= + These settings control - various default resource limits for units. See + various default resource limits for + units. See setrlimit2 for details. Use the string infinity to configure no limit on a specific - resource. They can be overriden in units files - using corresponding LimitXXXX parameter. + resource. These settings may be + overriden in individual units + using the corresponding LimitXXX= + directives. Note that these resource + limits are only defaults for units, + they are not applied to PID 1 + itself. diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index e6f49c9..7d28545 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -678,17 +678,17 @@ is prefixed with ~ all but the listed capabilities will be included, the effect of the assignment - inverted. Note that this option does - not actually set or unset any - capabilities in the effective, - permitted or inherited capability - sets. That's what - Capabilities= is - for. If this option is not used the + inverted. Note that this option also + effects the respective capabilities in + the effective, permitted and + inheritable capability sets, on top of + what Capabilities= + does. If this option is not used the capability bounding set is not modified on process execution, hence no limits on the capabilities of the - process are enforced. + process are + enforced. diff --git a/src/core/execute.c b/src/core/execute.c index a2ef77c..f93c9a4 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -870,68 +870,6 @@ fail: } #endif -static int do_capability_bounding_set_drop(uint64_t drop) { - unsigned long i; - cap_t old_cap = NULL, new_cap = NULL; - cap_flag_value_t fv; - int r; - - /* If we are run as PID 1 we will lack CAP_SETPCAP by default - * in the effective set (yes, the kernel drops that when - * executing init!), so get it back temporarily so that we can - * call PR_CAPBSET_DROP. */ - - old_cap = cap_get_proc(); - if (!old_cap) - return -errno; - - if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) { - r = -errno; - goto finish; - } - - if (fv != CAP_SET) { - static const cap_value_t v = CAP_SETPCAP; - - new_cap = cap_dup(old_cap); - if (!new_cap) { - r = -errno; - goto finish; - } - - if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) { - r = -errno; - goto finish; - } - - if (cap_set_proc(new_cap) < 0) { - r = -errno; - goto finish; - } - } - - for (i = 0; i <= cap_last_cap(); i++) - if (drop & ((uint64_t) 1ULL << (uint64_t) i)) { - if (prctl(PR_CAPBSET_DROP, i) < 0) { - r = -errno; - goto finish; - } - } - - r = 0; - -finish: - if (new_cap) - cap_free(new_cap); - - if (old_cap) { - cap_set_proc(old_cap); - cap_free(old_cap); - } - - return r; -} - static void rename_process_from_path(const char *path) { char process_name[11]; const char *p; @@ -1398,7 +1336,7 @@ int exec_spawn(ExecCommand *command, } if (context->capability_bounding_set_drop) { - err = do_capability_bounding_set_drop(context->capability_bounding_set_drop); + err = capability_bounding_set_drop(context->capability_bounding_set_drop, false); if (err < 0) { r = EXIT_CAPABILITIES; goto fail_child; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 9c4a930..9efc859 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -46,7 +46,7 @@ $1.SyslogLevel, config_parse_level, 0, $1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix) $1.Capabilities, config_parse_exec_capabilities, 0, offsetof($1, exec_context) $1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context) -$1.CapabilityBoundingSet, config_parse_exec_bounding_set, 0, offsetof($1, exec_context) +$1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop) $1.TimerSlackNSec, config_parse_exec_timer_slack_nsec, 0, offsetof($1, exec_context) $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index b59029e..bb27d95 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -931,7 +931,7 @@ int config_parse_exec_secure_bits( return 0; } -int config_parse_exec_bounding_set( +int config_parse_bounding_set( const char *filename, unsigned line, const char *section, @@ -941,7 +941,7 @@ int config_parse_exec_bounding_set( void *data, void *userdata) { - ExecContext *c = data; + uint64_t *capability_bounding_set_drop = data; char *w; size_t l; char *state; @@ -968,7 +968,8 @@ int config_parse_exec_bounding_set( int r; cap_value_t cap; - if (!(t = strndup(w, l))) + t = strndup(w, l); + if (!t) return -ENOMEM; r = cap_from_name(t, &cap); @@ -983,9 +984,9 @@ int config_parse_exec_bounding_set( } if (invert) - c->capability_bounding_set_drop |= sum; + *capability_bounding_set_drop |= sum; else - c->capability_bounding_set_drop |= ~sum; + *capability_bounding_set_drop |= ~sum; return 0; } @@ -2440,7 +2441,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_level, "LEVEL" }, { config_parse_exec_capabilities, "CAPABILITIES" }, { config_parse_exec_secure_bits, "SECUREBITS" }, - { config_parse_exec_bounding_set, "BOUNDINGSET" }, + { config_parse_bounding_set, "BOUNDINGSET" }, { config_parse_exec_timer_slack_nsec, "TIMERSLACK" }, { config_parse_limit, "LIMIT" }, { config_parse_unit_cgroup, "CGROUP [...]" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index d0efa90..7d73eec 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -56,7 +56,7 @@ int config_parse_exec_cpu_sched_prio(const char *filename, unsigned line, const int config_parse_exec_cpu_affinity(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_capabilities(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_secure_bits(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_exec_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_timer_slack_nsec(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_cgroup(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/main.c b/src/core/main.c index f1b2b28..4ebddab 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -50,6 +50,7 @@ #include "watchdog.h" #include "path-util.h" #include "switch-root.h" +#include "capability.h" #include "mount-setup.h" #include "loopback-setup.h" @@ -90,6 +91,7 @@ static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; static usec_t arg_runtime_watchdog = 0; static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {}; +static uint64_t arg_capability_bounding_set_drop = 0; static FILE* serialization = NULL; @@ -682,6 +684,7 @@ static int parse_config_file(void) { { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, { "Manager", "RuntimeWatchdogSec", config_parse_usec, 0, &arg_runtime_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_usec, 0, &arg_shutdown_watchdog }, + { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop }, { "Manager", "DefaultLimitCPU", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_CPU]}, { "Manager", "DefaultLimitFSIZE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_FSIZE]}, { "Manager", "DefaultLimitDATA", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_DATA]}, @@ -1488,6 +1491,14 @@ int main(int argc, char *argv[]) { if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0) watchdog_set_timeout(&arg_runtime_watchdog); + if (arg_capability_bounding_set_drop) { + r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true); + if (r < 0) { + log_error("Failed to drop capability bounding set: %s", strerror(-r)); + goto finish; + } + } + r = manager_new(arg_running_as, &m); if (r < 0) { log_error("Failed to allocate manager object: %s", strerror(-r)); diff --git a/src/core/system.conf b/src/core/system.conf index 807d184..e50ee3c 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -26,3 +26,20 @@ #JoinControllers=cpu,cpuacct #RuntimeWatchdogSec=0 #ShutdownWatchdogSec=10min +#CapabilityBoundingSet= +#DefaultLimitCPU= +#DefaultLimitFSIZE= +#DefaultLimitDATA= +#DefaultLimitSTACK= +#DefaultLimitCORE= +#DefaultLimitRSS= +#DefaultLimitNOFILE= +#DefaultLimitAS= +#DefaultLimitNPROC= +#DefaultLimitMEMLOCK= +#DefaultLimitLOCKS= +#DefaultLimitSIGPENDING= +#DefaultLimitMSGQUEUE= +#DefaultLimitNICE= +#DefaultLimitRTPRIO= +#DefaultLimitRTTIME= diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 7c36731..ad60e30 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -544,49 +544,31 @@ static int setup_hostname(void) { } static int drop_capabilities(void) { - static const unsigned long retain[] = { - CAP_CHOWN, - CAP_DAC_OVERRIDE, - CAP_DAC_READ_SEARCH, - CAP_FOWNER, - CAP_FSETID, - CAP_IPC_OWNER, - CAP_KILL, - CAP_LEASE, - CAP_LINUX_IMMUTABLE, - CAP_NET_BIND_SERVICE, - CAP_NET_BROADCAST, - CAP_NET_RAW, - CAP_SETGID, - CAP_SETFCAP, - CAP_SETPCAP, - CAP_SETUID, - CAP_SYS_ADMIN, - CAP_SYS_CHROOT, - CAP_SYS_NICE, - CAP_SYS_PTRACE, - CAP_SYS_TTY_CONFIG - }; - - unsigned long l; - - for (l = 0; l <= cap_last_cap(); l++) { - unsigned i; - - for (i = 0; i < ELEMENTSOF(retain); i++) - if (retain[i] == l) - break; - - if (i < ELEMENTSOF(retain)) - continue; - if (prctl(PR_CAPBSET_DROP, l) < 0) { - log_error("PR_CAPBSET_DROP failed: %m"); - return -errno; - } - } - - return 0; + static const uint64_t retain = + (1ULL << CAP_CHOWN) | + (1ULL << CAP_DAC_OVERRIDE) | + (1ULL << CAP_DAC_READ_SEARCH) | + (1ULL << CAP_FOWNER) | + (1ULL << CAP_FSETID) | + (1ULL << CAP_IPC_OWNER) | + (1ULL << CAP_KILL) | + (1ULL << CAP_LEASE) | + (1ULL << CAP_LINUX_IMMUTABLE) | + (1ULL << CAP_NET_BIND_SERVICE) | + (1ULL << CAP_NET_BROADCAST) | + (1ULL << CAP_NET_RAW) | + (1ULL << CAP_SETGID) | + (1ULL << CAP_SETFCAP) | + (1ULL << CAP_SETPCAP) | + (1ULL << CAP_SETUID) | + (1ULL << CAP_SYS_ADMIN) | + (1ULL << CAP_SYS_CHROOT) | + (1ULL << CAP_SYS_NICE) | + (1ULL << CAP_SYS_PTRACE) | + (1ULL << CAP_SYS_TTY_CONFIG); + + return capability_bounding_set_drop(~retain, false); } static int is_os_tree(const char *path) { @@ -1041,8 +1023,10 @@ int main(int argc, char *argv[]) { loopback_setup(); - if (drop_capabilities() < 0) + if (drop_capabilities() < 0) { + log_error("drop_capabilities() failed: %m"); goto child_fail; + } if (arg_user) { diff --git a/src/shared/capability.c b/src/shared/capability.c index b800215..b2bcfed 100644 --- a/src/shared/capability.c +++ b/src/shared/capability.c @@ -40,7 +40,8 @@ int have_effective_cap(int value) { cap_flag_value_t fv; int r; - if (!(cap = cap_get_proc())) + cap = cap_get_proc(); + if (!cap) return -errno; if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0) @@ -84,3 +85,89 @@ unsigned long cap_last_cap(void) { return p; } + +int capability_bounding_set_drop(uint64_t drop, bool right_now) { + unsigned long i; + cap_t after_cap = NULL, temp_cap = NULL; + cap_flag_value_t fv; + int r; + + /* If we are run as PID 1 we will lack CAP_SETPCAP by default + * in the effective set (yes, the kernel drops that when + * executing init!), so get it back temporarily so that we can + * call PR_CAPBSET_DROP. */ + + after_cap = cap_get_proc(); + if (!after_cap) + return -errno; + + if (cap_get_flag(after_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) { + cap_free(after_cap); + return -errno; + } + + if (fv != CAP_SET) { + static const cap_value_t v = CAP_SETPCAP; + + temp_cap = cap_dup(after_cap); + if (!temp_cap) { + r = -errno; + goto finish; + } + + if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) { + r = -errno; + goto finish; + } + + if (cap_set_proc(temp_cap) < 0) { + r = -errno; + goto finish; + } + } + + for (i = 0; i <= cap_last_cap(); i++) { + + if (drop & ((uint64_t) 1ULL << (uint64_t) i)) { + cap_value_t v; + + /* Drop it from the bounding set */ + if (prctl(PR_CAPBSET_DROP, i) < 0) { + r = -errno; + goto finish; + } + v = i; + + /* Also drop it from the inheritable set, so + * that anything we exec() loses the + * capability for good. */ + if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + + /* If we shall apply this right now drop it + * also from our own capability sets. */ + if (right_now) { + if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 || + cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + } + } + } + + r = 0; + +finish: + if (temp_cap) + cap_free(temp_cap); + + if (after_cap) { + cap_set_proc(after_cap); + cap_free(after_cap); + } + + return r; +} diff --git a/src/shared/capability.h b/src/shared/capability.h index ab7e40b..0daf4a6 100644 --- a/src/shared/capability.h +++ b/src/shared/capability.h @@ -22,6 +22,11 @@ along with systemd; If not, see . ***/ +#include +#include + unsigned long cap_last_cap(void); int have_effective_cap(int value); +int capability_bounding_set_drop(uint64_t caps, bool right_now); + #endif