99043cc
From fe55ab80cc20ed77f99bb39b292ddf91161b052a Mon Sep 17 00:00:00 2001
99043cc
From: Lennart Poettering <lennart@poettering.net>
99043cc
Date: Thu, 24 May 2012 04:00:56 +0200
99043cc
Subject: [PATCH] main: add configuration option to alter capability bounding
99043cc
 set for PID 1
99043cc
99043cc
This also ensures that caps dropped from the bounding set are also
99043cc
dropped from the inheritable set, to be extra-secure. Usually that should
99043cc
change very little though as the inheritable set is empty for all our uses
99043cc
anyway.
99043cc
(cherry picked from commit ec8927ca5940e809f0b72f530582c76f1db4f065)
99043cc
99043cc
Conflicts:
99043cc
	TODO
99043cc
---
99043cc
 man/systemd.conf.xml                  |   45 +++++++++++++++--
99043cc
 man/systemd.exec.xml                  |   16 +++---
99043cc
 src/core/execute.c                    |   64 +-----------------------
99043cc
 src/core/load-fragment-gperf.gperf.m4 |    2 +-
99043cc
 src/core/load-fragment.c              |   13 ++---
99043cc
 src/core/load-fragment.h              |    2 +-
99043cc
 src/core/main.c                       |   11 ++++
99043cc
 src/core/system.conf                  |   17 +++++++
99043cc
 src/nspawn/nspawn.c                   |   70 ++++++++++----------------
99043cc
 src/shared/capability.c               |   89 ++++++++++++++++++++++++++++++++-
99043cc
 src/shared/capability.h               |    5 ++
99043cc
 11 files changed, 208 insertions(+), 126 deletions(-)
99043cc
99043cc
diff --git a/man/systemd.conf.xml b/man/systemd.conf.xml
99043cc
index d37c574..8e288eb 100644
99043cc
--- a/man/systemd.conf.xml
99043cc
+++ b/man/systemd.conf.xml
99043cc
@@ -196,6 +196,38 @@
99043cc
                         </varlistentry>
99043cc
 
99043cc
                         <varlistentry>
99043cc
+                                <term><varname>CapabilityBoundingSet=</varname></term>
99043cc
+
99043cc
+                                <listitem><para>Controls which
99043cc
+                                capabilities to include in the
99043cc
+                                capability bounding set for PID 1 and
99043cc
+                                its children. See
99043cc
+                                <citerefentry><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>
99043cc
+                                for details. Takes a whitespace
99043cc
+                                separated list of capability names as
99043cc
+                                read by
99043cc
+                                <citerefentry><refentrytitle>cap_from_name</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
99043cc
+                                Capabilities listed will be included
99043cc
+                                in the bounding set, all others are
99043cc
+                                removed. If the list of capabilities
99043cc
+                                is prefixed with ~ all but the listed
99043cc
+                                capabilities will be included, the
99043cc
+                                effect of the assignment
99043cc
+                                inverted. Note that this option also
99043cc
+                                affects the respective capabilities in
99043cc
+                                the effective, permitted and
99043cc
+                                inheritable capability sets. The
99043cc
+                                capability bounding set may also be
99043cc
+                                individually configured for units
99043cc
+                                using the
99043cc
+                                <varname>CapabilityBoundingSet=</varname>
99043cc
+                                directive for units, but note that
99043cc
+                                capabilities dropped for PID 1 cannot
99043cc
+                                be regained in individual units, they
99043cc
+                                are lost for good.</para></listitem>
99043cc
+                        </varlistentry>
99043cc
+
99043cc
+                        <varlistentry>
99043cc
                                 <term><varname>DefaultLimitCPU=</varname></term>
99043cc
                                 <term><varname>DefaultLimitFSIZE=</varname></term>
99043cc
                                 <term><varname>DefaultLimitDATA=</varname></term>
99043cc
@@ -212,14 +244,21 @@
99043cc
                                 <term><varname>DefaultLimitNICE=</varname></term>
99043cc
                                 <term><varname>DefaultLimitRTPRIO=</varname></term>
99043cc
                                 <term><varname>DefaultLimitRTTIME=</varname></term>
99043cc
+
99043cc
                                 <listitem><para>These settings control
99043cc
-                                various default resource limits for units. See
99043cc
+                                various default resource limits for
99043cc
+                                units. See
99043cc
                                 <citerefentry><refentrytitle>setrlimit</refentrytitle><manvolnum>2</manvolnum></citerefentry>
99043cc
                                 for details. Use the string
99043cc
                                 <varname>infinity</varname> to
99043cc
                                 configure no limit on a specific
99043cc
-				resource. They can be overriden in units files
99043cc
-				using corresponding LimitXXXX parameter.</para></listitem>
99043cc
+                                resource. These settings may be
99043cc
+                                overridden in individual units
99043cc
+                                using the corresponding LimitXXX=
99043cc
+                                directives. Note that these resource
99043cc
+                                limits are only defaults for units,
99043cc
+                                they are not applied to PID 1
99043cc
+                                itself.</para></listitem>
99043cc
                         </varlistentry>
99043cc
                 </variablelist>
99043cc
         </refsect1>
99043cc
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
99043cc
index e6f49c9..7d28545 100644
99043cc
--- a/man/systemd.exec.xml
99043cc
+++ b/man/systemd.exec.xml
99043cc
@@ -678,17 +678,17 @@
99043cc
                                 is prefixed with ~ all but the listed
99043cc
                                 capabilities will be included, the
99043cc
                                 effect of the assignment
99043cc
-                                inverted. Note that this option does
99043cc
-                                not actually set or unset any
99043cc
-                                capabilities in the effective,
99043cc
-                                permitted or inherited capability
99043cc
-                                sets. That's what
99043cc
-                                <varname>Capabilities=</varname> is
99043cc
-                                for. If this option is not used the
99043cc
+                                inverted. Note that this option also
99043cc
+                                affects the respective capabilities in
99043cc
+                                the effective, permitted and
99043cc
+                                inheritable capability sets, on top of
99043cc
+                                what <varname>Capabilities=</varname>
99043cc
+                                does. If this option is not used the
99043cc
                                 capability bounding set is not
99043cc
                                 modified on process execution, hence
99043cc
                                 no limits on the capabilities of the
99043cc
-                                process are enforced.</para></listitem>
99043cc
+                                process are
99043cc
+                                enforced.</para></listitem>
99043cc
                         </varlistentry>
99043cc
 
99043cc
                         <varlistentry>
99043cc
diff --git a/src/core/execute.c b/src/core/execute.c
99043cc
index a2ef77c..f93c9a4 100644
99043cc
--- a/src/core/execute.c
99043cc
+++ b/src/core/execute.c
99043cc
@@ -870,68 +870,6 @@ fail:
99043cc
 }
99043cc
 #endif
99043cc
 
99043cc
-static int do_capability_bounding_set_drop(uint64_t drop) {
99043cc
-        unsigned long i;
99043cc
-        cap_t old_cap = NULL, new_cap = NULL;
99043cc
-        cap_flag_value_t fv;
99043cc
-        int r;
99043cc
-
99043cc
-        /* If we are run as PID 1 we will lack CAP_SETPCAP by default
99043cc
-         * in the effective set (yes, the kernel drops that when
99043cc
-         * executing init!), so get it back temporarily so that we can
99043cc
-         * call PR_CAPBSET_DROP. */
99043cc
-
99043cc
-        old_cap = cap_get_proc();
99043cc
-        if (!old_cap)
99043cc
-                return -errno;
99043cc
-
99043cc
-        if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
99043cc
-                r = -errno;
99043cc
-                goto finish;
99043cc
-        }
99043cc
-
99043cc
-        if (fv != CAP_SET) {
99043cc
-                static const cap_value_t v = CAP_SETPCAP;
99043cc
-
99043cc
-                new_cap = cap_dup(old_cap);
99043cc
-                if (!new_cap) {
99043cc
-                        r = -errno;
99043cc
-                        goto finish;
99043cc
-                }
99043cc
-
99043cc
-                if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
99043cc
-                        r = -errno;
99043cc
-                        goto finish;
99043cc
-                }
99043cc
-
99043cc
-                if (cap_set_proc(new_cap) < 0) {
99043cc
-                        r = -errno;
99043cc
-                        goto finish;
99043cc
-                }
99043cc
-        }
99043cc
-
99043cc
-        for (i = 0; i <= cap_last_cap(); i++)
99043cc
-                if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
99043cc
-                        if (prctl(PR_CAPBSET_DROP, i) < 0) {
99043cc
-                                r = -errno;
99043cc
-                                goto finish;
99043cc
-                        }
99043cc
-                }
99043cc
-
99043cc
-        r = 0;
99043cc
-
99043cc
-finish:
99043cc
-        if (new_cap)
99043cc
-                cap_free(new_cap);
99043cc
-
99043cc
-        if (old_cap) {
99043cc
-                cap_set_proc(old_cap);
99043cc
-                cap_free(old_cap);
99043cc
-        }
99043cc
-
99043cc
-        return r;
99043cc
-}
99043cc
-
99043cc
 static void rename_process_from_path(const char *path) {
99043cc
         char process_name[11];
99043cc
         const char *p;
99043cc
@@ -1398,7 +1336,7 @@ int exec_spawn(ExecCommand *command,
99043cc
                         }
99043cc
 
99043cc
                         if (context->capability_bounding_set_drop) {
99043cc
-                                err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
99043cc
+                                err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
99043cc
                                 if (err < 0) {
99043cc
                                         r = EXIT_CAPABILITIES;
99043cc
                                         goto fail_child;
99043cc
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
99043cc
index 9c4a930..9efc859 100644
99043cc
--- a/src/core/load-fragment-gperf.gperf.m4
99043cc
+++ b/src/core/load-fragment-gperf.gperf.m4
99043cc
@@ -46,7 +46,7 @@ $1.SyslogLevel,                  config_parse_level,                 0,
99043cc
 $1.SyslogLevelPrefix,            config_parse_bool,                  0,                             offsetof($1, exec_context.syslog_level_prefix)
99043cc
 $1.Capabilities,                 config_parse_exec_capabilities,     0,                             offsetof($1, exec_context)
99043cc
 $1.SecureBits,                   config_parse_exec_secure_bits,      0,                             offsetof($1, exec_context)
99043cc
-$1.CapabilityBoundingSet,        config_parse_exec_bounding_set,     0,                             offsetof($1, exec_context)
99043cc
+$1.CapabilityBoundingSet,        config_parse_bounding_set,          0,                             offsetof($1, exec_context.capability_bounding_set_drop)
99043cc
 $1.TimerSlackNSec,               config_parse_exec_timer_slack_nsec, 0,                             offsetof($1, exec_context)
99043cc
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
99043cc
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
99043cc
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
99043cc
index b59029e..bb27d95 100644
99043cc
--- a/src/core/load-fragment.c
99043cc
+++ b/src/core/load-fragment.c
99043cc
@@ -931,7 +931,7 @@ int config_parse_exec_secure_bits(
99043cc
         return 0;
99043cc
 }
99043cc
 
99043cc
-int config_parse_exec_bounding_set(
99043cc
+int config_parse_bounding_set(
99043cc
                 const char *filename,
99043cc
                 unsigned line,
99043cc
                 const char *section,
99043cc
@@ -941,7 +941,7 @@ int config_parse_exec_bounding_set(
99043cc
                 void *data,
99043cc
                 void *userdata) {
99043cc
 
99043cc
-        ExecContext *c = data;
99043cc
+        uint64_t *capability_bounding_set_drop = data;
99043cc
         char *w;
99043cc
         size_t l;
99043cc
         char *state;
99043cc
@@ -968,7 +968,8 @@ int config_parse_exec_bounding_set(
99043cc
                 int r;
99043cc
                 cap_value_t cap;
99043cc
 
99043cc
-                if (!(t = strndup(w, l)))
99043cc
+                t = strndup(w, l);
99043cc
+                if (!t)
99043cc
                         return -ENOMEM;
99043cc
 
99043cc
                 r = cap_from_name(t, &cap);
99043cc
@@ -983,9 +984,9 @@ int config_parse_exec_bounding_set(
99043cc
         }
99043cc
 
99043cc
         if (invert)
99043cc
-                c->capability_bounding_set_drop |= sum;
99043cc
+                *capability_bounding_set_drop |= sum;
99043cc
         else
99043cc
-                c->capability_bounding_set_drop |= ~sum;
99043cc
+                *capability_bounding_set_drop |= ~sum;
99043cc
 
99043cc
         return 0;
99043cc
 }
99043cc
@@ -2440,7 +2441,7 @@ void unit_dump_config_items(FILE *f) {
99043cc
                 { config_parse_level,                 "LEVEL" },
99043cc
                 { config_parse_exec_capabilities,     "CAPABILITIES" },
99043cc
                 { config_parse_exec_secure_bits,      "SECUREBITS" },
99043cc
-                { config_parse_exec_bounding_set,     "BOUNDINGSET" },
99043cc
+                { config_parse_bounding_set,          "BOUNDINGSET" },
99043cc
                 { config_parse_exec_timer_slack_nsec, "TIMERSLACK" },
99043cc
                 { config_parse_limit,                 "LIMIT" },
99043cc
                 { config_parse_unit_cgroup,           "CGROUP [...]" },
99043cc
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
99043cc
index d0efa90..7d73eec 100644
99043cc
--- a/src/core/load-fragment.h
99043cc
+++ b/src/core/load-fragment.h
99043cc
@@ -56,7 +56,7 @@ int config_parse_exec_cpu_sched_prio(const char *filename, unsigned line, const
99043cc
 int config_parse_exec_cpu_affinity(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
 int config_parse_exec_capabilities(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
 int config_parse_exec_secure_bits(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
-int config_parse_exec_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
+int config_parse_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
 int config_parse_exec_timer_slack_nsec(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
 int config_parse_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
 int config_parse_unit_cgroup(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
99043cc
diff --git a/src/core/main.c b/src/core/main.c
99043cc
index f1b2b28..4ebddab 100644
99043cc
--- a/src/core/main.c
99043cc
+++ b/src/core/main.c
99043cc
@@ -50,6 +50,7 @@
99043cc
 #include "watchdog.h"
99043cc
 #include "path-util.h"
99043cc
 #include "switch-root.h"
99043cc
+#include "capability.h"
99043cc
 
99043cc
 #include "mount-setup.h"
99043cc
 #include "loopback-setup.h"
99043cc
@@ -90,6 +91,7 @@ static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
99043cc
 static usec_t arg_runtime_watchdog = 0;
99043cc
 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
99043cc
 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
99043cc
+static uint64_t arg_capability_bounding_set_drop = 0;
99043cc
 
99043cc
 static FILE* serialization = NULL;
99043cc
 
99043cc
@@ -682,6 +684,7 @@ static int parse_config_file(void) {
99043cc
                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
99043cc
                 { "Manager", "RuntimeWatchdogSec",    config_parse_usec,         0, &arg_runtime_watchdog    },
99043cc
                 { "Manager", "ShutdownWatchdogSec",   config_parse_usec,         0, &arg_shutdown_watchdog   },
99043cc
+                { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
99043cc
                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
99043cc
                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
99043cc
                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
99043cc
@@ -1488,6 +1491,14 @@ int main(int argc, char *argv[]) {
99043cc
         if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0)
99043cc
                 watchdog_set_timeout(&arg_runtime_watchdog);
99043cc
 
99043cc
+        if (arg_capability_bounding_set_drop) {
99043cc
+                r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
99043cc
+                if (r < 0) {
99043cc
+                        log_error("Failed to drop capability bounding set: %s", strerror(-r));
99043cc
+                        goto finish;
99043cc
+                }
99043cc
+        }
99043cc
+
99043cc
         r = manager_new(arg_running_as, &m);
99043cc
         if (r < 0) {
99043cc
                 log_error("Failed to allocate manager object: %s", strerror(-r));
99043cc
diff --git a/src/core/system.conf b/src/core/system.conf
99043cc
index 807d184..e50ee3c 100644
99043cc
--- a/src/core/system.conf
99043cc
+++ b/src/core/system.conf
99043cc
@@ -26,3 +26,20 @@
99043cc
 #JoinControllers=cpu,cpuacct
99043cc
 #RuntimeWatchdogSec=0
99043cc
 #ShutdownWatchdogSec=10min
99043cc
+#CapabilityBoundingSet=
99043cc
+#DefaultLimitCPU=
99043cc
+#DefaultLimitFSIZE=
99043cc
+#DefaultLimitDATA=
99043cc
+#DefaultLimitSTACK=
99043cc
+#DefaultLimitCORE=
99043cc
+#DefaultLimitRSS=
99043cc
+#DefaultLimitNOFILE=
99043cc
+#DefaultLimitAS=
99043cc
+#DefaultLimitNPROC=
99043cc
+#DefaultLimitMEMLOCK=
99043cc
+#DefaultLimitLOCKS=
99043cc
+#DefaultLimitSIGPENDING=
99043cc
+#DefaultLimitMSGQUEUE=
99043cc
+#DefaultLimitNICE=
99043cc
+#DefaultLimitRTPRIO=
99043cc
+#DefaultLimitRTTIME=
99043cc
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
99043cc
index 7c36731..ad60e30 100644
99043cc
--- a/src/nspawn/nspawn.c
99043cc
+++ b/src/nspawn/nspawn.c
99043cc
@@ -544,49 +544,31 @@ static int setup_hostname(void) {
99043cc
 }
99043cc
 
99043cc
 static int drop_capabilities(void) {
99043cc
-        static const unsigned long retain[] = {
99043cc
-                CAP_CHOWN,
99043cc
-                CAP_DAC_OVERRIDE,
99043cc
-                CAP_DAC_READ_SEARCH,
99043cc
-                CAP_FOWNER,
99043cc
-                CAP_FSETID,
99043cc
-                CAP_IPC_OWNER,
99043cc
-                CAP_KILL,
99043cc
-                CAP_LEASE,
99043cc
-                CAP_LINUX_IMMUTABLE,
99043cc
-                CAP_NET_BIND_SERVICE,
99043cc
-                CAP_NET_BROADCAST,
99043cc
-                CAP_NET_RAW,
99043cc
-                CAP_SETGID,
99043cc
-                CAP_SETFCAP,
99043cc
-                CAP_SETPCAP,
99043cc
-                CAP_SETUID,
99043cc
-                CAP_SYS_ADMIN,
99043cc
-                CAP_SYS_CHROOT,
99043cc
-                CAP_SYS_NICE,
99043cc
-                CAP_SYS_PTRACE,
99043cc
-                CAP_SYS_TTY_CONFIG
99043cc
-        };
99043cc
-
99043cc
-        unsigned long l;
99043cc
-
99043cc
-        for (l = 0; l <= cap_last_cap(); l++) {
99043cc
-                unsigned i;
99043cc
-
99043cc
-                for (i = 0; i < ELEMENTSOF(retain); i++)
99043cc
-                        if (retain[i] == l)
99043cc
-                                break;
99043cc
-
99043cc
-                if (i < ELEMENTSOF(retain))
99043cc
-                        continue;
99043cc
 
99043cc
-                if (prctl(PR_CAPBSET_DROP, l) < 0) {
99043cc
-                        log_error("PR_CAPBSET_DROP failed: %m");
99043cc
-                        return -errno;
99043cc
-                }
99043cc
-        }
99043cc
-
99043cc
-        return 0;
99043cc
+        static const uint64_t retain =
99043cc
+                (1ULL << CAP_CHOWN) |
99043cc
+                (1ULL << CAP_DAC_OVERRIDE) |
99043cc
+                (1ULL << CAP_DAC_READ_SEARCH) |
99043cc
+                (1ULL << CAP_FOWNER) |
99043cc
+                (1ULL << CAP_FSETID) |
99043cc
+                (1ULL << CAP_IPC_OWNER) |
99043cc
+                (1ULL << CAP_KILL) |
99043cc
+                (1ULL << CAP_LEASE) |
99043cc
+                (1ULL << CAP_LINUX_IMMUTABLE) |
99043cc
+                (1ULL << CAP_NET_BIND_SERVICE) |
99043cc
+                (1ULL << CAP_NET_BROADCAST) |
99043cc
+                (1ULL << CAP_NET_RAW) |
99043cc
+                (1ULL << CAP_SETGID) |
99043cc
+                (1ULL << CAP_SETFCAP) |
99043cc
+                (1ULL << CAP_SETPCAP) |
99043cc
+                (1ULL << CAP_SETUID) |
99043cc
+                (1ULL << CAP_SYS_ADMIN) |
99043cc
+                (1ULL << CAP_SYS_CHROOT) |
99043cc
+                (1ULL << CAP_SYS_NICE) |
99043cc
+                (1ULL << CAP_SYS_PTRACE) |
99043cc
+                (1ULL << CAP_SYS_TTY_CONFIG);
99043cc
+
99043cc
+        return capability_bounding_set_drop(~retain, false);
99043cc
 }
99043cc
 
99043cc
 static int is_os_tree(const char *path) {
99043cc
@@ -1041,8 +1023,10 @@ int main(int argc, char *argv[]) {
99043cc
 
99043cc
                 loopback_setup();
99043cc
 
99043cc
-                if (drop_capabilities() < 0)
99043cc
+                if (drop_capabilities() < 0) {
99043cc
+                        log_error("drop_capabilities() failed: %m");
99043cc
                         goto child_fail;
99043cc
+                }
99043cc
 
99043cc
                 if (arg_user) {
99043cc
 
99043cc
diff --git a/src/shared/capability.c b/src/shared/capability.c
99043cc
index b800215..b2bcfed 100644
99043cc
--- a/src/shared/capability.c
99043cc
+++ b/src/shared/capability.c
99043cc
@@ -40,7 +40,8 @@ int have_effective_cap(int value) {
99043cc
         cap_flag_value_t fv;
99043cc
         int r;
99043cc
 
99043cc
-        if (!(cap = cap_get_proc()))
99043cc
+        cap = cap_get_proc();
99043cc
+        if (!cap)
99043cc
                 return -errno;
99043cc
 
99043cc
         if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
99043cc
@@ -84,3 +85,89 @@ unsigned long cap_last_cap(void) {
99043cc
 
99043cc
         return p;
99043cc
 }
99043cc
+
99043cc
+int capability_bounding_set_drop(uint64_t drop, bool right_now) {
99043cc
+        unsigned long i;
99043cc
+        cap_t after_cap = NULL, temp_cap = NULL;
99043cc
+        cap_flag_value_t fv;
99043cc
+        int r;
99043cc
+
99043cc
+        /* If we are run as PID 1 we will lack CAP_SETPCAP by default
99043cc
+         * in the effective set (yes, the kernel drops that when
99043cc
+         * executing init!), so get it back temporarily so that we can
99043cc
+         * call PR_CAPBSET_DROP. */
99043cc
+
99043cc
+        after_cap = cap_get_proc();
99043cc
+        if (!after_cap)
99043cc
+                return -errno;
99043cc
+
99043cc
+        if (cap_get_flag(after_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
99043cc
+                cap_free(after_cap);
99043cc
+                return -errno;
99043cc
+        }
99043cc
+
99043cc
+        if (fv != CAP_SET) {
99043cc
+                static const cap_value_t v = CAP_SETPCAP;
99043cc
+
99043cc
+                temp_cap = cap_dup(after_cap);
99043cc
+                if (!temp_cap) {
99043cc
+                        r = -errno;
99043cc
+                        goto finish;
99043cc
+                }
99043cc
+
99043cc
+                if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
99043cc
+                        r = -errno;
99043cc
+                        goto finish;
99043cc
+                }
99043cc
+
99043cc
+                if (cap_set_proc(temp_cap) < 0) {
99043cc
+                        r = -errno;
99043cc
+                        goto finish;
99043cc
+                }
99043cc
+        }
99043cc
+
99043cc
+        for (i = 0; i <= cap_last_cap(); i++) {
99043cc
+
99043cc
+                if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
99043cc
+                        cap_value_t v;
99043cc
+
99043cc
+                        /* Drop it from the bounding set */
99043cc
+                        if (prctl(PR_CAPBSET_DROP, i) < 0) {
99043cc
+                                r = -errno;
99043cc
+                                goto finish;
99043cc
+                        }
99043cc
+                        v = i;
99043cc
+
99043cc
+                        /* Also drop it from the inheritable set, so
99043cc
+                         * that anything we exec() loses the
99043cc
+                         * capability for good. */
99043cc
+                        if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
99043cc
+                                r = -errno;
99043cc
+                                goto finish;
99043cc
+                        }
99043cc
+
99043cc
+                        /* If we shall apply this right now drop it
99043cc
+                         * also from our own capability sets. */
99043cc
+                        if (right_now) {
99043cc
+                                if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
99043cc
+                                    cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
99043cc
+                                        r = -errno;
99043cc
+                                        goto finish;
99043cc
+                                }
99043cc
+                        }
99043cc
+                }
99043cc
+        }
99043cc
+
99043cc
+        r = 0;
99043cc
+
99043cc
+finish:
99043cc
+        if (temp_cap)
99043cc
+                cap_free(temp_cap);
99043cc
+
99043cc
+        if (after_cap) {
99043cc
+                cap_set_proc(after_cap);
99043cc
+                cap_free(after_cap);
99043cc
+        }
99043cc
+
99043cc
+        return r;
99043cc
+}
99043cc
diff --git a/src/shared/capability.h b/src/shared/capability.h
99043cc
index ab7e40b..0daf4a6 100644
99043cc
--- a/src/shared/capability.h
99043cc
+++ b/src/shared/capability.h
99043cc
@@ -22,6 +22,11 @@
99043cc
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
99043cc
 ***/
99043cc
 
99043cc
+#include <inttypes.h>
99043cc
+#include <stdbool.h>
99043cc
+
99043cc
 unsigned long cap_last_cap(void);
99043cc
 int have_effective_cap(int value);
99043cc
+int capability_bounding_set_drop(uint64_t caps, bool right_now);
99043cc
+
99043cc
 #endif