1d442bb
From: Stefan Hajnoczi <stefanha@redhat.com>
1d442bb
Date: Mon, 27 Jan 2020 19:01:27 +0000
1d442bb
Subject: [PATCH] virtiofsd: sandbox mount namespace
1d442bb
MIME-Version: 1.0
1d442bb
Content-Type: text/plain; charset=UTF-8
1d442bb
Content-Transfer-Encoding: 8bit
1d442bb
1d442bb
Use a mount namespace with the shared directory tree mounted at "/" and
1d442bb
no other mounts.
1d442bb
1d442bb
This prevents symlink escape attacks because symlink targets are
1d442bb
resolved only against the shared directory and cannot go outside it.
1d442bb
1d442bb
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
1d442bb
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
1d442bb
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
1d442bb
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442bb
(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc)
1d442bb
---
1d442bb
 tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++
1d442bb
 1 file changed, 89 insertions(+)
1d442bb
1d442bb
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
1d442bb
index e2e2211ea1..0570453eef 100644
1d442bb
--- a/tools/virtiofsd/passthrough_ll.c
1d442bb
+++ b/tools/virtiofsd/passthrough_ll.c
1d442bb
@@ -50,6 +50,7 @@
1d442bb
 #include <stdlib.h>
1d442bb
 #include <string.h>
1d442bb
 #include <sys/file.h>
1d442bb
+#include <sys/mount.h>
1d442bb
 #include <sys/syscall.h>
1d442bb
 #include <sys/xattr.h>
1d442bb
 #include <unistd.h>
1d442bb
@@ -1943,6 +1944,58 @@ static void print_capabilities(void)
1d442bb
     printf("}\n");
1d442bb
 }
1d442bb
 
1d442bb
+/* This magic is based on lxc's lxc_pivot_root() */
1d442bb
+static void setup_pivot_root(const char *source)
1d442bb
+{
1d442bb
+    int oldroot;
1d442bb
+    int newroot;
1d442bb
+
1d442bb
+    oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
1d442bb
+    if (oldroot < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
1d442bb
+    if (newroot < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (fchdir(newroot) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (syscall(__NR_pivot_root, ".", ".") < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (fchdir(oldroot) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (umount2(".", MNT_DETACH) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (fchdir(newroot) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    close(newroot);
1d442bb
+    close(oldroot);
1d442bb
+}
1d442bb
+
1d442bb
 static void setup_proc_self_fd(struct lo_data *lo)
1d442bb
 {
1d442bb
     lo->proc_self_fd = open("/proc/self/fd", O_PATH);
1d442bb
@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo)
1d442bb
     }
1d442bb
 }
1d442bb
 
1d442bb
+/*
1d442bb
+ * Make the source directory our root so symlinks cannot escape and no other
1d442bb
+ * files are accessible.
1d442bb
+ */
1d442bb
+static void setup_mount_namespace(const char *source)
1d442bb
+{
1d442bb
+    if (unshare(CLONE_NEWNS) != 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
1d442bb
+        fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
1d442bb
+        exit(1);
1d442bb
+    }
1d442bb
+
1d442bb
+    setup_pivot_root(source);
1d442bb
+}
1d442bb
+
1d442bb
+/*
1d442bb
+ * Lock down this process to prevent access to other processes or files outside
1d442bb
+ * source directory.  This reduces the impact of arbitrary code execution bugs.
1d442bb
+ */
1d442bb
+static void setup_sandbox(struct lo_data *lo)
1d442bb
+{
1d442bb
+    setup_mount_namespace(lo->source);
1d442bb
+}
1d442bb
+
1d442bb
 int main(int argc, char *argv[])
1d442bb
 {
1d442bb
     struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
1d442bb
@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[])
1d442bb
     }
1d442bb
 
1d442bb
     lo.root.fd = open(lo.source, O_PATH);
1d442bb
+
1d442bb
     if (lo.root.fd == -1) {
1d442bb
         fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source);
1d442bb
         exit(1);
1d442bb
@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[])
1d442bb
     /* Must be after daemonize to get the right /proc/self/fd */
1d442bb
     setup_proc_self_fd(&lo);
1d442bb
 
1d442bb
+    setup_sandbox(&lo);
1d442bb
+
1d442bb
     /* Block until ctrl+c or fusermount -u */
1d442bb
     ret = virtio_loop(se);
1d442bb