4289f9c
From: Stefan Hajnoczi <stefanha@redhat.com>
4289f9c
Date: Tue, 6 Oct 2020 10:58:26 +0100
4289f9c
Subject: [PATCH] virtiofsd: avoid /proc/self/fd tempdir
4289f9c
4289f9c
In order to prevent /proc/self/fd escapes, a temporary directory is
4289f9c
created where /proc/self/fd is bind-mounted. This doesn't work on
4289f9c
read-only file systems.
4289f9c
4289f9c
Avoid the temporary directory by bind-mounting /proc/self/fd over /proc.
4289f9c
This does not affect other processes since we remounted / with MS_REC |
4289f9c
MS_SLAVE. /proc must exist and virtiofsd does not use it, so it's safe to
4289f9c
do this.
4289f9c
4289f9c
Path traversal can be tested with the following function:
4289f9c
4289f9c
  static void test_proc_fd_escape(struct lo_data *lo)
4289f9c
  {
4289f9c
      int fd;
4289f9c
      int level = 0;
4289f9c
      ino_t last_ino = 0;
4289f9c
4289f9c
      fd = lo->proc_self_fd;
4289f9c
      for (;;) {
4289f9c
          struct stat st;
4289f9c
4289f9c
          if (fstat(fd, &st) != 0) {
4289f9c
              perror("fstat");
4289f9c
              return;
4289f9c
          }
4289f9c
          if (last_ino && st.st_ino == last_ino) {
4289f9c
              fprintf(stderr, "inode number unchanged, stopping\n");
4289f9c
              return;
4289f9c
          }
4289f9c
          last_ino = st.st_ino;
4289f9c
4289f9c
          fprintf(stderr, "Level %d dev %lu ino %lu\n", level,
4289f9c
                  (unsigned long)st.st_dev,
4289f9c
                  (unsigned long)last_ino);
4289f9c
          fd = openat(fd, "..", O_PATH | O_DIRECTORY | O_NOFOLLOW);
4289f9c
          level++;
4289f9c
      }
4289f9c
  }
4289f9c
4289f9c
Before and after this patch, only Level 0 is displayed. Without the
4289f9c
/proc/self/fd bind-mount protection, it is possible to traverse parent
4289f9c
directories.
4289f9c
4289f9c
Fixes: 397ae982f4df4 ("virtiofsd: jail lo->proc_self_fd")
4289f9c
Cc: Miklos Szeredi <mszeredi@redhat.com>
4289f9c
Cc: Jens Freimann <jfreimann@redhat.com>
4289f9c
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4289f9c
Message-Id: <20201006095826.59813-1-stefanha@redhat.com>
4289f9c
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
4289f9c
Tested-by: Jens Freimann <jfreimann@redhat.com>
4289f9c
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
4289f9c
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
4289f9c
---
4289f9c
 tools/virtiofsd/passthrough_ll.c | 34 +++++++++++---------------------
4289f9c
 1 file changed, 11 insertions(+), 23 deletions(-)
4289f9c
4289f9c
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
0d7eae5
index 50a164a599..b865dc564b 100644
4289f9c
--- a/tools/virtiofsd/passthrough_ll.c
4289f9c
+++ b/tools/virtiofsd/passthrough_ll.c
0d7eae5
@@ -2386,8 +2386,6 @@ static void setup_wait_parent_capabilities(void)
4289f9c
 static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
4289f9c
 {
4289f9c
     pid_t child;
4289f9c
-    char template[] = "virtiofsd-XXXXXX";
4289f9c
-    char *tmpdir;
4289f9c
 
4289f9c
     /*
4289f9c
      * Create a new pid namespace for *child* processes.  We'll have to
0d7eae5
@@ -2451,33 +2449,23 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
4289f9c
         exit(1);
4289f9c
     }
4289f9c
 
4289f9c
-    tmpdir = mkdtemp(template);
4289f9c
-    if (!tmpdir) {
4289f9c
-        fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template);
4289f9c
-        exit(1);
4289f9c
-    }
4289f9c
-
4289f9c
-    if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) {
4289f9c
-        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n",
4289f9c
-                 tmpdir);
4289f9c
+    /*
4289f9c
+     * We only need /proc/self/fd. Prevent ".." from accessing parent
4289f9c
+     * directories of /proc/self/fd by bind-mounting it over /proc. Since / was
4289f9c
+     * previously remounted with MS_REC | MS_SLAVE this mount change only
4289f9c
+     * affects our process.
4289f9c
+     */
4289f9c
+    if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) {
4289f9c
+        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n");
4289f9c
         exit(1);
4289f9c
     }
4289f9c
 
4289f9c
-    /* Now we can get our /proc/self/fd directory file descriptor */
4289f9c
-    lo->proc_self_fd = open(tmpdir, O_PATH);
4289f9c
+    /* Get the /proc (actually /proc/self/fd, see above) file descriptor */
4289f9c
+    lo->proc_self_fd = open("/proc", O_PATH);
4289f9c
     if (lo->proc_self_fd == -1) {
4289f9c
-        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir);
4289f9c
+        fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n");
4289f9c
         exit(1);
4289f9c
     }
4289f9c
-
4289f9c
-    if (umount2(tmpdir, MNT_DETACH) < 0) {
4289f9c
-        fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir);
4289f9c
-        exit(1);
4289f9c
-    }
4289f9c
-
4289f9c
-    if (rmdir(tmpdir) < 0) {
4289f9c
-        fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir);
4289f9c
-    }
4289f9c
 }
4289f9c
 
4289f9c
 /*