83bab2
From a52f7bfdea91550eee25ee5af1efed4bf1def869 Mon Sep 17 00:00:00 2001
942b17
From: Giuseppe Scrivano <gscrivan@redhat.com>
942b17
Date: Fri, 25 May 2018 18:04:06 +0200
942b17
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
942b17
942b17
if NOTIFY_SOCKET is used, do not block the main runc process waiting
b1f78e
for events on the notify socket.  Bind mount the parent directory of
b1f78e
the notify socket, so that "start" can create the socket and it is
b1f78e
still accessible from the container.
942b17
942b17
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
942b17
---
b1f78e
 notify_socket.go | 113 ++++++++++++++++++++++++++++++++++-------------
83bab2
 signals.go       |   4 +-
b1f78e
 start.go         |  13 +++++-
b1f78e
 utils_linux.go   |  12 ++++-
83bab2
 4 files changed, 106 insertions(+), 36 deletions(-)
942b17
942b17
diff --git a/notify_socket.go b/notify_socket.go
83bab2
index b890b5b1c..286ce1ddd 100644
942b17
--- a/notify_socket.go
942b17
+++ b/notify_socket.go
b1f78e
@@ -6,11 +6,14 @@ import (
942b17
 	"bytes"
942b17
 	"fmt"
942b17
 	"net"
942b17
+	"os"
b1f78e
+	"path"
942b17
 	"path/filepath"
942b17
+	"strconv"
942b17
+	"time"
942b17
 
b1f78e
+	"github.com/opencontainers/runc/libcontainer"
942b17
 	"github.com/opencontainers/runtime-spec/specs-go"
942b17
-
b1f78e
-	"github.com/sirupsen/logrus"
942b17
 	"github.com/urfave/cli"
942b17
 )
b1f78e
 
b1f78e
@@ -26,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
b1f78e
 	}
b1f78e
 
b1f78e
 	root := filepath.Join(context.GlobalString("root"), id)
b1f78e
-	path := filepath.Join(root, "notify.sock")
b1f78e
+	socketPath := filepath.Join(root, "notify", "notify.sock")
b1f78e
 
b1f78e
 	notifySocket := ┬ČifySocket{
b1f78e
 		socket:     nil,
b1f78e
 		host:       notifySocketHost,
b1f78e
-		socketPath: path,
b1f78e
+		socketPath: socketPath,
b1f78e
 	}
b1f78e
 
b1f78e
 	return notifySocket
83bab2
@@ -43,13 +46,19 @@ func (s *notifySocket) Close() error {
b1f78e
 
b1f78e
 // If systemd is supporting sd_notify protocol, this function will add support
b1f78e
 // for sd_notify protocol from within the container.
b1f78e
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
b1f78e
-	mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
b1f78e
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
b1f78e
+	pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
b1f78e
+	mount := specs.Mount{
b1f78e
+		Destination: path.Dir(pathInContainer),
b1f78e
+		Source:      path.Dir(s.socketPath),
b1f78e
+		Options:     []string{"bind", "nosuid", "noexec", "nodev", "ro"},
b1f78e
+	}
b1f78e
 	spec.Mounts = append(spec.Mounts, mount)
b1f78e
-	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
b1f78e
+	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
b1f78e
+	return nil
b1f78e
 }
b1f78e
 
b1f78e
-func (s *notifySocket) setupSocket() error {
b1f78e
+func (s *notifySocket) bindSocket() error {
b1f78e
 	addr := net.UnixAddr{
b1f78e
 		Name: s.socketPath,
b1f78e
 		Net:  "unixgram",
b1f78e
@@ -64,45 +73,89 @@ func (s *notifySocket) setupSocket() error {
942b17
 	return nil
942b17
 }
942b17
 
b1f78e
-// pid1 must be set only with -d, as it is used to set the new process as the main process
b1f78e
-// for the service in systemd
83bab2
-func (s *notifySocket) run(pid1 int) {
942b17
-	buf := make([]byte, 512)
83bab2
-	notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
b1f78e
+func (s *notifySocket) setupSocketDirectory() error {
b1f78e
+	return os.Mkdir(path.Dir(s.socketPath), 0755)
b1f78e
+}
942b17
+
b1f78e
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
b1f78e
+	notifySocket := newNotifySocket(context, notifySocketHost, id)
b1f78e
+	if notifySocket == nil {
b1f78e
+		return nil, nil
b1f78e
+	}
942b17
+
b1f78e
+	if err := notifySocket.bindSocket(); err != nil {
b1f78e
+		return nil, err
942b17
+	}
b1f78e
+	return notifySocket, nil
942b17
+}
942b17
+
b1f78e
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
b1f78e
+	s, err := container.State()
942b17
+	if err != nil {
b1f78e
+		return err
942b17
+	}
b1f78e
+	return n.run(s.InitProcessPid)
b1f78e
+}
b1f78e
+
b1f78e
+func (n *notifySocket) run(pid1 int) error {
b1f78e
+	if n.socket == nil {
b1f78e
+		return nil
942b17
+	}
b1f78e
+	notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
b1f78e
 	client, err := net.DialUnix("unixgram", nil, ┬ČifySocketHostAddr)
b1f78e
 	if err != nil {
b1f78e
-		logrus.Error(err)
b1f78e
-		return
b1f78e
+		return err
b1f78e
 	}
b1f78e
-	for {
83bab2
-		r, err := s.socket.Read(buf)
b1f78e
-		if err != nil {
b1f78e
-			break
942b17
+
b1f78e
+	ticker := time.NewTicker(time.Millisecond * 100)
b1f78e
+	defer ticker.Stop()
942b17
+
942b17
+	fileChan := make(chan []byte)
942b17
+	go func() {
942b17
+		for {
942b17
+			buf := make([]byte, 512)
b1f78e
+			r, err := n.socket.Read(buf)
942b17
+			if err != nil {
942b17
+				return
942b17
+			}
b1f78e
+			got := buf[0:r]
b1f78e
+			if !bytes.HasPrefix(got, []byte("READY=")) {
b1f78e
+				continue
b1f78e
+			}
b1f78e
+			fileChan <- got
b1f78e
+			return
942b17
 		}
942b17
-		var out bytes.Buffer
942b17
-		for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
942b17
-			if bytes.HasPrefix(line, []byte("READY=")) {
942b17
+	}()
942b17
+
942b17
+	for {
942b17
+		select {
b1f78e
+		case <-ticker.C:
b1f78e
+			_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
b1f78e
+			if err != nil {
b1f78e
+				return nil
b1f78e
+			}
942b17
+		case b := <-fileChan:
942b17
+			for _, line := range bytes.Split(b, []byte{'\n'}) {
942b17
+				var out bytes.Buffer
942b17
 				_, err = out.Write(line)
942b17
 				if err != nil {
b1f78e
-					return
b1f78e
+					return err
b1f78e
 				}
b1f78e
 
b1f78e
 				_, err = out.Write([]byte{'\n'})
b1f78e
 				if err != nil {
b1f78e
-					return
b1f78e
+					return err
b1f78e
 				}
b1f78e
 
b1f78e
 				_, err = client.Write(out.Bytes())
b1f78e
 				if err != nil {
b1f78e
-					return
b1f78e
+					return err
942b17
 				}
942b17
 
942b17
 				// now we can inform systemd to use pid1 as the pid to monitor
942b17
-				if pid1 > 0 {
942b17
-					newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
942b17
-					client.Write([]byte(newPid))
942b17
-				}
b1f78e
-				return
942b17
+				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
942b17
+				client.Write([]byte(newPid))
b1f78e
+				return nil
942b17
 			}
942b17
 		}
b1f78e
 	}
942b17
diff --git a/signals.go b/signals.go
83bab2
index b67f65a03..dd25e094c 100644
942b17
--- a/signals.go
942b17
+++ b/signals.go
83bab2
@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
942b17
 			h.notifySocket.run(pid1)
942b17
 			return 0, nil
942b17
 		}
83bab2
+		h.notifySocket.run(os.Getpid())
83bab2
 		go h.notifySocket.run(0)
942b17
 	}
942b17
 
83bab2
@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
942b17
 					// status because we must ensure that any of the go specific process
942b17
 					// fun such as flushing pipes are complete before we return.
942b17
 					process.Wait()
942b17
-					if h.notifySocket != nil {
942b17
-						h.notifySocket.Close()
942b17
-					}
942b17
 					return e.status, nil
942b17
 				}
942b17
 			}
b1f78e
diff --git a/start.go b/start.go
83bab2
index 2bb698b20..3a1769a43 100644
b1f78e
--- a/start.go
b1f78e
+++ b/start.go
b1f78e
@@ -3,6 +3,7 @@ package main
b1f78e
 import (
b1f78e
 	"errors"
b1f78e
 	"fmt"
b1f78e
+	"os"
b1f78e
 
b1f78e
 	"github.com/opencontainers/runc/libcontainer"
b1f78e
 	"github.com/urfave/cli"
b1f78e
@@ -31,7 +32,17 @@ your host.`,
b1f78e
 		}
b1f78e
 		switch status {
b1f78e
 		case libcontainer.Created:
b1f78e
-			return container.Exec()
b1f78e
+			notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
b1f78e
+			if err != nil {
b1f78e
+				return err
b1f78e
+			}
b1f78e
+			if err := container.Exec(); err != nil {
b1f78e
+				return err
b1f78e
+			}
b1f78e
+			if notifySocket != nil {
b1f78e
+				return notifySocket.waitForContainer(container)
b1f78e
+			}
b1f78e
+			return nil
b1f78e
 		case libcontainer.Stopped:
b1f78e
 			return errors.New("cannot start a container that has stopped")
b1f78e
 		case libcontainer.Running:
b1f78e
diff --git a/utils_linux.go b/utils_linux.go
83bab2
index a37b1c3df..4921bd94b 100644
b1f78e
--- a/utils_linux.go
b1f78e
+++ b/utils_linux.go
83bab2
@@ -401,7 +401,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
b1f78e
 
b1f78e
 	notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
b1f78e
 	if notifySocket != nil {
b1f78e
-		notifySocket.setupSpec(context, spec)
b1f78e
+		if err := notifySocket.setupSpec(context, spec); err != nil {
b1f78e
+			return -1, err
b1f78e
+		}
b1f78e
 	}
b1f78e
 
b1f78e
 	container, err := createContainer(context, id, spec)
83bab2
@@ -410,10 +412,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
b1f78e
 	}
b1f78e
 
b1f78e
 	if notifySocket != nil {
b1f78e
-		err := notifySocket.setupSocket()
b1f78e
+		err := notifySocket.setupSocketDirectory()
b1f78e
 		if err != nil {
b1f78e
 			return -1, err
b1f78e
 		}
b1f78e
+		if action == CT_ACT_RUN {
b1f78e
+			err := notifySocket.bindSocket()
b1f78e
+			if err != nil {
b1f78e
+				return -1, err
b1f78e
+			}
b1f78e
+		}
b1f78e
 	}
b1f78e
 
b1f78e
 	// Support on-demand socket activation by passing file descriptors into the container init process.