Blame 0035-tests-improve-the-image-streamer-process-control.patch

Radostin Stoyanov 46abdd7
From b405b1b0142bea0eeec1437a867b1c0ef7a3afe1 Mon Sep 17 00:00:00 2001
Radostin Stoyanov 46abdd7
From: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
Radostin Stoyanov 46abdd7
Date: Fri, 29 Oct 2021 02:49:31 +0000
Radostin Stoyanov 46abdd7
Subject: [PATCH 035/120] tests: improve the image streamer process control
Radostin Stoyanov 46abdd7
Radostin Stoyanov 46abdd7
When exceptions are raised during testing, the image streamer process
Radostin Stoyanov 46abdd7
should be terminated as opposed to being left hanging.
Radostin Stoyanov 46abdd7
This could lead to the whole test suite being left hanging as it waits
Radostin Stoyanov 46abdd7
for all child processes to exit.
Radostin Stoyanov 46abdd7
Radostin Stoyanov 46abdd7
Signed-off-by: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
Radostin Stoyanov 46abdd7
---
Radostin Stoyanov 46abdd7
 test/zdtm.py | 44 ++++++++++++++++++++++++++++++++++----------
Radostin Stoyanov 46abdd7
 1 file changed, 34 insertions(+), 10 deletions(-)
Radostin Stoyanov 46abdd7
Radostin Stoyanov 46abdd7
diff --git a/test/zdtm.py b/test/zdtm.py
Radostin Stoyanov 46abdd7
index 0a52e1b96..fc7b8a183 100755
Radostin Stoyanov 46abdd7
--- a/test/zdtm.py
Radostin Stoyanov 46abdd7
+++ b/test/zdtm.py
Radostin Stoyanov 46abdd7
@@ -1039,6 +1039,7 @@ class criu:
Radostin Stoyanov 46abdd7
         self.__lazy_pages_p = None
Radostin Stoyanov 46abdd7
         self.__page_server_p = None
Radostin Stoyanov 46abdd7
         self.__dump_process = None
Radostin Stoyanov 46abdd7
+        self.__img_streamer_process = None
Radostin Stoyanov 46abdd7
         self.__tls = self.__tls_options() if opts['tls'] else []
Radostin Stoyanov 46abdd7
         self.__criu_bin = opts['criu_bin']
Radostin Stoyanov 46abdd7
         self.__crit_bin = opts['crit_bin']
Radostin Stoyanov 46abdd7
@@ -1065,6 +1066,11 @@ class criu:
Radostin Stoyanov 46abdd7
             self.__dump_process = None
Radostin Stoyanov 46abdd7
             if ret:
Radostin Stoyanov 46abdd7
                 raise test_fail_exc("criu dump exited with %s" % ret)
Radostin Stoyanov 46abdd7
+        if self.__img_streamer_process:
Radostin Stoyanov 46abdd7
+            ret = self.wait_for_criu_image_streamer()
Radostin Stoyanov 46abdd7
+            if ret:
Radostin Stoyanov 46abdd7
+                raise test_fail_exc("criu-image-streamer exited with %s" % ret)
Radostin Stoyanov 46abdd7
+
Radostin Stoyanov 46abdd7
         return
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
     def logs(self):
Radostin Stoyanov 46abdd7
@@ -1219,8 +1225,10 @@ class criu:
Radostin Stoyanov 46abdd7
                 stent['pages_written'])
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__stream:
Radostin Stoyanov 46abdd7
-            p = self.spawn_criu_image_streamer("extract")
Radostin Stoyanov 46abdd7
-            p.wait()
Radostin Stoyanov 46abdd7
+            self.spawn_criu_image_streamer("extract")
Radostin Stoyanov 46abdd7
+            ret = self.wait_for_criu_image_streamer()
Radostin Stoyanov 46abdd7
+            if ret:
Radostin Stoyanov 46abdd7
+                raise test_fail_exc("criu-image-streamer (extract) exited with %s" % ret)
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         real_written = 0
Radostin Stoyanov 46abdd7
         for f in os.listdir(self.__ddir()):
Radostin Stoyanov 46abdd7
@@ -1262,6 +1270,8 @@ class criu:
Radostin Stoyanov 46abdd7
                    "--progress-fd {progress_fd}",
Radostin Stoyanov 46abdd7
                    action]
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
+        log = open(os.path.join(self.__ddir(), "img-streamer.log"), "w")
Radostin Stoyanov 46abdd7
+
Radostin Stoyanov 46abdd7
         # * As we are using a shell pipe command, we want to use pipefail.
Radostin Stoyanov 46abdd7
         # Otherwise, failures stay unnoticed. For this, we use bash as sh
Radostin Stoyanov 46abdd7
         # doesn't support that feature.
Radostin Stoyanov 46abdd7
@@ -1270,7 +1280,9 @@ class criu:
Radostin Stoyanov 46abdd7
             progress_fd=progress_w,
Radostin Stoyanov 46abdd7
             images_dir=self.__ddir(),
Radostin Stoyanov 46abdd7
             img_file=os.path.join(self.__ddir(), STREAMED_IMG_FILE_NAME)
Radostin Stoyanov 46abdd7
-        )], close_fds=False)
Radostin Stoyanov 46abdd7
+        )], stderr=log, close_fds=False)
Radostin Stoyanov 46abdd7
+
Radostin Stoyanov 46abdd7
+        log.close()
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         os.close(progress_w)
Radostin Stoyanov 46abdd7
         progress = os.fdopen(progress_r, "r")
Radostin Stoyanov 46abdd7
@@ -1287,7 +1299,15 @@ class criu:
Radostin Stoyanov 46abdd7
                 raise test_fail_exc(
Radostin Stoyanov 46abdd7
                     "criu-image-streamer is not starting (exit_code=%d)" % p.wait())
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
-        return p
Radostin Stoyanov 46abdd7
+        progress.close()
Radostin Stoyanov 46abdd7
+
Radostin Stoyanov 46abdd7
+        self.__img_streamer_process = p
Radostin Stoyanov 46abdd7
+
Radostin Stoyanov 46abdd7
+    def wait_for_criu_image_streamer(self):
Radostin Stoyanov 46abdd7
+        ret = self.__img_streamer_process.wait()
Radostin Stoyanov 46abdd7
+        grep_errors(os.path.join(self.__ddir(), "img-streamer.log"))
Radostin Stoyanov 46abdd7
+        self.__img_streamer_process = None
Radostin Stoyanov 46abdd7
+        return ret
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
     def dump(self, action, opts=[]):
Radostin Stoyanov 46abdd7
         self.__iter += 1
Radostin Stoyanov 46abdd7
@@ -1319,7 +1339,7 @@ class criu:
Radostin Stoyanov 46abdd7
         a_opts += self.__test.getdopts()
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__stream:
Radostin Stoyanov 46abdd7
-            streamer_p = self.spawn_criu_image_streamer("capture")
Radostin Stoyanov 46abdd7
+            self.spawn_criu_image_streamer("capture")
Radostin Stoyanov 46abdd7
             a_opts += ["--stream"]
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__dedup:
Radostin Stoyanov 46abdd7
@@ -1347,9 +1367,9 @@ class criu:
Radostin Stoyanov 46abdd7
                                               opts=a_opts + opts,
Radostin Stoyanov 46abdd7
                                               nowait=nowait)
Radostin Stoyanov 46abdd7
         if self.__stream:
Radostin Stoyanov 46abdd7
-            ret = streamer_p.wait()
Radostin Stoyanov 46abdd7
+            ret = self.wait_for_criu_image_streamer()
Radostin Stoyanov 46abdd7
             if ret:
Radostin Stoyanov 46abdd7
-                raise test_fail_exc("criu-image-streamer exited with %d" % ret)
Radostin Stoyanov 46abdd7
+                raise test_fail_exc("criu-image-streamer (capture) exited with %d" % ret)
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__mdedup and self.__iter > 1:
Radostin Stoyanov 46abdd7
             self.__criu_act("dedup", opts=[])
Radostin Stoyanov 46abdd7
@@ -1382,7 +1402,7 @@ class criu:
Radostin Stoyanov 46abdd7
             r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh']
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__stream:
Radostin Stoyanov 46abdd7
-            streamer_p = self.spawn_criu_image_streamer("serve")
Radostin Stoyanov 46abdd7
+            self.spawn_criu_image_streamer("serve")
Radostin Stoyanov 46abdd7
             r_opts += ["--stream"]
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         if self.__dedup:
Radostin Stoyanov 46abdd7
@@ -1419,9 +1439,9 @@ class criu:
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         self.__criu_act("restore", opts=r_opts + ["--restore-detached"])
Radostin Stoyanov 46abdd7
         if self.__stream:
Radostin Stoyanov 46abdd7
-            ret = streamer_p.wait()
Radostin Stoyanov 46abdd7
+            ret = self.wait_for_criu_image_streamer()
Radostin Stoyanov 46abdd7
             if ret:
Radostin Stoyanov 46abdd7
-                raise test_fail_exc("criu-image-streamer exited with %d" % ret)
Radostin Stoyanov 46abdd7
+                raise test_fail_exc("criu-image-streamer (serve) exited with %d" % ret)
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
         self.show_stats("restore")
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
@@ -1466,6 +1486,10 @@ class criu:
Radostin Stoyanov 46abdd7
             print("criu dump exited with %s" % self.__dump_process.wait())
Radostin Stoyanov 46abdd7
             grep_errors(os.path.join(self.__ddir(), "dump.log"))
Radostin Stoyanov 46abdd7
             self.__dump_process = None
Radostin Stoyanov 46abdd7
+        if self.__img_streamer_process:
Radostin Stoyanov 46abdd7
+            self.__img_streamer_process.terminate()
Radostin Stoyanov 46abdd7
+            ret = self.wait_for_criu_image_streamer()
Radostin Stoyanov 46abdd7
+            print("criu-image-streamer exited with %s" % ret)
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
 
Radostin Stoyanov 46abdd7
 def try_run_hook(test, args):
Radostin Stoyanov 46abdd7
-- 
Radostin Stoyanov 46abdd7
2.34.1
Radostin Stoyanov 46abdd7