From b405b1b0142bea0eeec1437a867b1c0ef7a3afe1 Mon Sep 17 00:00:00 2001
From: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
Date: Fri, 29 Oct 2021 02:49:31 +0000
Subject: [PATCH 035/120] tests: improve the image streamer process control
When exceptions are raised during testing, the image streamer process
should be terminated as opposed to being left hanging.
This could lead to the whole test suite to be left hanging as it waits
for all child processes to exit.
Signed-off-by: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
---
test/zdtm.py | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/test/zdtm.py b/test/zdtm.py
index 0a52e1b96..fc7b8a183 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1039,6 +1039,7 @@ class criu:
self.__lazy_pages_p = None
self.__page_server_p = None
self.__dump_process = None
+ self.__img_streamer_process = None
self.__tls = self.__tls_options() if opts['tls'] else []
self.__criu_bin = opts['criu_bin']
self.__crit_bin = opts['crit_bin']
@@ -1065,6 +1066,11 @@ class criu:
self.__dump_process = None
if ret:
raise test_fail_exc("criu dump exited with %s" % ret)
+ if self.__img_streamer_process:
+ ret = self.wait_for_criu_image_streamer()
+ if ret:
+ raise test_fail_exc("criu-image-streamer exited with %s" % ret)
+
return
def logs(self):
@@ -1219,8 +1225,10 @@ class criu:
stent['pages_written'])
if self.__stream:
- p = self.spawn_criu_image_streamer("extract")
- p.wait()
+ self.spawn_criu_image_streamer("extract")
+ ret = self.wait_for_criu_image_streamer()
+ if ret:
+ raise test_fail_exc("criu-image-streamer (extract) exited with %s" % ret)
real_written = 0
for f in os.listdir(self.__ddir()):
@@ -1262,6 +1270,8 @@ class criu:
"--progress-fd {progress_fd}",
action]
+ log = open(os.path.join(self.__ddir(), "img-streamer.log"), "w")
+
# * As we are using a shell pipe command, we want to use pipefail.
# Otherwise, failures stay unnoticed. For this, we use bash as sh
# doesn't support that feature.
@@ -1270,7 +1280,9 @@ class criu:
progress_fd=progress_w,
images_dir=self.__ddir(),
img_file=os.path.join(self.__ddir(), STREAMED_IMG_FILE_NAME)
- )], close_fds=False)
+ )], stderr=log, close_fds=False)
+
+ log.close()
os.close(progress_w)
progress = os.fdopen(progress_r, "r")
@@ -1287,7 +1299,15 @@ class criu:
raise test_fail_exc(
"criu-image-streamer is not starting (exit_code=%d)" % p.wait())
- return p
+ progress.close()
+
+ self.__img_streamer_process = p
+
+ def wait_for_criu_image_streamer(self):
+ ret = self.__img_streamer_process.wait()
+ grep_errors(os.path.join(self.__ddir(), "img-streamer.log"))
+ self.__img_streamer_process = None
+ return ret
def dump(self, action, opts=[]):
self.__iter += 1
@@ -1319,7 +1339,7 @@ class criu:
a_opts += self.__test.getdopts()
if self.__stream:
- streamer_p = self.spawn_criu_image_streamer("capture")
+ self.spawn_criu_image_streamer("capture")
a_opts += ["--stream"]
if self.__dedup:
@@ -1347,9 +1367,9 @@ class criu:
opts=a_opts + opts,
nowait=nowait)
if self.__stream:
- ret = streamer_p.wait()
+ ret = self.wait_for_criu_image_streamer()
if ret:
- raise test_fail_exc("criu-image-streamer exited with %d" % ret)
+ raise test_fail_exc("criu-image-streamer (capture) exited with %d" % ret)
if self.__mdedup and self.__iter > 1:
self.__criu_act("dedup", opts=[])
@@ -1382,7 +1402,7 @@ class criu:
r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh']
if self.__stream:
- streamer_p = self.spawn_criu_image_streamer("serve")
+ self.spawn_criu_image_streamer("serve")
r_opts += ["--stream"]
if self.__dedup:
@@ -1419,9 +1439,9 @@ class criu:
self.__criu_act("restore", opts=r_opts + ["--restore-detached"])
if self.__stream:
- ret = streamer_p.wait()
+ ret = self.wait_for_criu_image_streamer()
if ret:
- raise test_fail_exc("criu-image-streamer exited with %d" % ret)
+ raise test_fail_exc("criu-image-streamer (serve) exited with %d" % ret)
self.show_stats("restore")
@@ -1466,6 +1486,10 @@ class criu:
print("criu dump exited with %s" % self.__dump_process.wait())
grep_errors(os.path.join(self.__ddir(), "dump.log"))
self.__dump_process = None
+ if self.__img_streamer_process:
+ self.__img_streamer_process.terminate()
+ ret = self.wait_for_criu_image_streamer()
+ print("criu-image-streamer exited with %s" % ret)
def try_run_hook(test, args):
--
2.34.1