Blob Blame History Raw
From b23b5b32250e5a03e4cc38ccf973e25e63ccc6d9 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 12 Sep 2019 10:38:48 +0100
Subject: [PATCH 3/3] interop: Retry TCP connections to qemu-nbd.

The test interop-qemu-nbd-tls-certs frequently fails on slow (32 bit)
machines in Fedora Koji.  (Is crypto slow on these already overloaded
machines?)

As we cannot wait for a signal when qemu-nbd is ready to start serving,
we have to use a sleep.  The current sleep is 5 seconds, which is not
long enough.  Making the sleep longer would work but is inconsiderate
for people using faster machines.  Therefore replace this with a retry
loop with exponential backoff.

I tested this with a simple wrapper around qemu-nbd which did:

  sleep 5; exec /usr/bin/qemu-nbd "$@"
---
 interop/interop.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/interop/interop.c b/interop/interop.c
index 662d871..a3ab39b 100644
--- a/interop/interop.c
+++ b/interop/interop.c
@@ -28,6 +28,7 @@
 #include <fcntl.h>
 #include <time.h>
 #include <signal.h>
+#include <errno.h>
 #include <sys/types.h>
 
 #include <libnbd.h>
@@ -44,6 +45,7 @@ main (int argc, char *argv[])
   int port;
   char port_str[16];
   pid_t pid = -1;
+  int retry;
 #endif
   int64_t actual_size;
   char buf[512];
@@ -114,14 +116,19 @@ main (int argc, char *argv[])
   }
 
   /* Unfortunately there's no good way to wait for qemu-nbd to start
-   * serving, so ...
+   * serving, so we need to retry here.
    */
-  sleep (5);
-
-  if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) {
-    fprintf (stderr, "%s\n", nbd_get_error ());
-    goto out;
+  for (retry = 0; retry < 5; ++retry) {
+    sleep (1 << retry);
+    if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) {
+      fprintf (stderr, "%s\n", nbd_get_error ());
+      if (nbd_get_errno () != ECONNREFUSED)
+        goto out;
+    }
+    else break;
   }
+  if (retry == 5)
+    goto out;
 
 #else /* !SERVE_OVER_TCP */
 
-- 
2.23.0