Blob Blame History Raw
From 929da557efea6c7d2340467d9a7fdae7fda6d2b1 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 13 Sep 2022 07:35:13 +0200
Subject: tools/xenstore: make the internal memory data base the default

Having a file backed data base has the only advantage of being capable
to dump the contents of it while Xenstore is running, and potentially
using less swap space in case the data base can't be kept in memory.

It has the major disadvantage of a huge performance overhead: switching
to keep the data base in memory only speeds up live update of xenstored
with 120000 nodes from 20 minutes to 11 seconds. A complete tree walk
of this configuration will be reduced from 7 seconds to 280 msecs
(measured by "xenstore-control check").

So make the internal memory data base the default and enhance the
"--internal-db" command line parameter to take an optional parameter
allowing to switch the internal data base back to the file based one.

This is part of XSA-419.

Reported-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>

diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c
index 2d9ab6f1c583..04e351ca29a8 100644
--- a/tools/helpers/init-xenstore-domain.c
+++ b/tools/helpers/init-xenstore-domain.c
@@ -222,9 +222,9 @@ static int build(xc_interface *xch)
     }
 
     if ( param )
-        snprintf(cmdline, 512, "--event %d --internal-db %s", rv, param);
+        snprintf(cmdline, 512, "--event %d %s", rv, param);
     else
-        snprintf(cmdline, 512, "--event %d --internal-db", rv);
+        snprintf(cmdline, 512, "--event %d", rv);
 
     dom->guest_domid = domid;
     dom->cmdline = xc_dom_strdup(dom, cmdline);
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 13e48aaa731c..36fb4a832834 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -2308,7 +2308,7 @@ static void accept_connection(int sock)
 }
 #endif
 
-static int tdb_flags;
+static int tdb_flags = TDB_INTERNAL | TDB_NOLOCK;
 
 /* We create initial nodes manually. */
 static void manual_node(const char *name, const char *child)
@@ -2618,7 +2618,8 @@ static void usage(void)
 "                          watch-event: time a watch-event is kept pending\n"
 "  -R, --no-recovery       to request that no recovery should be attempted when\n"
 "                          the store is corrupted (debug only),\n"
-"  -I, --internal-db       store database in memory, not on disk\n"
+"  -I, --internal-db [on|off] store database in memory, not on disk, default is\n"
+"                          memory, with \"--internal-db off\" it is on disk\n"
 "  -K, --keep-orphans      don't delete nodes owned by a domain when the\n"
 "                          domain is deleted (this is a security risk!)\n"
 "  -V, --verbose           to request verbose execution.\n");
@@ -2644,7 +2645,7 @@ static struct option options[] = {
 	{ "quota-soft", 1, NULL, 'q' },
 	{ "timeout", 1, NULL, 'w' },
 	{ "no-recovery", 0, NULL, 'R' },
-	{ "internal-db", 0, NULL, 'I' },
+	{ "internal-db", 2, NULL, 'I' },
 	{ "keep-orphans", 0, NULL, 'K' },
 	{ "verbose", 0, NULL, 'V' },
 	{ "watch-nb", 1, NULL, 'W' },
@@ -2725,7 +2726,8 @@ int main(int argc, char *argv[])
 	orig_argc = argc;
 	orig_argv = argv;
 
-	while ((opt = getopt_long(argc, argv, "DE:F:HKNPS:t:A:M:Q:q:T:RVW:w:U",
+	while ((opt = getopt_long(argc, argv,
+				  "DE:F:HI::KNPS:t:A:M:Q:q:T:RVW:w:U",
 				  options, NULL)) != -1) {
 		switch (opt) {
 		case 'D':
@@ -2759,7 +2761,8 @@ int main(int argc, char *argv[])
 			tracefile = optarg;
 			break;
 		case 'I':
-			tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
+			if (optarg && !strcmp(optarg, "off"))
+				tdb_flags = 0;
 			break;
 		case 'K':
 			keep_orphans = true;