Blob Blame History Raw
From f641455c06f18da52bf7fd9c5228d6e3c5ce1453 Mon Sep 17 00:00:00 2001
From: "Owen W. Taylor" <otaylor@fishsoup.net>
Date: Fri, 20 Oct 2023 11:26:17 -0400
Subject: [PATCH] Docker.py: Pass the use_ino option to fix hardlinks

When using libguestfs to mount the intermediate image to create a tarball,
pass the "use_ino" option so that original inode numbers are preserved.
This fixes a problem where hardlinks were not properly preserved in the
output.

This is a minimal-code-change fix - an edge case bug and better approach are
described in comments.

Resolves #412
---
 imagefactory_plugins/Docker/Docker.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/imagefactory_plugins/Docker/Docker.py b/imagefactory_plugins/Docker/Docker.py
index 68e12c3926bb..960b0e43325c 100644
--- a/imagefactory_plugins/Docker/Docker.py
+++ b/imagefactory_plugins/Docker/Docker.py
@@ -312,12 +312,32 @@ class Docker(object):
         # we call a blocking function to activate the mount, which requires a thread
         # We also need a temp dir to mount it to - do our best to clean up when things
         # go wrong
+        #
+        # A better approach here would be to use:
+        #   g.tar_out_opts("/", dest_filename, excludes=[excludes])
+        # Though that would break compatibility with the tar_options parameter.
+        #
         tempdir = None
         fuse_thread = None
         try:
             tempdir = tempfile.mkdtemp(dir=storagedir)
             self.log.debug("Mounting input image locally at (%s)" % (tempdir))
-            guestfs_handle.mount_local(tempdir)
+
+            # The "use_ino" option causes FUSE to pass through the original inode
+            # numbers. Without it tar cannot properly detect hardlinks, possibly greatly
+            # increasing the size of the image.  This does create an edge case. If there
+            # are:
+            #
+            #  - Two separate groups of > 1 files hardlinked together
+            #  - On different partitions
+            #  - With the same inode number
+            #
+            # Then the groups will be incorrectly merged in the output image. This
+            # is unlikely to be encountered with typical container images, where almost
+            # all files are on a single partition. The correct fix is to use
+            # g.tar_out_opts() as described above.
+
+            guestfs_handle.mount_local(tempdir, options="use_ino")
             def _run_guestmount(g):
                 g.mount_local_run()
             self.log.debug("Launching mount_local_run thread")
-- 
2.41.0