diff --git a/0001-When-exporting-use-hardlinks-for-duplicated-files.patch b/0001-When-exporting-use-hardlinks-for-duplicated-files.patch new file mode 100644 index 0000000..751409c --- /dev/null +++ b/0001-When-exporting-use-hardlinks-for-duplicated-files.patch @@ -0,0 +1,189 @@ +From 3b2fd6e9ff0a3a91a2b72f524492e4f198069dec Mon Sep 17 00:00:00 2001 +From: "Owen W. Taylor" +Date: Fri, 29 Sep 2023 12:09:04 -0400 +Subject: [PATCH] When exporting, use hardlinks for duplicated files + +For ostree_repo_export_tree_to_archive(), and 'ostree export', when the +exported tree contains multiple files with the same checksum, write an +archive with hard links. + +Without this, importing a tree, then exporting it again breaks +hardlinks. + +As an example of savings: this reduces the (compressed) size of the +Fedora Flatpak Runtime image from 1345MiB to 712MiB. + +Resolves: #2925 +--- + src/libostree/ostree-repo-libarchive.c | 50 ++++++++++++++++++++------ + tests/archive-test.sh | 4 +-- + tests/libtest.sh | 7 ++++ + tests/test-composefs.sh | 2 +- + tests/test-export.sh | 10 +++++- + 5 files changed, 58 insertions(+), 15 deletions(-) + +diff --git a/src/libostree/ostree-repo-libarchive.c b/src/libostree/ostree-repo-libarchive.c +index d0f46883..65a30933 100644 +--- a/src/libostree/ostree-repo-libarchive.c ++++ b/src/libostree/ostree-repo-libarchive.c +@@ -943,15 +943,10 @@ ostree_repo_write_archive_to_mtree_from_fd (OstreeRepo *self, int fd, OstreeMuta + + #ifdef HAVE_LIBARCHIVE + +-static gboolean +-file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path, +- GFileInfo *file_info, struct archive_entry *entry, GError **error) ++static char * ++file_to_pathstr (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path) + { +- gboolean ret = FALSE; + g_autofree char *pathstr = g_file_get_relative_path (root, path); +- g_autoptr (GVariant) xattrs = NULL; +- time_t ts = (time_t)opts->timestamp_secs; +- + if (opts->path_prefix && 
opts->path_prefix[0]) + { + g_autofree char *old_pathstr = pathstr; +@@ -964,6 +959,18 @@ file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, + pathstr = g_strdup ("."); + } + ++ return g_steal_pointer (&pathstr); ++} ++ ++static gboolean ++file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path, ++ GFileInfo *file_info, struct archive_entry *entry, GError **error) ++{ ++ gboolean ret = FALSE; ++ g_autofree char *pathstr = file_to_pathstr (root, opts, path); ++ g_autoptr (GVariant) xattrs = NULL; ++ time_t ts = (time_t)opts->timestamp_secs; ++ + archive_entry_update_pathname_utf8 (entry, pathstr); + archive_entry_set_ctime (entry, ts, OSTREE_TIMESTAMP); + archive_entry_set_mtime (entry, ts, OSTREE_TIMESTAMP); +@@ -1021,7 +1028,8 @@ out: + static gboolean + write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchiveOptions *opts, + GFile *root, GFile *dir, struct archive *a, +- GCancellable *cancellable, GError **error) ++ GHashTable *seen_checksums, GCancellable *cancellable, ++ GError **error) + { + gboolean ret = FALSE; + g_autoptr (GFileInfo) dir_info = NULL; +@@ -1057,8 +1065,8 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive + /* First, handle directories recursively */ + if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY) + { +- if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, cancellable, +- error)) ++ if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, seen_checksums, ++ cancellable, error)) + goto out; + + /* Go to the next entry */ +@@ -1086,9 +1094,27 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive + g_autoptr (GInputStream) file_in = NULL; + g_autoptr (GFileInfo) regular_file_info = NULL; + const char *checksum; ++ GFile *old_path; + + checksum = ostree_repo_file_get_checksum ((OstreeRepoFile *)path); + ++ old_path = g_hash_table_lookup 
(seen_checksums, checksum); ++ if (old_path) ++ { ++ g_autofree char *old_pathstr = file_to_pathstr (root, opts, old_path); ++ ++ archive_entry_set_hardlink (entry, old_pathstr); ++ if (!write_header_free_entry (a, &entry, error)) ++ goto out; ++ ++ break; ++ } ++ else ++ { ++ /* The checksum is owned by path (an OstreeRepoFile) */ ++ g_hash_table_insert (seen_checksums, (char *)checksum, g_object_ref (path)); ++ } ++ + if (!ostree_repo_load_file (self, checksum, &file_in, &regular_file_info, NULL, + cancellable, error)) + goto out; +@@ -1168,9 +1194,11 @@ ostree_repo_export_tree_to_archive (OstreeRepo *self, OstreeRepoExportArchiveOpt + #ifdef HAVE_LIBARCHIVE + gboolean ret = FALSE; + struct archive *a = archive; ++ g_autoptr (GHashTable) seen_checksums ++ = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_object_unref); + + if (!write_directory_to_libarchive_recurse (self, opts, (GFile *)root, (GFile *)root, a, +- cancellable, error)) ++ seen_checksums, cancellable, error)) + goto out; + + ret = TRUE; +diff --git a/tests/archive-test.sh b/tests/archive-test.sh +index 6b45790e..f6bfd5fb 100644 +--- a/tests/archive-test.sh ++++ b/tests/archive-test.sh +@@ -72,9 +72,9 @@ date > test-overlays/overlaid-file + $OSTREE commit ${COMMIT_ARGS} -b test-base --base test2 --owner-uid 42 --owner-gid 42 test-overlays/ + $OSTREE ls -R test-base > ls.txt + if can_create_whiteout_devices; then +- assert_streq "$(wc -l < ls.txt)" 17 ++ assert_streq "$(wc -l < ls.txt)" 22 + else +- assert_streq "$(wc -l < ls.txt)" 14 ++ assert_streq "$(wc -l < ls.txt)" 19 + fi + + assert_streq "$(grep '42.*42' ls.txt | wc -l)" 2 +diff --git a/tests/libtest.sh b/tests/libtest.sh +index fa937827..d1c99eab 100755 +--- a/tests/libtest.sh ++++ b/tests/libtest.sh +@@ -249,6 +249,13 @@ setup_test_repository () { + mkdir baz/another/ + echo x > baz/another/y + ++ mkdir baz/sub1 ++ echo SAME_CONTENT > baz/sub1/duplicate_a ++ echo SAME_CONTENT > baz/sub1/duplicate_b ++ ++ mkdir baz/sub2 ++ echo 
SAME_CONTENT > baz/sub2/duplicate_c ++ + # if we are running inside a container we cannot test + # the overlayfs whiteout marker passthrough + if ! test -n "${OSTREE_NO_WHITEOUTS:-}"; then +diff --git a/tests/test-export.sh b/tests/test-export.sh +index e490ae40..6b8de94c 100755 +--- a/tests/test-export.sh ++++ b/tests/test-export.sh +@@ -28,7 +28,7 @@ fi + + setup_test_repository "archive" + +-echo '1..5' ++echo '1..6' + + $OSTREE checkout test2 test2-co + $OSTREE commit --no-xattrs -b test2-noxattrs -s "test2 without xattrs" --tree=dir=test2-co +@@ -81,3 +81,11 @@ assert_file_empty diff.txt + rm test2.tar diff.txt t -rf + + echo 'ok export import' ++ ++cd ${test_tmpdir} ++${OSTREE} 'export' test2 -o test2.tar ++tar tvf test2.tar > test2.manifest ++assert_file_has_content test2.manifest 'baz/sub1/duplicate_b link to baz/sub1/duplicate_a' ++assert_file_has_content test2.manifest 'baz/sub2/duplicate_c link to baz/sub1/duplicate_a' ++ ++echo 'ok export hard links' +-- +2.41.0 + diff --git a/ostree.spec b/ostree.spec index 66cf438..10b07ec 100644 --- a/ostree.spec +++ b/ostree.spec @@ -8,11 +8,13 @@ Summary: Tool for managing bootable, immutable filesystem trees Name: ostree Version: 2023.6 -Release: 1%{?dist} +Release: 2%{?dist} Source0: https://github.com/ostreedev/%{name}/releases/download/v%{version}/libostree-%{version}.tar.xz License: LGPL-2.0-or-later URL: https://ostree.readthedocs.io/en/latest/ +Patch0: 0001-When-exporting-use-hardlinks-for-duplicated-files.patch + BuildRequires: make BuildRequires: git # We always run autogen.sh @@ -170,6 +172,10 @@ find %{buildroot} -name '*.la' -delete %endif %changelog +* Fri Oct 06 2023 Colin Walters - 2023.6-2 +- Cherry pick + https://github.com/ostreedev/ostree/pull/3060/commits/3b2fd6e9ff0a3a91a2b72f524492e4f198069dec + * Fri Aug 25 2023 Colin Walters - 2023.6-1 - https://github.com/ostreedev/ostree/releases/tag/v2023.6