e464725
From: Chris Mason <clm@fb.com>
e464725
Date: Wed, 15 Oct 2014 13:50:56 -0700
57e2b74
Subject: [PATCH] Revert "Btrfs: race free update of commit root for ro
57e2b74
 snapshots"
57e2b74
57e2b74
This reverts commit 9c3b306e1c9e6be4be09e99a8fe2227d1005effc.
e464725
e464725
(cherry picked from commit d37973082b453ba6b89ec07eb7b84305895d35e1)
e464725
e464725
Switching only one commit root during a transaction is wrong because it
e464725
leads the fs into an inconsistent state. All commit roots should be
e464725
switched at once, at transaction commit time, otherwise backref walking
e464725
can often miss important references that were only accessible through
e464725
the old commit root.  Plus, the root item for the snapshot's root wasn't
e464725
getting updated and preventing the next transaction commit to do it.
e464725
e464725
This made several users get into random corruption issues after creation
e464725
of readonly snapshots.
e464725
e464725
A regression test for xfstests will follow soon.
e464725
e464725
Cc: stable@vger.kernel.org # 3.17
e464725
Signed-off-by: Filipe Manana <fdmanana@suse.com>
e464725
Signed-off-by: Chris Mason <clm@fb.com>
57e2b74
---
57e2b74
 fs/btrfs/inode.c | 36 ------------------------------------
57e2b74
 fs/btrfs/ioctl.c | 33 +++++++++++++++++++++++++++++++++
57e2b74
 2 files changed, 33 insertions(+), 36 deletions(-)
57e2b74
57e2b74
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
e464725
index 16454b6efc55..886d8d42640d 100644
57e2b74
--- a/fs/btrfs/inode.c
57e2b74
+++ b/fs/btrfs/inode.c
e464725
@@ -5203,42 +5203,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
57e2b74
 			iput(inode);
57e2b74
 			inode = ERR_PTR(ret);
57e2b74
 		}
57e2b74
-		/*
57e2b74
-		 * If orphan cleanup did remove any orphans, it means the tree
57e2b74
-		 * was modified and therefore the commit root is not the same as
57e2b74
-		 * the current root anymore. This is a problem, because send
57e2b74
-		 * uses the commit root and therefore can see inode items that
57e2b74
-		 * don't exist in the current root anymore, and for example make
57e2b74
-		 * calls to btrfs_iget, which will do tree lookups based on the
57e2b74
-		 * current root and not on the commit root. Those lookups will
57e2b74
-		 * fail, returning a -ESTALE error, and making send fail with
57e2b74
-		 * that error. So make sure a send does not see any orphans we
57e2b74
-		 * have just removed, and that it will see the same inodes
57e2b74
-		 * regardless of whether a transaction commit happened before
57e2b74
-		 * it started (meaning that the commit root will be the same as
57e2b74
-		 * the current root) or not.
57e2b74
-		 */
57e2b74
-		if (sub_root->node != sub_root->commit_root) {
57e2b74
-			u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
57e2b74
-
57e2b74
-			if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
57e2b74
-				struct extent_buffer *eb;
57e2b74
-
57e2b74
-				/*
57e2b74
-				 * Assert we can't have races between dentry
57e2b74
-				 * lookup called through the snapshot creation
57e2b74
-				 * ioctl and the VFS.
57e2b74
-				 */
57e2b74
-				ASSERT(mutex_is_locked(&dir->i_mutex));
57e2b74
-
57e2b74
-				down_write(&root->fs_info->commit_root_sem);
57e2b74
-				eb = sub_root->commit_root;
57e2b74
-				sub_root->commit_root =
57e2b74
-					btrfs_root_node(sub_root);
57e2b74
-				up_write(&root->fs_info->commit_root_sem);
57e2b74
-				free_extent_buffer(eb);
57e2b74
-			}
57e2b74
-		}
57e2b74
 	}
57e2b74
 
57e2b74
 	return inode;
57e2b74
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
e464725
index 91269bd9ad05..b765d412cbb6 100644
57e2b74
--- a/fs/btrfs/ioctl.c
57e2b74
+++ b/fs/btrfs/ioctl.c
e464725
@@ -714,6 +714,39 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
57e2b74
 	if (ret)
57e2b74
 		goto fail;
57e2b74
 
57e2b74
+	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
57e2b74
+	if (ret)
57e2b74
+		goto fail;
57e2b74
+
57e2b74
+	/*
57e2b74
+	 * If orphan cleanup did remove any orphans, it means the tree was
57e2b74
+	 * modified and therefore the commit root is not the same as the
57e2b74
+	 * current root anymore. This is a problem, because send uses the
57e2b74
+	 * commit root and therefore can see inode items that don't exist
57e2b74
+	 * in the current root anymore, and for example make calls to
57e2b74
+	 * btrfs_iget, which will do tree lookups based on the current root
57e2b74
+	 * and not on the commit root. Those lookups will fail, returning a
57e2b74
+	 * -ESTALE error, and making send fail with that error. So make sure
57e2b74
+	 * a send does not see any orphans we have just removed, and that it
57e2b74
+	 * will see the same inodes regardless of whether a transaction
57e2b74
+	 * commit happened before it started (meaning that the commit root
57e2b74
+	 * will be the same as the current root) or not.
57e2b74
+	 */
57e2b74
+	if (readonly && pending_snapshot->snap->node !=
57e2b74
+	    pending_snapshot->snap->commit_root) {
57e2b74
+		trans = btrfs_join_transaction(pending_snapshot->snap);
57e2b74
+		if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
57e2b74
+			ret = PTR_ERR(trans);
57e2b74
+			goto fail;
57e2b74
+		}
57e2b74
+		if (!IS_ERR(trans)) {
57e2b74
+			ret = btrfs_commit_transaction(trans,
57e2b74
+						       pending_snapshot->snap);
57e2b74
+			if (ret)
57e2b74
+				goto fail;
57e2b74
+		}
57e2b74
+	}
57e2b74
+
57e2b74
 	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
57e2b74
 	if (IS_ERR(inode)) {
57e2b74
 		ret = PTR_ERR(inode);
57e2b74
-- 
57e2b74
1.9.3
57e2b74