c8e81b8
From 011278485ecc3cd2a3954b5d4c73101d919bf1fa Mon Sep 17 00:00:00 2001
c8e81b8
From: Jan Kara <jack@suse.com>
c8e81b8
Date: Mon, 7 Dec 2015 14:34:49 -0500
c8e81b8
Subject: [PATCH 4/4] ext4: fix races of writeback with punch hole and zero
c8e81b8
 range
c8e81b8
c8e81b8
When doing delayed allocation, update of on-disk inode size is postponed
c8e81b8
until IO submission time. However hole punch or zero range fallocate
c8e81b8
calls can end up discarding the tail page cache page and thus on-disk
c8e81b8
inode size would never be properly updated.
c8e81b8
c8e81b8
Make sure the on-disk inode size is updated before truncating page
c8e81b8
cache.
c8e81b8
c8e81b8
Signed-off-by: Jan Kara <jack@suse.com>
c8e81b8
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
c8e81b8
---
c8e81b8
 fs/ext4/ext4.h    |  3 +++
c8e81b8
 fs/ext4/extents.c |  5 +++++
c8e81b8
 fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++++-
c8e81b8
 3 files changed, 42 insertions(+), 1 deletion(-)
c8e81b8
c8e81b8
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
c8e81b8
index 348a5ff4a0e2..80f76f092079 100644
c8e81b8
--- a/fs/ext4/ext4.h
c8e81b8
+++ b/fs/ext4/ext4.h
c8e81b8
@@ -2858,6 +2858,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
c8e81b8
 	return changed;
c8e81b8
 }
c8e81b8
 
c8e81b8
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
c8e81b8
+				      loff_t len);
c8e81b8
+
c8e81b8
 struct ext4_group_info {
c8e81b8
 	unsigned long   bb_state;
c8e81b8
 	struct rb_root  bb_free_root;
c8e81b8
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
c8e81b8
index 4b105c96df08..3578b25fccfd 100644
c8e81b8
--- a/fs/ext4/extents.c
c8e81b8
+++ b/fs/ext4/extents.c
c8e81b8
@@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
c8e81b8
 		 * released from page cache.
c8e81b8
 		 */
c8e81b8
 		down_write(&EXT4_I(inode)->i_mmap_sem);
c8e81b8
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
c8e81b8
+		if (ret) {
c8e81b8
+			up_write(&EXT4_I(inode)->i_mmap_sem);
c8e81b8
+			goto out_dio;
c8e81b8
+		}
c8e81b8
 		/* Now release the pages and zero block aligned part of pages */
c8e81b8
 		truncate_pagecache_range(inode, start, end - 1);
c8e81b8
 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
c8e81b8
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
c8e81b8
index d1207d03c961..472e608da13d 100644
c8e81b8
--- a/fs/ext4/inode.c
c8e81b8
+++ b/fs/ext4/inode.c
c8e81b8
@@ -3559,6 +3559,35 @@ int ext4_can_truncate(struct inode *inode)
c8e81b8
 }
c8e81b8
 
c8e81b8
 /*
c8e81b8
+ * Write i_size to i_disksize before hole punch or zero range truncates page
c8e81b8
+ * cache. The update may have been postponed to writeback submission, which
c8e81b8
+ * never happens once the pages are gone. Returns 0, or the error from
c8e81b8
+ * starting the journal handle or from marking the inode dirty.
c8e81b8
+ */
c8e81b8
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
c8e81b8
+				      loff_t len)
c8e81b8
+{
c8e81b8
+	handle_t *handle;
c8e81b8
+	loff_t size = i_size_read(inode);
c8e81b8
+	int ret;
c8e81b8
+
c8e81b8
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
c8e81b8
+	if (offset > size || offset + len < size)
c8e81b8
+		return 0;
c8e81b8
+	if (EXT4_I(inode)->i_disksize >= size)
c8e81b8
+		return 0;
c8e81b8
+
c8e81b8
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
c8e81b8
+	if (IS_ERR(handle))
c8e81b8
+		return PTR_ERR(handle);
c8e81b8
+	ext4_update_i_disksize(inode, size);
c8e81b8
+	ret = ext4_mark_inode_dirty(handle, inode);
c8e81b8
+	ext4_journal_stop(handle);
c8e81b8
+
c8e81b8
+	return ret;
c8e81b8
+}
c8e81b8
+
c8e81b8
+/*
c8e81b8
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
c8e81b8
  * associated with the given offset and length
c8e81b8
  *
c8e81b8
@@ -3636,9 +3665,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
c8e81b8
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
c8e81b8
 
c8e81b8
 	/* Now release the pages and zero block aligned part of pages*/
c8e81b8
-	if (last_block_offset > first_block_offset)
c8e81b8
+	if (last_block_offset > first_block_offset) {
c8e81b8
+		ret = ext4_update_disksize_before_punch(inode, offset, length);
c8e81b8
+		if (ret)
c8e81b8
+			goto out_dio;
c8e81b8
 		truncate_pagecache_range(inode, first_block_offset,
c8e81b8
 					 last_block_offset);
c8e81b8
+	}
c8e81b8
 
c8e81b8
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
c8e81b8
 		credits = ext4_writepage_trans_blocks(inode);
c8e81b8
-- 
c8e81b8
2.5.5
c8e81b8