5c8bd1
From 011278485ecc3cd2a3954b5d4c73101d919bf1fa Mon Sep 17 00:00:00 2001
5c8bd1
From: Jan Kara <jack@suse.com>
5c8bd1
Date: Mon, 7 Dec 2015 14:34:49 -0500
5c8bd1
Subject: [PATCH 4/4] ext4: fix races of writeback with punch hole and zero
5c8bd1
 range
5c8bd1
5c8bd1
When doing delayed allocation, the update of the on-disk inode size is
5c8bd1
postponed until IO submission time. However, hole punch or zero range
5c8bd1
fallocate calls can end up discarding the tail page from the page cache,
5c8bd1
and thus the on-disk inode size would never be properly updated.
5c8bd1
5c8bd1
Make sure the on-disk inode size is updated before truncating page
5c8bd1
cache.
5c8bd1
5c8bd1
Signed-off-by: Jan Kara <jack@suse.com>
5c8bd1
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
5c8bd1
---
5c8bd1
 fs/ext4/ext4.h    |  3 +++
5c8bd1
 fs/ext4/extents.c |  5 +++++
5c8bd1
 fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++++-
5c8bd1
 3 files changed, 42 insertions(+), 1 deletion(-)
5c8bd1
5c8bd1
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
5c8bd1
index 348a5ff4a0e2..80f76f092079 100644
5c8bd1
--- a/fs/ext4/ext4.h
5c8bd1
+++ b/fs/ext4/ext4.h
5c8bd1
@@ -2858,6 +2858,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
5c8bd1
 	return changed;
5c8bd1
 }
5c8bd1
 
5c8bd1
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
5c8bd1
+				      loff_t len);
5c8bd1
+
5c8bd1
 struct ext4_group_info {
5c8bd1
 	unsigned long   bb_state;
5c8bd1
 	struct rb_root  bb_free_root;
5c8bd1
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
5c8bd1
index 4b105c96df08..3578b25fccfd 100644
5c8bd1
--- a/fs/ext4/extents.c
5c8bd1
+++ b/fs/ext4/extents.c
5c8bd1
@@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
5c8bd1
 		 * released from page cache.
5c8bd1
 		 */
5c8bd1
 		down_write(&EXT4_I(inode)->i_mmap_sem);
5c8bd1
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
5c8bd1
+		if (ret) {
5c8bd1
+			up_write(&EXT4_I(inode)->i_mmap_sem);
5c8bd1
+			goto out_dio;
5c8bd1
+		}
5c8bd1
 		/* Now release the pages and zero block aligned part of pages */
5c8bd1
 		truncate_pagecache_range(inode, start, end - 1);
5c8bd1
 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
5c8bd1
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
5c8bd1
index d1207d03c961..472e608da13d 100644
5c8bd1
--- a/fs/ext4/inode.c
5c8bd1
+++ b/fs/ext4/inode.c
5c8bd1
@@ -3559,6 +3559,35 @@ int ext4_can_truncate(struct inode *inode)
5c8bd1
 }
5c8bd1
 
5c8bd1
 /*
5c8bd1
+ * Make i_disksize catch up with i_size before hole punch or zero range drops
5c8bd1
+ * the page cache over i_size: the update may have been postponed to writeback
5c8bd1
+ * submission, which will never happen once those pages are gone. Caller must
5c8bd1
+ * hold i_mutex. Returns 0, or an error if the inode could not be journalled.
5c8bd1
+ */
5c8bd1
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
5c8bd1
+				      loff_t len)
5c8bd1
+{
5c8bd1
+	handle_t *handle;
5c8bd1
+	loff_t size = i_size_read(inode);
5c8bd1
+	int ret;
5c8bd1
+
5c8bd1
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
5c8bd1
+	if (offset > size || offset + len < size)
5c8bd1
+		return 0;
5c8bd1
+	if (EXT4_I(inode)->i_disksize >= size)
5c8bd1
+		return 0;
5c8bd1
+
5c8bd1
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
5c8bd1
+	if (IS_ERR(handle))
5c8bd1
+		return PTR_ERR(handle);
5c8bd1
+	ext4_update_i_disksize(inode, size);
5c8bd1
+	/* Report failure so the caller skips the page cache truncation */
5c8bd1
+	ret = ext4_mark_inode_dirty(handle, inode);
5c8bd1
+	ext4_journal_stop(handle);
5c8bd1
+	return ret;
5c8bd1
+}
5c8bd1
+
5c8bd1
+/*
5c8bd1
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
5c8bd1
  * associated with the given offset and length
5c8bd1
  *
5c8bd1
@@ -3636,9 +3665,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
5c8bd1
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
5c8bd1
 
5c8bd1
 	/* Now release the pages and zero block aligned part of pages*/
5c8bd1
-	if (last_block_offset > first_block_offset)
5c8bd1
+	if (last_block_offset > first_block_offset) {
5c8bd1
+		ret = ext4_update_disksize_before_punch(inode, offset, length);
5c8bd1
+		if (ret)
5c8bd1
+			goto out_dio;
5c8bd1
 		truncate_pagecache_range(inode, first_block_offset,
5c8bd1
 					 last_block_offset);
5c8bd1
+	}
5c8bd1
 
5c8bd1
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5c8bd1
 		credits = ext4_writepage_trans_blocks(inode);
5c8bd1
-- 
5c8bd1
2.5.5
5c8bd1