4d441da
From: Dmitry Monakhov <dmonakhov@xxxxxxxx>
4d441da
Date: Thu, 9 Oct 2014 15:14:47 +0400
4d441da
Subject: [PATCH] ext4: fix race between write and fcntl(F_SETFL)
4d441da
4d441da
The O_DIRECT flag can be toggled via fcntl(F_SETFL).
4d441da
But this value is checked twice: inside ext4_file_write_iter() and __generic_file_write_iter()
4d441da
which results in a BUG_ON (see typical stack trace below).
4d441da
In order to fix this we have to use our own copy of __generic_file_write_iter and
4d441da
pass o_direct status explicitly.
4d441da
4d441da
TESTCASE: xfstest:generic/326  (http://patchwork.ozlabs.org/patch/397949/)
4d441da
4d441da
kernel BUG at fs/ext4/inode.c:2960!
4d441da
invalid opcode: 0000 [#1] SMP
4d441da
Modules linked in: brd iTCO_wdt lpc_ich mfd_core igb ptp dm_mirror dm_region_hash dm_log dm_mod
4d441da
CPU: 6 PID: 5505 Comm: aio-dio-fcntl-r Not tainted 3.17.0-rc2-00176-gff5c017 #161
4d441da
Hardware name: Intel Corporation W2600CR/W2600CR, BIOS SE5C600.86B.99.99.x028.061320111235 06/13/2011
4d441da
task: ffff88080e95a7c0 ti: ffff88080f908000 task.ti: ffff88080f908000
4d441da
RIP: 0010:[<ffffffff811fabf2>]  [<ffffffff811fabf2>] ext4_direct_IO+0x162/0x3d0
4d441da
RSP: 0018:ffff88080f90bb58  EFLAGS: 00010246
4d441da
RAX: 0000000000000400 RBX: ffff88080fdb2a28 RCX: 00000000a802c818
4d441da
RDX: 0000040000080000 RSI: ffff88080d8aeb80 RDI: 0000000000000001
4d441da
RBP: ffff88080f90bbc8 R08: 0000000000000000 R09: 0000000000001581
4d441da
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88080d8aeb80
4d441da
R13: ffff88080f90bbf8 R14: ffff88080fdb28c8 R15: ffff88080fdb2a28
4d441da
FS:  00007f23b2055700(0000) GS:ffff880818400000(0000) knlGS:0000000000000000
4d441da
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
4d441da
CR2: 00007f23b2045000 CR3: 000000080cedf000 CR4: 00000000000407e0
4d441da
Stack:
4d441da
 ffff88080f90bb98 0000000000000000 7ffffffffffffffe ffff88080fdb2c30
4d441da
 0000000000000200 0000000000000200 0000000000000001 0000000000000200
4d441da
 ffff88080f90bbc8 ffff88080fdb2c30 ffff88080f90be08 0000000000000200
4d441da
Call Trace:
4d441da
 [<ffffffff8112ca9d>] generic_file_direct_write+0xed/0x180
4d441da
 [<ffffffff8112f2b2>] __generic_file_write_iter+0x222/0x370
4d441da
 [<ffffffff811f495b>] ext4_file_write_iter+0x34b/0x400
4d441da
 [<ffffffff811bd709>] ? aio_run_iocb+0x239/0x410
4d441da
 [<ffffffff811bd709>] ? aio_run_iocb+0x239/0x410
4d441da
 [<ffffffff810990e5>] ? local_clock+0x25/0x30
4d441da
 [<ffffffff810abd94>] ? __lock_acquire+0x274/0x700
4d441da
 [<ffffffff811f4610>] ? ext4_unwritten_wait+0xb0/0xb0
4d441da
 [<ffffffff811bd756>] aio_run_iocb+0x286/0x410
4d441da
 [<ffffffff810990e5>] ? local_clock+0x25/0x30
4d441da
 [<ffffffff810ac359>] ? lock_release_holdtime+0x29/0x190
4d441da
 [<ffffffff811bc05b>] ? lookup_ioctx+0x4b/0xf0
4d441da
 [<ffffffff811bde3b>] do_io_submit+0x55b/0x740
4d441da
 [<ffffffff811bdcaa>] ? do_io_submit+0x3ca/0x740
4d441da
 [<ffffffff811be030>] SyS_io_submit+0x10/0x20
4d441da
 [<ffffffff815ce192>] system_call_fastpath+0x16/0x1b
4d441da
Code: 01 48 8b 80 f0 01 00 00 48 8b 18 49 8b 45 10 0f 85 f1 01 00 00 48 03 45 c8 48 3b 43 48 0f 8f e3 01 00 00 49 83 7c 24 18 00 75 04 <0f> 0b eb fe f0 ff 83 ec 01 00 00 49 8b 44 24 18 8b 00 85 c0 89
4d441da
RIP  [<ffffffff811fabf2>] ext4_direct_IO+0x162/0x3d0
4d441da
 RSP <ffff88080f90bb58>
4d441da
4d441da
Upstream-status: Submitted but likely not accepted
4d441da
Bugzilla: 1152608
4d441da
4d441da
Reported-by: Sasha Levin <sasha.levin@xxxxxxxxxx>
4d441da
Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
4d441da
---
4d441da
 fs/ext4/file.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4d441da
 1 file changed, 95 insertions(+), 1 deletion(-)
4d441da
4d441da
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
4d441da
index aca7b24a4432..8477eb259809 100644
4d441da
--- a/fs/ext4/file.c
4d441da
+++ b/fs/ext4/file.c
4d441da
@@ -88,6 +88,100 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
4d441da
 	return 0;
4d441da
 }
4d441da
 
4d441da
+/**
4d441da
+ * __ext4_file_write_iter - __generic_file_write_iter with explicit O_DIRECT
4d441da
+ * @iocb:	IO state structure (file, offset, etc.)
4d441da
+ * @from:	iov_iter with data to write
4d441da
+ * @direct:	non-zero to take the O_DIRECT path (state sampled by the caller)
4d441da
+ */
4d441da
+static ssize_t
4d441da
+__ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from, int direct)
4d441da
+{
4d441da
+	struct file *file = iocb->ki_filp;
4d441da
+	struct address_space *mapping = file->f_mapping;
4d441da
+	struct inode	*inode = mapping->host;
4d441da
+	loff_t		pos = iocb->ki_pos;
4d441da
+	ssize_t		written = 0;
4d441da
+	ssize_t		err;
4d441da
+	ssize_t		status;
4d441da
+	size_t		count = iov_iter_count(from);
4d441da
+
4d441da
+	/* We can write back this queue in page reclaim */
4d441da
+	current->backing_dev_info = mapping->backing_dev_info;
4d441da
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
4d441da
+	if (err)
4d441da
+		goto out;
4d441da
+
4d441da
+	if (count == 0)
4d441da
+		goto out;
4d441da
+
4d441da
+	iov_iter_truncate(from, count);
4d441da
+
4d441da
+	err = file_remove_suid(file);
4d441da
+	if (err)
4d441da
+		goto out;
4d441da
+
4d441da
+	err = file_update_time(file);
4d441da
+	if (err)
4d441da
+		goto out;
4d441da
+
4d441da
+	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
4d441da
+	if (unlikely(direct)) {	/* caller's snapshot; see fcntl(F_SETFL) race */
4d441da
+		loff_t endbyte;
4d441da
+
4d441da
+		written = generic_file_direct_write(iocb, from, pos);
4d441da
+		if (written < 0 || written == count)
4d441da
+			goto out;
4d441da
+
4d441da
+		/*
4d441da
+		 * direct-io write to a hole: fall through to buffered I/O
4d441da
+		 * for completing the rest of the request.
4d441da
+		 */
4d441da
+		pos += written;
4d441da
+		count -= written;
4d441da
+
4d441da
+		status = generic_perform_write(file, from, pos);
4d441da
+		/*
4d441da
+		 * If generic_perform_write() returned a synchronous error
4d441da
+		 * then we want to return the number of bytes which were
4d441da
+		 * direct-written, or the error code if that was zero.  Note
4d441da
+		 * that this differs from normal direct-io semantics, which
4d441da
+		 * will return -EFOO even if some bytes were written.
4d441da
+		 */
4d441da
+		if (unlikely(status < 0)) {
4d441da
+			err = status;
4d441da
+			goto out;
4d441da
+		}
4d441da
+		iocb->ki_pos = pos + status;
4d441da
+		/*
4d441da
+		 * We need to ensure that the page cache pages are written to
4d441da
+		 * disk and invalidated to preserve the expected O_DIRECT
4d441da
+		 * semantics.
4d441da
+		 */
4d441da
+		endbyte = pos + status - 1;
4d441da
+		err = filemap_write_and_wait_range(file->f_mapping, pos,
4d441da
+						   endbyte);
4d441da
+		if (err == 0) {
4d441da
+			written += status;
4d441da
+			invalidate_mapping_pages(mapping,
4d441da
+						 pos >> PAGE_CACHE_SHIFT,
4d441da
+						 endbyte >> PAGE_CACHE_SHIFT);
4d441da
+		} else {
4d441da
+			/*
4d441da
+			 * We don't know how much we wrote, so just return
4d441da
+			 * the number of bytes which were direct-written
4d441da
+			 */
4d441da
+		}
4d441da
+	} else {
4d441da
+		written = generic_perform_write(file, from, pos);
4d441da
+		if (likely(written >= 0))
4d441da
+			iocb->ki_pos = pos + written;
4d441da
+	}
4d441da
+out:
4d441da
+	current->backing_dev_info = NULL;
4d441da
+	return written ? written : err;	/* non-zero written (bytes or -errno) wins */
4d441da
+}
4d441da
+
4d441da
 static ssize_t
4d441da
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
4d441da
 {
4d441da
@@ -172,7 +266,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
4d441da
 		}
4d441da
 	}
4d441da
 
4d441da
-	ret = __generic_file_write_iter(iocb, from);
4d441da
+	ret = __ext4_file_write_iter(iocb, from, o_direct);
4d441da
 	mutex_unlock(&inode->i_mutex);
4d441da
 
4d441da
 	if (ret > 0) {
4d441da
-- 
4d441da
1.9.3
4d441da