Dave Jones 43a44e9
Hi,
Dave Jones 43a44e9
Dave Jones 43a44e9
Thank you for review and comments.
Dave Jones 43a44e9
Dave Jones 43a44e9
On 02/16/12 02:26, Tejun Heo wrote:
Dave Jones 43a44e9
> On Wed, Feb 15, 2012 at 11:56:19AM +0900, Jun'ichi Nomura wrote:
Dave Jones 43a44e9
>> +int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones 43a44e9
>> +{
Dave Jones 43a44e9
>> +	int res;
Dave Jones 43a44e9
>> +
Dave Jones 43a44e9
>> +	res = drop_partitions(disk, bdev);
Dave Jones 43a44e9
>> +	if (res)
Dave Jones 43a44e9
>> +		return res;
Dave Jones 43a44e9
>> +
Dave Jones 43a44e9
> 
Dave Jones 43a44e9
> Hmmm... shouldn't we have set_capacity(disk, 0) here?
Dave Jones 43a44e9
Dave Jones 43a44e9
Added.
Dave Jones 43a44e9
I wasn't sure whether I should leave it to drivers.
Dave Jones 43a44e9
But it seems capacity 0 for ENOMEDIUM device is reasonable.
Dave Jones 43a44e9
Dave Jones 43a44e9
>> +	check_disk_size_change(disk, bdev);
Dave Jones 43a44e9
>> +	bdev->bd_invalidated = 0;
Dave Jones 43a44e9
>> +	/* tell userspace that the media / partition table may have changed */
Dave Jones 43a44e9
>> +	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
Dave Jones 43a44e9
> 
Dave Jones 43a44e9
> Also, we really shouldn't be generating KOBJ_CHANGE after every
Dave Jones 43a44e9
> -ENOMEDIUM open.  This can easily lead to infinite loop.  We should
Dave Jones 43a44e9
> generate this iff we actually dropped partitions && modified the size.
Dave Jones 43a44e9
Dave Jones 43a44e9
invalidate_partitions() is called only when bd_invalidated is set.
Dave Jones 43a44e9
So KOBJ_CHANGE is not raised for every ENOMEDIUM open.
Dave Jones 43a44e9
Dave Jones 43a44e9
I put it explicit in the function to make it safer for
Dave Jones 43a44e9
possible misuse.
Dave Jones 43a44e9
Dave Jones 43a44e9
How about this?
Dave Jones 43a44e9
Dave Jones 43a44e9
---------------------------------------------------------
Dave Jones 43a44e9
Do not call drivers when invalidating partitions for -ENOMEDIUM
Dave Jones 43a44e9
Dave Jones 43a44e9
When a scsi driver returns -ENOMEDIUM for open(),
Dave Jones 43a44e9
__blkdev_get() calls rescan_partitions(), which ends up calling
Dave Jones 43a44e9
sd_revalidate_disk() without getting a refcount of scsi_device.
Dave Jones 43a44e9
Dave Jones 43a44e9
That could lead to oops like this:
Dave Jones 43a44e9
Dave Jones 43a44e9
  process A                  process B
Dave Jones 43a44e9
  ----------------------------------------------
Dave Jones 43a44e9
  sys_open
Dave Jones 43a44e9
    __blkdev_get
Dave Jones 43a44e9
      sd_open
Dave Jones 43a44e9
        returns -ENOMEDIUM
Dave Jones 43a44e9
                             scsi_remove_device
Dave Jones 43a44e9
                               <scsi_device torn down>
Dave Jones 43a44e9
      rescan_partitions
Dave Jones 43a44e9
        sd_revalidate_disk
Dave Jones 43a44e9
          <oops>
Dave Jones 43a44e9
Dave Jones 43a44e9
Oopses are reported here:
Dave Jones 43a44e9
http://marc.info/?l=linux-scsi&m=132388619710052
Dave Jones 43a44e9
Dave Jones 43a44e9
This patch separates the partition invalidation from rescan_partitions()
Dave Jones 43a44e9
and use it for -ENOMEDIUM case. 
Dave Jones 43a44e9
Dave Jones 43a44e9
Index: linux-3.3/block/partition-generic.c
Dave Jones 43a44e9
===================================================================
Dave Jones 43a44e9
--- linux-3.3.orig/block/partition-generic.c	2012-02-15 09:00:25.147293790 +0900
Dave Jones 43a44e9
+++ linux-3.3/block/partition-generic.c	2012-02-16 10:48:22.257680685 +0900
Dave Jones 43a44e9
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(
Dave Jones 43a44e9
 	}
Dave Jones 43a44e9
 }
Dave Jones 43a44e9
 
Dave Jones 43a44e9
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones 43a44e9
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones 43a44e9
 {
Dave Jones 43a44e9
-	struct parsed_partitions *state = NULL;
Dave Jones 43a44e9
 	struct disk_part_iter piter;
Dave Jones 43a44e9
 	struct hd_struct *part;
Dave Jones 43a44e9
-	int p, highest, res;
Dave Jones 43a44e9
-rescan:
Dave Jones 43a44e9
-	if (state && !IS_ERR(state)) {
Dave Jones 43a44e9
-		kfree(state);
Dave Jones 43a44e9
-		state = NULL;
Dave Jones 43a44e9
-	}
Dave Jones 43a44e9
+	int res;
Dave Jones 43a44e9
 
Dave Jones 43a44e9
 	if (bdev->bd_part_count)
Dave Jones 43a44e9
 		return -EBUSY;
Dave Jones 43a44e9
@@ -412,6 +406,24 @@ rescan:
Dave Jones 43a44e9
 		delete_partition(disk, part->partno);
Dave Jones 43a44e9
 	disk_part_iter_exit(&piter);
Dave Jones 43a44e9
 
Dave Jones 43a44e9
+	return 0;
Dave Jones 43a44e9
+}
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones 43a44e9
+{
Dave Jones 43a44e9
+	struct parsed_partitions *state = NULL;
Dave Jones 43a44e9
+	struct hd_struct *part;
Dave Jones 43a44e9
+	int p, highest, res;
Dave Jones 43a44e9
+rescan:
Dave Jones 43a44e9
+	if (state && !IS_ERR(state)) {
Dave Jones 43a44e9
+		kfree(state);
Dave Jones 43a44e9
+		state = NULL;
Dave Jones 43a44e9
+	}
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+	res = drop_partitions(disk, bdev);
Dave Jones 43a44e9
+	if (res)
Dave Jones 43a44e9
+		return res;
Dave Jones 43a44e9
+
Dave Jones 43a44e9
 	if (disk->fops->revalidate_disk)
Dave Jones 43a44e9
 		disk->fops->revalidate_disk(disk);
Dave Jones 43a44e9
 	check_disk_size_change(disk, bdev);
Dave Jones 43a44e9
@@ -515,6 +527,26 @@ rescan:
Dave Jones 43a44e9
 	return 0;
Dave Jones 43a44e9
 }
Dave Jones 43a44e9
 
Dave Jones 43a44e9
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones 43a44e9
+{
Dave Jones 43a44e9
+	int res;
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+	if (!bdev->bd_invalidated)
Dave Jones 43a44e9
+		return 0;
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+	res = drop_partitions(disk, bdev);
Dave Jones 43a44e9
+	if (res)
Dave Jones 43a44e9
+		return res;
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+	set_capacity(disk, 0);
Dave Jones 43a44e9
+	check_disk_size_change(disk, bdev);
Dave Jones 43a44e9
+	bdev->bd_invalidated = 0;
Dave Jones 43a44e9
+	/* tell userspace that the media / partition table may have changed */
Dave Jones 43a44e9
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
Dave Jones 43a44e9
+
Dave Jones 43a44e9
+	return 0;
Dave Jones 43a44e9
+}
Dave Jones 43a44e9
+
Dave Jones 43a44e9
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
Dave Jones 43a44e9
 {
Dave Jones 43a44e9
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
Dave Jones 43a44e9
Index: linux-3.3/include/linux/genhd.h
Dave Jones 43a44e9
===================================================================
Dave Jones 43a44e9
--- linux-3.3.orig/include/linux/genhd.h	2012-02-09 12:21:53.000000000 +0900
Dave Jones 43a44e9
+++ linux-3.3/include/linux/genhd.h	2012-02-16 10:47:43.783681813 +0900
Dave Jones 43a44e9
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *
Dave Jones 43a44e9
 
Dave Jones 43a44e9
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
Dave Jones 43a44e9
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
Dave Jones 43a44e9
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
Dave Jones 43a44e9
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
Dave Jones 43a44e9
 						     int partno, sector_t start,
Dave Jones 43a44e9
 						     sector_t len, int flags,
Dave Jones 43a44e9
Index: linux-3.3/fs/block_dev.c
Dave Jones 43a44e9
===================================================================
Dave Jones 43a44e9
--- linux-3.3.orig/fs/block_dev.c	2012-02-09 12:21:53.000000000 +0900
Dave Jones 43a44e9
+++ linux-3.3/fs/block_dev.c	2012-02-16 10:47:52.602681441 +0900
Dave Jones 43a44e9
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_dev
Dave Jones 43a44e9
 			 * The latter is necessary to prevent ghost
Dave Jones 43a44e9
 			 * partitions on a removed medium.
Dave Jones 43a44e9
 			 */
Dave Jones 43a44e9
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
Dave Jones 43a44e9
-				rescan_partitions(disk, bdev);
Dave Jones 43a44e9
+			if (bdev->bd_invalidated) {
Dave Jones 43a44e9
+				if (!ret)
Dave Jones 43a44e9
+					rescan_partitions(disk, bdev);
Dave Jones 43a44e9
+				else if (ret == -ENOMEDIUM)
Dave Jones 43a44e9
+					invalidate_partitions(disk, bdev);
Dave Jones 43a44e9
+			}
Dave Jones 43a44e9
 			if (ret)
Dave Jones 43a44e9
 				goto out_clear;
Dave Jones 43a44e9
 		} else {
Dave Jones 43a44e9
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_dev
Dave Jones 43a44e9
 			if (bdev->bd_disk->fops->open)
Dave Jones 43a44e9
 				ret = bdev->bd_disk->fops->open(bdev, mode);
Dave Jones 43a44e9
 			/* the same as first opener case, read comment there */
Dave Jones 43a44e9
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
Dave Jones 43a44e9
-				rescan_partitions(bdev->bd_disk, bdev);
Dave Jones 43a44e9
+			if (bdev->bd_invalidated) {
Dave Jones 43a44e9
+				if (!ret)
Dave Jones 43a44e9
+					rescan_partitions(bdev->bd_disk, bdev);
Dave Jones 43a44e9
+				else if (ret == -ENOMEDIUM)
Dave Jones 43a44e9
+					invalidate_partitions(bdev->bd_disk, bdev);
Dave Jones 43a44e9
+			}
Dave Jones 43a44e9
 			if (ret)
Dave Jones 43a44e9
 				goto out_unlock_bdev;
Dave Jones 43a44e9
 		}