kraxel / rpms / kernel

Forked from rpms/kernel 2 years ago
Clone
Dave Jones b2f4cd9
Hi,
Dave Jones b2f4cd9
Dave Jones b2f4cd9
Thank you for review and comments.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
On 02/16/12 02:26, Tejun Heo wrote:
Dave Jones b2f4cd9
> On Wed, Feb 15, 2012 at 11:56:19AM +0900, Jun'ichi Nomura wrote:
Dave Jones b2f4cd9
>> +int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones b2f4cd9
>> +{
Dave Jones b2f4cd9
>> +	int res;
Dave Jones b2f4cd9
>> +
Dave Jones b2f4cd9
>> +	res = drop_partitions(disk, bdev);
Dave Jones b2f4cd9
>> +	if (res)
Dave Jones b2f4cd9
>> +		return res;
Dave Jones b2f4cd9
>> +
Dave Jones b2f4cd9
> 
Dave Jones b2f4cd9
> Hmmm... shouldn't we have set_capacity(disk, 0) here?
Dave Jones b2f4cd9
Dave Jones b2f4cd9
Added.
Dave Jones b2f4cd9
I wasn't sure whether I should leave it to drivers.
Dave Jones b2f4cd9
But it seems capacity 0 for ENOMEDIUM device is reasonable.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
>> +	check_disk_size_change(disk, bdev);
Dave Jones b2f4cd9
>> +	bdev->bd_invalidated = 0;
Dave Jones b2f4cd9
>> +	/* tell userspace that the media / partition table may have changed */
Dave Jones b2f4cd9
>> +	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
Dave Jones b2f4cd9
> 
Dave Jones b2f4cd9
> Also, we really shouldn't be generating KOBJ_CHANGE after every
Dave Jones b2f4cd9
> -ENOMEDIUM open.  This can easily lead to infinite loop.  We should
Dave Jones b2f4cd9
> generate this iff we actually dropped partitions && modified the size.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
invalidate_partitions() is called only when bd_invalidated is set.
Dave Jones b2f4cd9
So KOBJ_CHANGE is not raised for every ENOMEDIUM open.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
I put it explicit in the function to make it safer for
Dave Jones b2f4cd9
possible misuse.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
How about this?
Dave Jones b2f4cd9
Dave Jones b2f4cd9
---------------------------------------------------------
Dave Jones b2f4cd9
Do not call drivers when invalidating partitions for -ENOMEDIUM
Dave Jones b2f4cd9
Dave Jones b2f4cd9
When a scsi driver returns -ENOMEDIUM for open(),
Dave Jones b2f4cd9
__blkdev_get() calls rescan_partitions(), which ends up calling
Dave Jones b2f4cd9
sd_revalidate_disk() without getting a refcount of scsi_device.
Dave Jones b2f4cd9
Dave Jones b2f4cd9
That could lead to oops like this:
Dave Jones b2f4cd9
Dave Jones b2f4cd9
  process A                  process B
Dave Jones b2f4cd9
  ----------------------------------------------
Dave Jones b2f4cd9
  sys_open
Dave Jones b2f4cd9
    __blkdev_get
Dave Jones b2f4cd9
      sd_open
Dave Jones b2f4cd9
        returns -ENOMEDIUM
Dave Jones b2f4cd9
                             scsi_remove_device
Dave Jones b2f4cd9
                               <scsi_device torn down>
Dave Jones b2f4cd9
      rescan_partitions
Dave Jones b2f4cd9
        sd_revalidate_disk
Dave Jones b2f4cd9
          <oops>
Dave Jones b2f4cd9
Dave Jones b2f4cd9
Oopses are reported here:
Dave Jones b2f4cd9
http://marc.info/?l=linux-scsi&m=132388619710052
Dave Jones b2f4cd9
Dave Jones b2f4cd9
This patch separates the partition invalidation from rescan_partitions()
Dave Jones b2f4cd9
and use it for -ENOMEDIUM case. 
Dave Jones b2f4cd9
Dave Jones b2f4cd9
Index: linux-3.3/block/partition-generic.c
Dave Jones b2f4cd9
===================================================================
Dave Jones b2f4cd9
--- linux-3.3.orig/block/partition-generic.c	2012-02-15 09:00:25.147293790 +0900
Dave Jones b2f4cd9
+++ linux-3.3/block/partition-generic.c	2012-02-16 10:48:22.257680685 +0900
Dave Jones b2f4cd9
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(
Dave Jones b2f4cd9
 	}
Dave Jones b2f4cd9
 }
Dave Jones b2f4cd9
 
Dave Jones b2f4cd9
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones b2f4cd9
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones b2f4cd9
 {
Dave Jones b2f4cd9
-	struct parsed_partitions *state = NULL;
Dave Jones b2f4cd9
 	struct disk_part_iter piter;
Dave Jones b2f4cd9
 	struct hd_struct *part;
Dave Jones b2f4cd9
-	int p, highest, res;
Dave Jones b2f4cd9
-rescan:
Dave Jones b2f4cd9
-	if (state && !IS_ERR(state)) {
Dave Jones b2f4cd9
-		kfree(state);
Dave Jones b2f4cd9
-		state = NULL;
Dave Jones b2f4cd9
-	}
Dave Jones b2f4cd9
+	int res;
Dave Jones b2f4cd9
 
Dave Jones b2f4cd9
 	if (bdev->bd_part_count)
Dave Jones b2f4cd9
 		return -EBUSY;
Dave Jones b2f4cd9
@@ -412,6 +406,24 @@ rescan:
Dave Jones b2f4cd9
 		delete_partition(disk, part->partno);
Dave Jones b2f4cd9
 	disk_part_iter_exit(&piter);
Dave Jones b2f4cd9
 
Dave Jones b2f4cd9
+	return 0;
Dave Jones b2f4cd9
+}
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones b2f4cd9
+{
Dave Jones b2f4cd9
+	struct parsed_partitions *state = NULL;
Dave Jones b2f4cd9
+	struct hd_struct *part;
Dave Jones b2f4cd9
+	int p, highest, res;
Dave Jones b2f4cd9
+rescan:
Dave Jones b2f4cd9
+	if (state && !IS_ERR(state)) {
Dave Jones b2f4cd9
+		kfree(state);
Dave Jones b2f4cd9
+		state = NULL;
Dave Jones b2f4cd9
+	}
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+	res = drop_partitions(disk, bdev);
Dave Jones b2f4cd9
+	if (res)
Dave Jones b2f4cd9
+		return res;
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
 	if (disk->fops->revalidate_disk)
Dave Jones b2f4cd9
 		disk->fops->revalidate_disk(disk);
Dave Jones b2f4cd9
 	check_disk_size_change(disk, bdev);
Dave Jones b2f4cd9
@@ -515,6 +527,26 @@ rescan:
Dave Jones b2f4cd9
 	return 0;
Dave Jones b2f4cd9
 }
Dave Jones b2f4cd9
 
Dave Jones b2f4cd9
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
Dave Jones b2f4cd9
+{
Dave Jones b2f4cd9
+	int res;
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+	if (!bdev->bd_invalidated)
Dave Jones b2f4cd9
+		return 0;
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+	res = drop_partitions(disk, bdev);
Dave Jones b2f4cd9
+	if (res)
Dave Jones b2f4cd9
+		return res;
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+	set_capacity(disk, 0);
Dave Jones b2f4cd9
+	check_disk_size_change(disk, bdev);
Dave Jones b2f4cd9
+	bdev->bd_invalidated = 0;
Dave Jones b2f4cd9
+	/* tell userspace that the media / partition table may have changed */
Dave Jones b2f4cd9
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
+	return 0;
Dave Jones b2f4cd9
+}
Dave Jones b2f4cd9
+
Dave Jones b2f4cd9
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
Dave Jones b2f4cd9
 {
Dave Jones b2f4cd9
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
Dave Jones b2f4cd9
Index: linux-3.3/include/linux/genhd.h
Dave Jones b2f4cd9
===================================================================
Dave Jones b2f4cd9
--- linux-3.3.orig/include/linux/genhd.h	2012-02-09 12:21:53.000000000 +0900
Dave Jones b2f4cd9
+++ linux-3.3/include/linux/genhd.h	2012-02-16 10:47:43.783681813 +0900
Dave Jones b2f4cd9
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *
Dave Jones b2f4cd9
 
Dave Jones b2f4cd9
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
Dave Jones b2f4cd9
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
Dave Jones b2f4cd9
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
Dave Jones b2f4cd9
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
Dave Jones b2f4cd9
 						     int partno, sector_t start,
Dave Jones b2f4cd9
 						     sector_t len, int flags,
Dave Jones b2f4cd9
Index: linux-3.3/fs/block_dev.c
Dave Jones b2f4cd9
===================================================================
Dave Jones b2f4cd9
--- linux-3.3.orig/fs/block_dev.c	2012-02-09 12:21:53.000000000 +0900
Dave Jones b2f4cd9
+++ linux-3.3/fs/block_dev.c	2012-02-16 10:47:52.602681441 +0900
Dave Jones b2f4cd9
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_dev
Dave Jones b2f4cd9
 			 * The latter is necessary to prevent ghost
Dave Jones b2f4cd9
 			 * partitions on a removed medium.
Dave Jones b2f4cd9
 			 */
Dave Jones b2f4cd9
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
Dave Jones b2f4cd9
-				rescan_partitions(disk, bdev);
Dave Jones b2f4cd9
+			if (bdev->bd_invalidated) {
Dave Jones b2f4cd9
+				if (!ret)
Dave Jones b2f4cd9
+					rescan_partitions(disk, bdev);
Dave Jones b2f4cd9
+				else if (ret == -ENOMEDIUM)
Dave Jones b2f4cd9
+					invalidate_partitions(disk, bdev);
Dave Jones b2f4cd9
+			}
Dave Jones b2f4cd9
 			if (ret)
Dave Jones b2f4cd9
 				goto out_clear;
Dave Jones b2f4cd9
 		} else {
Dave Jones b2f4cd9
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_dev
Dave Jones b2f4cd9
 			if (bdev->bd_disk->fops->open)
Dave Jones b2f4cd9
 				ret = bdev->bd_disk->fops->open(bdev, mode);
Dave Jones b2f4cd9
 			/* the same as first opener case, read comment there */
Dave Jones b2f4cd9
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
Dave Jones b2f4cd9
-				rescan_partitions(bdev->bd_disk, bdev);
Dave Jones b2f4cd9
+			if (bdev->bd_invalidated) {
Dave Jones b2f4cd9
+				if (!ret)
Dave Jones b2f4cd9
+					rescan_partitions(bdev->bd_disk, bdev);
Dave Jones b2f4cd9
+				else if (ret == -ENOMEDIUM)
Dave Jones b2f4cd9
+					invalidate_partitions(bdev->bd_disk, bdev);
Dave Jones b2f4cd9
+			}
Dave Jones b2f4cd9
 			if (ret)
Dave Jones b2f4cd9
 				goto out_unlock_bdev;
Dave Jones b2f4cd9
 		}