a7b9285
From f53959ca0d677a2ff0bf7243faeb2174f83268dc Mon Sep 17 00:00:00 2001
Alon Levy 408bdb5
From: Paolo Bonzini <pbonzini@redhat.com>
Alon Levy 408bdb5
Date: Fri, 22 Feb 2013 17:36:25 +0100
a7b9285
Subject: [PATCH] block-migration: add lock
Alon Levy 408bdb5
Alon Levy 408bdb5
Some state is shared between the block migration code and its AIO
Alon Levy 408bdb5
callbacks.  Once block migration runs outside the iothread,
Alon Levy 408bdb5
the block migration code and the AIO callbacks will be able to
Alon Levy 408bdb5
run concurrently.  Protect the critical sections with a separate
Alon Levy 408bdb5
lock.  Do the same for completed_sectors, which can be used from
Alon Levy 408bdb5
the monitor.
Alon Levy 408bdb5
Alon Levy 408bdb5
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Alon Levy 408bdb5
Reviewed-by: Juan Quintela <quintela@redhat.com>
Alon Levy 408bdb5
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Alon Levy 408bdb5
Signed-off-by: Juan Quintela <quintela@redhat.com>
a7b9285
(cherry picked from commit 52e850dea988585c3d693fd9cd4a4c38968d89b8)
Alon Levy 408bdb5
---
Alon Levy 408bdb5
 block-migration.c     | 54 ++++++++++++++++++++++++++++++++++++++++++++++++---
Alon Levy 408bdb5
 include/qemu/atomic.h |  1 +
Alon Levy 408bdb5
 2 files changed, 52 insertions(+), 3 deletions(-)
Alon Levy 408bdb5
Alon Levy 408bdb5
diff --git a/block-migration.c b/block-migration.c
Alon Levy 408bdb5
index d62a8b8..b726c6c 100644
Alon Levy 408bdb5
--- a/block-migration.c
Alon Levy 408bdb5
+++ b/block-migration.c
Alon Levy 408bdb5
@@ -54,7 +54,7 @@ typedef struct BlkMigDevState {
Alon Levy 408bdb5
     int64_t cur_sector;
Alon Levy 408bdb5
     int64_t cur_dirty;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
-    /* Protected by iothread lock.  */
Alon Levy 408bdb5
+    /* Protected by block migration lock.  */
Alon Levy 408bdb5
     unsigned long *aio_bitmap;
Alon Levy 408bdb5
     int64_t completed_sectors;
Alon Levy 408bdb5
 } BlkMigDevState;
Alon Levy 408bdb5
@@ -69,7 +69,7 @@ typedef struct BlkMigBlock {
Alon Levy 408bdb5
     QEMUIOVector qiov;
Alon Levy 408bdb5
     BlockDriverAIOCB *aiocb;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
-    /* Protected by iothread lock.  */
Alon Levy 408bdb5
+    /* Protected by block migration lock.  */
Alon Levy 408bdb5
     int ret;
Alon Levy 408bdb5
     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
Alon Levy 408bdb5
 } BlkMigBlock;
Alon Levy 408bdb5
@@ -81,7 +81,7 @@ typedef struct BlkMigState {
Alon Levy 408bdb5
     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
Alon Levy 408bdb5
     int64_t total_sector_sum;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
-    /* Protected by iothread lock.  */
Alon Levy 408bdb5
+    /* Protected by block migration lock.  */
Alon Levy 408bdb5
     QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
Alon Levy 408bdb5
     int submitted;
Alon Levy 408bdb5
     int read_done;
Alon Levy 408bdb5
@@ -90,10 +90,23 @@ typedef struct BlkMigState {
Alon Levy 408bdb5
     int transferred;
Alon Levy 408bdb5
     int prev_progress;
Alon Levy 408bdb5
     int bulk_completed;
Alon Levy 408bdb5
+
Alon Levy 408bdb5
+    /* Lock must be taken _inside_ the iothread lock.  */
Alon Levy 408bdb5
+    QemuMutex lock;
Alon Levy 408bdb5
 } BlkMigState;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
 static BlkMigState block_mig_state;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+static void blk_mig_lock(void)
Alon Levy 408bdb5
+{
Alon Levy 408bdb5
+    qemu_mutex_lock(&block_mig_state.lock);
Alon Levy 408bdb5
+}
Alon Levy 408bdb5
+
Alon Levy 408bdb5
+static void blk_mig_unlock(void)
Alon Levy 408bdb5
+{
Alon Levy 408bdb5
+    qemu_mutex_unlock(&block_mig_state.lock);
Alon Levy 408bdb5
+}
Alon Levy 408bdb5
+
Alon Levy 408bdb5
 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
Alon Levy 408bdb5
 {
Alon Levy 408bdb5
     int len;
Alon Levy 408bdb5
@@ -120,9 +133,11 @@ uint64_t blk_mig_bytes_transferred(void)
Alon Levy 408bdb5
     BlkMigDevState *bmds;
Alon Levy 408bdb5
     uint64_t sum = 0;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
Alon Levy 408bdb5
         sum += bmds->completed_sectors;
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
     return sum << BDRV_SECTOR_BITS;
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
@@ -142,6 +157,9 @@ uint64_t blk_mig_bytes_total(void)
Alon Levy 408bdb5
     return sum << BDRV_SECTOR_BITS;
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+
Alon Levy 408bdb5
+/* Called with migration lock held.  */
Alon Levy 408bdb5
+
Alon Levy 408bdb5
 static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
Alon Levy 408bdb5
 {
Alon Levy 408bdb5
     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Alon Levy 408bdb5
@@ -154,6 +172,8 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+/* Called with migration lock held.  */
Alon Levy 408bdb5
+
Alon Levy 408bdb5
 static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
Alon Levy 408bdb5
                              int nb_sectors, int set)
Alon Levy 408bdb5
 {
Alon Levy 408bdb5
@@ -188,10 +208,13 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
Alon Levy 408bdb5
     bmds->aio_bitmap = g_malloc0(bitmap_size);
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+/* Never hold migration lock when yielding to the main loop!  */
Alon Levy 408bdb5
+
Alon Levy 408bdb5
 static void blk_mig_read_cb(void *opaque, int ret)
Alon Levy 408bdb5
 {
Alon Levy 408bdb5
     BlkMigBlock *blk = opaque;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     blk->ret = ret;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
Alon Levy 408bdb5
@@ -200,6 +223,7 @@ static void blk_mig_read_cb(void *opaque, int ret)
Alon Levy 408bdb5
     block_mig_state.submitted--;
Alon Levy 408bdb5
     block_mig_state.read_done++;
Alon Levy 408bdb5
     assert(block_mig_state.submitted >= 0);
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
 static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
Alon Levy 408bdb5
@@ -244,7 +268,9 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
Alon Levy 408bdb5
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
Alon Levy 408bdb5
     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     block_mig_state.submitted++;
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
Alon Levy 408bdb5
                                 nr_sectors, blk_mig_read_cb, blk);
Alon Levy 408bdb5
@@ -366,8 +392,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
Alon Levy 408bdb5
     int ret = -EIO;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
Alon Levy 408bdb5
+        blk_mig_lock();
Alon Levy 408bdb5
         if (bmds_aio_inflight(bmds, sector)) {
Alon Levy 408bdb5
+            blk_mig_unlock();
Alon Levy 408bdb5
             bdrv_drain_all();
Alon Levy 408bdb5
+        } else {
Alon Levy 408bdb5
+            blk_mig_unlock();
Alon Levy 408bdb5
         }
Alon Levy 408bdb5
         if (bdrv_get_dirty(bmds->bs, sector)) {
Alon Levy 408bdb5
 
Alon Levy 408bdb5
@@ -389,8 +419,11 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
Alon Levy 408bdb5
 
Alon Levy 408bdb5
                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
Alon Levy 408bdb5
                                             nr_sectors, blk_mig_read_cb, blk);
Alon Levy 408bdb5
+
Alon Levy 408bdb5
+                blk_mig_lock();
Alon Levy 408bdb5
                 block_mig_state.submitted++;
Alon Levy 408bdb5
                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
Alon Levy 408bdb5
+                blk_mig_unlock();
Alon Levy 408bdb5
             } else {
Alon Levy 408bdb5
                 ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
Alon Levy 408bdb5
                 if (ret < 0) {
Alon Levy 408bdb5
@@ -446,6 +479,7 @@ static int flush_blks(QEMUFile *f)
Alon Levy 408bdb5
             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
Alon Levy 408bdb5
             block_mig_state.transferred);
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
Alon Levy 408bdb5
         if (qemu_file_rate_limit(f)) {
Alon Levy 408bdb5
             break;
Alon Levy 408bdb5
@@ -456,7 +490,9 @@ static int flush_blks(QEMUFile *f)
Alon Levy 408bdb5
         }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
Alon Levy 408bdb5
+        blk_mig_unlock();
Alon Levy 408bdb5
         blk_send(f, blk);
Alon Levy 408bdb5
+        blk_mig_lock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
         g_free(blk->buf);
Alon Levy 408bdb5
         g_free(blk);
Alon Levy 408bdb5
@@ -465,6 +501,7 @@ static int flush_blks(QEMUFile *f)
Alon Levy 408bdb5
         block_mig_state.transferred++;
Alon Levy 408bdb5
         assert(block_mig_state.read_done >= 0);
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
Alon Levy 408bdb5
             block_mig_state.submitted, block_mig_state.read_done,
Alon Levy 408bdb5
@@ -493,6 +530,7 @@ static void blk_mig_cleanup(void)
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     set_dirty_tracking(0);
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
Alon Levy 408bdb5
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
Alon Levy 408bdb5
         bdrv_set_in_use(bmds->bs, 0);
Alon Levy 408bdb5
@@ -506,6 +544,7 @@ static void blk_mig_cleanup(void)
Alon Levy 408bdb5
         g_free(blk->buf);
Alon Levy 408bdb5
         g_free(blk);
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 }
Alon Levy 408bdb5
 
Alon Levy 408bdb5
 static void block_migration_cancel(void *opaque)
Alon Levy 408bdb5
@@ -548,9 +587,11 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
Alon Levy 408bdb5
     blk_mig_reset_dirty_cursor();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     /* control the rate of transfer */
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     while ((block_mig_state.submitted +
Alon Levy 408bdb5
             block_mig_state.read_done) * BLOCK_SIZE <
Alon Levy 408bdb5
            qemu_file_get_rate_limit(f)) {
Alon Levy 408bdb5
+        blk_mig_unlock();
Alon Levy 408bdb5
         if (block_mig_state.bulk_completed == 0) {
Alon Levy 408bdb5
             /* first finish the bulk phase */
Alon Levy 408bdb5
             if (blk_mig_save_bulked_block(f) == 0) {
Alon Levy 408bdb5
@@ -564,11 +605,13 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
Alon Levy 408bdb5
         if (ret < 0) {
Alon Levy 408bdb5
             return ret;
Alon Levy 408bdb5
         }
Alon Levy 408bdb5
+        blk_mig_lock();
Alon Levy 408bdb5
         if (ret != 0) {
Alon Levy 408bdb5
             /* no more dirty blocks */
Alon Levy 408bdb5
             break;
Alon Levy 408bdb5
         }
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     ret = flush_blks(f);
Alon Levy 408bdb5
     if (ret) {
Alon Levy 408bdb5
@@ -595,7 +638,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     /* we know for sure that save bulk is completed and
Alon Levy 408bdb5
        all async read completed */
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     assert(block_mig_state.submitted == 0);
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     do {
Alon Levy 408bdb5
         ret = blk_mig_save_dirty_block(f, 0);
Alon Levy 408bdb5
@@ -620,6 +665,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
Alon Levy 408bdb5
     /* Estimate pending number of bytes to send */
Alon Levy 408bdb5
     uint64_t pending;
Alon Levy 408bdb5
 
Alon Levy 408bdb5
+    blk_mig_lock();
Alon Levy 408bdb5
     pending = get_remaining_dirty() +
Alon Levy 408bdb5
                        block_mig_state.submitted * BLOCK_SIZE +
Alon Levy 408bdb5
                        block_mig_state.read_done * BLOCK_SIZE;
Alon Levy 408bdb5
@@ -628,6 +674,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
Alon Levy 408bdb5
     if (pending == 0 && !block_mig_state.bulk_completed) {
Alon Levy 408bdb5
         pending = BLOCK_SIZE;
Alon Levy 408bdb5
     }
Alon Levy 408bdb5
+    blk_mig_unlock();
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
Alon Levy 408bdb5
     return pending;
Alon Levy 408bdb5
@@ -739,6 +786,7 @@ void blk_mig_init(void)
Alon Levy 408bdb5
 {
Alon Levy 408bdb5
     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
Alon Levy 408bdb5
     QSIMPLEQ_INIT(&block_mig_state.blk_list);
Alon Levy 408bdb5
+    qemu_mutex_init(&block_mig_state.lock);
Alon Levy 408bdb5
 
Alon Levy 408bdb5
     register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
Alon Levy 408bdb5
                          &block_mig_state);
Alon Levy 408bdb5
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
Alon Levy 408bdb5
index 96a194b..10becb6 100644
Alon Levy 408bdb5
--- a/include/qemu/atomic.h
Alon Levy 408bdb5
+++ b/include/qemu/atomic.h
Alon Levy 408bdb5
@@ -16,6 +16,7 @@
Alon Levy 408bdb5
  */
Alon Levy 408bdb5
 #define smp_wmb()   barrier()
Alon Levy 408bdb5
 #define smp_rmb()   barrier()
Alon Levy 408bdb5
+
Alon Levy 408bdb5
 /*
Alon Levy 408bdb5
  * We use GCC builtin if it's available, as that can use
Alon Levy 408bdb5
  * mfence on 32 bit as well, e.g. if built with -march=pentium-m.