f3a92ca
From: Kevin Wolf <kwolf@redhat.com>
f3a92ca
Date: Fri, 28 Mar 2014 18:06:31 +0100
f3a92ca
Subject: [PATCH] qcow2: Don't rely on free_cluster_index in
f3a92ca
 alloc_refcount_block() (CVE-2014-0147)
f3a92ca
f3a92ca
free_cluster_index is only correct if update_refcount() was called from
f3a92ca
an allocation function, and even there it's brittle because it's used to
f3a92ca
protect unfinished allocations which still have a refcount of 0 - if it
f3a92ca
moves in the wrong place, the unfinished allocation can be corrupted.
f3a92ca
f3a92ca
So not using it any more seems to be a good idea. Instead, use the
f3a92ca
first requested cluster to do the calculations. Return -EAGAIN if
f3a92ca
unfinished allocations could become invalid and let the caller restart
f3a92ca
its search for some free clusters.
f3a92ca
f3a92ca
The context of creating a snapsnot is one situation where
f3a92ca
update_refcount() is called outside of a cluster allocation. For this
f3a92ca
case, the change fixes a buffer overflow if a cluster is referenced in
f3a92ca
an L2 table that cannot be represented by an existing refcount block.
f3a92ca
(new_table[refcount_table_index] was out of bounds)
f3a92ca
f3a92ca
[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
f3a92ca
11.
f3a92ca
--Stefan]
f3a92ca
f3a92ca
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
f3a92ca
Reviewed-by: Max Reitz <mreitz@redhat.com>
f3a92ca
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
f3a92ca
(cherry picked from commit b106ad9185f35fc4ad669555ad0e79e276083bd7)
f3a92ca
f3a92ca
Conflicts:
f3a92ca
	block/qcow2.c
f3a92ca
	tests/qemu-iotests/080
f3a92ca
	tests/qemu-iotests/080.out
f3a92ca
---
f3a92ca
 block/qcow2-refcount.c | 72 ++++++++++++++++++++++++++------------------------
7452f5d
 block/qcow2.c          | 11 ++++----
7452f5d
 2 files changed, 43 insertions(+), 40 deletions(-)
f3a92ca
f3a92ca
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
f3a92ca
index 1308151..d784dd6 100644
f3a92ca
--- a/block/qcow2-refcount.c
f3a92ca
+++ b/block/qcow2-refcount.c
f3a92ca
@@ -191,10 +191,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
f3a92ca
      *   they can describe them themselves.
f3a92ca
      *
f3a92ca
      * - We need to consider that at this point we are inside update_refcounts
f3a92ca
-     *   and doing the initial refcount increase. This means that some clusters
f3a92ca
-     *   have already been allocated by the caller, but their refcount isn't
f3a92ca
-     *   accurate yet. free_cluster_index tells us where this allocation ends
f3a92ca
-     *   as long as we don't overwrite it by freeing clusters.
f3a92ca
+     *   and potentially doing an initial refcount increase. This means that
f3a92ca
+     *   some clusters have already been allocated by the caller, but their
f3a92ca
+     *   refcount isn't accurate yet. If we allocate clusters for metadata, we
f3a92ca
+     *   need to return -EAGAIN to signal the caller that it needs to restart
f3a92ca
+     *   the search for free clusters.
f3a92ca
      *
f3a92ca
      * - alloc_clusters_noref and qcow2_free_clusters may load a different
f3a92ca
      *   refcount block into the cache
f3a92ca
@@ -279,7 +280,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
f3a92ca
         }
f3a92ca
 
f3a92ca
         s->refcount_table[refcount_table_index] = new_block;
f3a92ca
-        return 0;
f3a92ca
+
f3a92ca
+        /* The new refcount block may be where the caller intended to put its
f3a92ca
+         * data, so let it restart the search. */
f3a92ca
+        return -EAGAIN;
f3a92ca
     }
f3a92ca
 
f3a92ca
     ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
f3a92ca
@@ -302,8 +306,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
f3a92ca
 
f3a92ca
     /* Calculate the number of refcount blocks needed so far */
f3a92ca
     uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
f3a92ca
-    uint64_t blocks_used = (s->free_cluster_index +
f3a92ca
-        refcount_block_clusters - 1) / refcount_block_clusters;
f3a92ca
+    uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
f3a92ca
 
f3a92ca
     /* And now we need at least one block more for the new metadata */
f3a92ca
     uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
f3a92ca
@@ -336,8 +339,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
f3a92ca
     uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
f3a92ca
     uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
f3a92ca
 
f3a92ca
-    assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
f3a92ca
-
f3a92ca
     /* Fill the new refcount table */
f3a92ca
     memcpy(new_table, s->refcount_table,
f3a92ca
         s->refcount_table_size * sizeof(uint64_t));
f3a92ca
@@ -400,18 +401,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
f3a92ca
     s->refcount_table_size = table_size;
f3a92ca
     s->refcount_table_offset = table_offset;
f3a92ca
 
f3a92ca
-    /* Free old table. Remember, we must not change free_cluster_index */
f3a92ca
-    uint64_t old_free_cluster_index = s->free_cluster_index;
f3a92ca
+    /* Free old table. */
f3a92ca
     qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
f3a92ca
                         QCOW2_DISCARD_OTHER);
f3a92ca
-    s->free_cluster_index = old_free_cluster_index;
f3a92ca
 
f3a92ca
     ret = load_refcount_block(bs, new_block, (void**) refcount_block);
f3a92ca
     if (ret < 0) {
f3a92ca
         return ret;
f3a92ca
     }
f3a92ca
 
f3a92ca
-    return 0;
f3a92ca
+    /* If we were trying to do the initial refcount update for some cluster
f3a92ca
+     * allocation, we might have used the same clusters to store newly
f3a92ca
+     * allocated metadata. Make the caller search some new space. */
f3a92ca
+    return -EAGAIN;
f3a92ca
 
f3a92ca
 fail_table:
f3a92ca
     g_free(new_table);
f3a92ca
@@ -657,12 +659,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
f3a92ca
     int ret;
f3a92ca
 
f3a92ca
     BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
f3a92ca
-    offset = alloc_clusters_noref(bs, size);
f3a92ca
-    if (offset < 0) {
f3a92ca
-        return offset;
f3a92ca
-    }
f3a92ca
+    do {
f3a92ca
+        offset = alloc_clusters_noref(bs, size);
f3a92ca
+        if (offset < 0) {
f3a92ca
+            return offset;
f3a92ca
+        }
f3a92ca
+
f3a92ca
+        ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
f3a92ca
+    } while (ret == -EAGAIN);
f3a92ca
 
f3a92ca
-    ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
f3a92ca
     if (ret < 0) {
f3a92ca
         return ret;
f3a92ca
     }
f3a92ca
@@ -675,7 +680,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
f3a92ca
 {
f3a92ca
     BDRVQcowState *s = bs->opaque;
f3a92ca
     uint64_t cluster_index;
f3a92ca
-    uint64_t old_free_cluster_index;
f3a92ca
     uint64_t i;
f3a92ca
     int refcount, ret;
f3a92ca
 
f3a92ca
@@ -684,30 +688,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
f3a92ca
         return 0;
f3a92ca
     }
f3a92ca
 
f3a92ca
-    /* Check how many clusters there are free */
f3a92ca
-    cluster_index = offset >> s->cluster_bits;
f3a92ca
-    for(i = 0; i < nb_clusters; i++) {
f3a92ca
-        refcount = get_refcount(bs, cluster_index++);
f3a92ca
+    do {
f3a92ca
+        /* Check how many clusters there are free */
f3a92ca
+        cluster_index = offset >> s->cluster_bits;
f3a92ca
+        for(i = 0; i < nb_clusters; i++) {
f3a92ca
+            refcount = get_refcount(bs, cluster_index++);
f3a92ca
 
f3a92ca
-        if (refcount < 0) {
f3a92ca
-            return refcount;
f3a92ca
-        } else if (refcount != 0) {
f3a92ca
-            break;
f3a92ca
+            if (refcount < 0) {
f3a92ca
+                return refcount;
f3a92ca
+            } else if (refcount != 0) {
f3a92ca
+                break;
f3a92ca
+            }
f3a92ca
         }
f3a92ca
-    }
f3a92ca
 
f3a92ca
-    /* And then allocate them */
f3a92ca
-    old_free_cluster_index = s->free_cluster_index;
f3a92ca
-    s->free_cluster_index = cluster_index + i;
f3a92ca
+        /* And then allocate them */
f3a92ca
+        ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
f3a92ca
+                              QCOW2_DISCARD_NEVER);
f3a92ca
+    } while (ret == -EAGAIN);
f3a92ca
 
f3a92ca
-    ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
f3a92ca
-                          QCOW2_DISCARD_NEVER);
f3a92ca
     if (ret < 0) {
f3a92ca
         return ret;
f3a92ca
     }
f3a92ca
 
f3a92ca
-    s->free_cluster_index = old_free_cluster_index;
f3a92ca
-
f3a92ca
     return i;
f3a92ca
 }
f3a92ca
 
f3a92ca
diff --git a/block/qcow2.c b/block/qcow2.c
7452f5d
index 10bfaaf..5d45036 100644
f3a92ca
--- a/block/qcow2.c
f3a92ca
+++ b/block/qcow2.c
7452f5d
@@ -1385,7 +1385,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
7452f5d
      */
7452f5d
     BlockDriverState* bs;
7452f5d
     QCowHeader *header;
7452f5d
-    uint8_t* refcount_table;
7452f5d
+    uint64_t* refcount_table;
7452f5d
     int ret;
7452f5d
 
7452f5d
     ret = bdrv_create_file(filename, options);
f3a92ca
@@ -1431,9 +1431,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
f3a92ca
         goto out;
f3a92ca
     }
f3a92ca
 
f3a92ca
-    /* Write an empty refcount table */
f3a92ca
-    refcount_table = g_malloc0(cluster_size);
f3a92ca
-    ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
f3a92ca
+    /* Write a refcount table with one refcount block */
f3a92ca
+    refcount_table = g_malloc0(2 * cluster_size);
f3a92ca
+    refcount_table[0] = cpu_to_be64(2 * cluster_size);
f3a92ca
+    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
f3a92ca
     g_free(refcount_table);
f3a92ca
 
f3a92ca
     if (ret < 0) {
f3a92ca
@@ -1455,7 +1456,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
f3a92ca
         goto out;
f3a92ca
     }
f3a92ca
 
f3a92ca
-    ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
f3a92ca
+    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
f3a92ca
     if (ret < 0) {
f3a92ca
         goto out;
f3a92ca