diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 6bdab89..729d5fd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -269,7 +269,7 @@ struct init_tbl_entry {
 	int (*handler)(struct nvbios *, uint16_t, struct init_exec *);
 };
 
-static int parse_init_table(struct nvbios *, unsigned int, struct init_exec *);
+static int parse_init_table(struct nvbios *, uint16_t, struct init_exec *);
 
 #define MACRO_INDEX_SIZE	2
 #define MACRO_SIZE		8
@@ -282,7 +282,7 @@ static void still_alive(void)
 {
 #if 0
 	sync();
-	msleep(2);
+	mdelay(2);
 #endif
 }
 
@@ -1904,7 +1904,7 @@ init_condition_time(struct nvbios *bios, uint16_t offset,
 			BIOSLOG(bios, "0x%04X: "
 				"Condition not met, sleeping for 20ms\n",
 								offset);
-			msleep(20);
+			mdelay(20);
 		}
 	}
 
@@ -1938,7 +1938,7 @@ init_ltime(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 	BIOSLOG(bios, "0x%04X: Sleeping for 0x%04X milliseconds\n",
 		offset, time);
 
-	msleep(time);
+	mdelay(time);
 
 	return 3;
 }
@@ -2011,6 +2011,27 @@ init_sub_direct(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 }
 
 static int
+init_jump(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
+{
+	/*
+	 * INIT_JUMP   opcode: 0x5C ('\')
+	 *
+	 * offset      (8  bit): opcode
+	 * offset + 1  (16 bit): offset (in bios)
+	 *
+	 * Continue execution of init table from 'offset'
+	 */
+
+	uint16_t jmp_offset = ROM16(bios->data[offset + 1]);
+
+	if (!iexec->execute)
+		return 3;
+
+	BIOSLOG(bios, "0x%04X: Jump to 0x%04X\n", offset, jmp_offset);
+	return jmp_offset - offset;
+}
+
+static int
 init_i2c_if(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2962,7 +2983,7 @@ init_time(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 	if (time < 1000)
 		udelay(time);
 	else
-		msleep((time + 900) / 1000);
+		mdelay((time + 900) / 1000);
 
 	return 3;
 }
@@ -3659,6 +3680,7 @@ static struct init_tbl_entry itbl_entry[] = {
 	{ "INIT_ZM_REG_SEQUENCE"              , 0x58, init_zm_reg_sequence            },
 	/* INIT_INDIRECT_REG (0x5A, 7, 0, 0) removed due to no example of use */
 	{ "INIT_SUB_DIRECT"                   , 0x5B, init_sub_direct                 },
+	{ "INIT_JUMP"                         , 0x5C, init_jump                       },
 	{ "INIT_I2C_IF"                       , 0x5E, init_i2c_if                     },
 	{ "INIT_COPY_NV_REG"                  , 0x5F, init_copy_nv_reg                },
 	{ "INIT_ZM_INDEX_IO"                  , 0x62, init_zm_index_io                },
@@ -3700,8 +3722,7 @@ static struct init_tbl_entry itbl_entry[] = {
 #define MAX_TABLE_OPS 1000
 
 static int
-parse_init_table(struct nvbios *bios, unsigned int offset,
-		 struct init_exec *iexec)
+parse_init_table(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
 	 * Parses all commands in an init table.
@@ -3856,7 +3877,7 @@ static int call_lvds_manufacturer_script(struct drm_device *dev, struct dcb_entr
 
 	if (script == LVDS_PANEL_OFF) {
 		/* off-on delay in ms */
-		msleep(ROM16(bios->data[bios->fp.xlated_entry + 7]));
+		mdelay(ROM16(bios->data[bios->fp.xlated_entry + 7]));
 	}
 #ifdef __powerpc__
 	/* Powerbook specific quirks */
@@ -5028,11 +5049,7 @@ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims
 		pll_lim->vco1.max_n = record[11];
 		pll_lim->min_p = record[12];
 		pll_lim->max_p = record[13];
-		/* where did this go to?? */
-		if ((entry[0] & 0xf0) == 0x80)
-			pll_lim->refclk = 27000;
-		else
-			pll_lim->refclk = 100000;
+		pll_lim->refclk = ROM16(entry[9]) * 1000;
 	}
 
 	/*
@@ -5950,6 +5967,11 @@ apply_dcb_connector_quirks(struct nvbios *bios, int idx)
 	}
 }
 
+static const u8 hpd_gpio[16] = {
+	0xff, 0x07, 0x08, 0xff, 0xff, 0x51, 0x52, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0x5f, 0x60,
+};
+
 static void
 parse_dcb_connector_table(struct nvbios *bios)
 {
@@ -5986,23 +6008,9 @@ parse_dcb_connector_table(struct nvbios *bios)
 
 		cte->type  = (cte->entry & 0x000000ff) >> 0;
 		cte->index2 = (cte->entry & 0x00000f00) >> 8;
-		switch (cte->entry & 0x00033000) {
-		case 0x00001000:
-			cte->gpio_tag = 0x07;
-			break;
-		case 0x00002000:
-			cte->gpio_tag = 0x08;
-			break;
-		case 0x00010000:
-			cte->gpio_tag = 0x51;
-			break;
-		case 0x00020000:
-			cte->gpio_tag = 0x52;
-			break;
-		default:
-			cte->gpio_tag = 0xff;
-			break;
-		}
+
+		cte->gpio_tag = ffs((cte->entry & 0x07033000) >> 12);
+		cte->gpio_tag = hpd_gpio[cte->gpio_tag];
 
 		if (cte->type == 0xff)
 			continue;
@@ -6023,6 +6031,7 @@ parse_dcb_connector_table(struct nvbios *bios)
 		case DCB_CONNECTOR_DVI_I:
 		case DCB_CONNECTOR_DVI_D:
 		case DCB_CONNECTOR_LVDS:
+		case DCB_CONNECTOR_LVDS_SPWG:
 		case DCB_CONNECTOR_DP:
 		case DCB_CONNECTOR_eDP:
 		case DCB_CONNECTOR_HDMI_0:
@@ -6342,6 +6351,32 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
 		}
 	}
 
+	/* XFX GT-240X-YA
+	 *
+	 * So many things wrong here, replace the entire encoder table..
+	 */
+	if (nv_match_device(dev, 0x0ca3, 0x1682, 0x3003)) {
+		if (idx == 0) {
+			*conn = 0x02001300; /* VGA, connector 1 */
+			*conf = 0x00000028;
+		} else
+		if (idx == 1) {
+			*conn = 0x01010312; /* DVI, connector 0 */
+			*conf = 0x00020030;
+		} else
+		if (idx == 2) {
+			*conn = 0x01010310; /* VGA, connector 0 */
+			*conf = 0x00000028;
+		} else
+		if (idx == 3) {
+			*conn = 0x02022362; /* HDMI, connector 2 */
+			*conf = 0x00020010;
+		} else {
+			*conn = 0x0000000e; /* EOL */
+			*conf = 0x00000000;
+		}
+	}
+
 	return true;
 }
 
@@ -6702,11 +6737,11 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 	struct nvbios *bios = &dev_priv->vbios;
 	struct init_exec iexec = { true, false };
 
-	mutex_lock(&bios->lock);
+	spin_lock_bh(&bios->lock);
 	bios->display.output = dcbent;
 	parse_init_table(bios, table, &iexec);
 	bios->display.output = NULL;
-	mutex_unlock(&bios->lock);
+	spin_unlock_bh(&bios->lock);
 }
 
 static bool NVInitVBIOS(struct drm_device *dev)
@@ -6715,7 +6750,7 @@ static bool NVInitVBIOS(struct drm_device *dev)
 	struct nvbios *bios = &dev_priv->vbios;
 
 	memset(bios, 0, sizeof(struct nvbios));
-	mutex_init(&bios->lock);
+	spin_lock_init(&bios->lock);
 	bios->dev = dev;
 
 	if (!NVShadowVBIOS(dev, bios->data))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 50a648e..050c314 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -82,6 +82,7 @@ enum dcb_connector_type {
 	DCB_CONNECTOR_DVI_I = 0x30,
 	DCB_CONNECTOR_DVI_D = 0x31,
 	DCB_CONNECTOR_LVDS = 0x40,
+	DCB_CONNECTOR_LVDS_SPWG = 0x41,
 	DCB_CONNECTOR_DP = 0x46,
 	DCB_CONNECTOR_eDP = 0x47,
 	DCB_CONNECTOR_HDMI_0 = 0x60,
@@ -251,7 +252,7 @@ struct nvbios {
 	uint8_t digital_min_front_porch;
 	bool fp_no_ddc;
 
-	struct mutex lock;
+	spinlock_t lock;
 
 	uint8_t data[NV_PROM_SIZE];
 	unsigned int length;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index a521840..2ad49cb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -57,8 +57,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 }
 
 static void
-nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, int *size,
-		       int *page_shift)
+nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
+		       int *align, int *size, int *page_shift)
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
 
@@ -83,7 +83,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, int *size,
 		}
 	} else {
 		if (likely(dev_priv->chan_vm)) {
-			if (*size > 256 * 1024)
+			if (!(flags & TTM_PL_FLAG_TT) &&  *size > 256 * 1024)
 				*page_shift = dev_priv->chan_vm->lpg_shift;
 			else
 				*page_shift = dev_priv->chan_vm->spg_shift;
@@ -101,8 +101,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, int *size,
 int
 nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 	       int size, int align, uint32_t flags, uint32_t tile_mode,
-	       uint32_t tile_flags, bool no_vm, bool mappable,
-	       struct nouveau_bo **pnvbo)
+	       uint32_t tile_flags, struct nouveau_bo **pnvbo)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *nvbo;
@@ -113,16 +112,14 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 		return -ENOMEM;
 	INIT_LIST_HEAD(&nvbo->head);
 	INIT_LIST_HEAD(&nvbo->entry);
-	nvbo->mappable = mappable;
-	nvbo->no_vm = no_vm;
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &dev_priv->ttm.bdev;
 
-	nouveau_bo_fixup_align(nvbo, &align, &size, &page_shift);
+	nouveau_bo_fixup_align(nvbo, flags, &align, &size, &page_shift);
 	align >>= PAGE_SHIFT;
 
-	if (!nvbo->no_vm && dev_priv->chan_vm) {
+	if (dev_priv->chan_vm) {
 		ret = nouveau_vm_get(dev_priv->chan_vm, size, page_shift,
 				     NV_MEM_ACCESS_RW, &nvbo->vma);
 		if (ret) {
@@ -144,11 +141,8 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 	}
 	nvbo->channel = NULL;
 
-	if (nvbo->vma.node) {
-		if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
-			nvbo->bo.offset = nvbo->vma.offset;
-	}
-
+	if (nvbo->vma.node)
+		nvbo->bo.offset = nvbo->vma.offset;
 	*pnvbo = nvbo;
 	return 0;
 }
@@ -318,11 +312,8 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	if (ret)
 		return ret;
 
-	if (nvbo->vma.node) {
-		if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
-			nvbo->bo.offset = nvbo->vma.offset;
-	}
-
+	if (nvbo->vma.node)
+		nvbo->bo.offset = nvbo->vma.offset;
 	return 0;
 }
 
@@ -385,7 +376,8 @@ nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev)
 	case NOUVEAU_GART_AGP:
 		return ttm_agp_backend_init(bdev, dev->agp->bridge);
 #endif
-	case NOUVEAU_GART_SGDMA:
+	case NOUVEAU_GART_PDMA:
+	case NOUVEAU_GART_HW:
 		return nouveau_sgdma_init_ttm(dev);
 	default:
 		NV_ERROR(dev, "Unknown GART type %d\n",
@@ -431,7 +423,10 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		man->default_caching = TTM_PL_FLAG_WC;
 		break;
 	case TTM_PL_TT:
-		man->func = &ttm_bo_manager_func;
+		if (dev_priv->card_type >= NV_50)
+			man->func = &nouveau_gart_manager;
+		else
+			man->func = &ttm_bo_manager_func;
 		switch (dev_priv->gart_info.type) {
 		case NOUVEAU_GART_AGP:
 			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
@@ -439,7 +434,8 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 				TTM_PL_FLAG_WC;
 			man->default_caching = TTM_PL_FLAG_WC;
 			break;
-		case NOUVEAU_GART_SGDMA:
+		case NOUVEAU_GART_PDMA:
+		case NOUVEAU_GART_HW:
 			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE |
 				     TTM_MEMTYPE_FLAG_CMA;
 			man->available_caching = TTM_PL_MASK_CACHING;
@@ -501,45 +497,22 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 	return ret;
 }
 
-static inline uint32_t
-nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
-		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
-{
-	struct nouveau_bo *nvbo = nouveau_bo(bo);
-
-	if (nvbo->no_vm) {
-		if (mem->mem_type == TTM_PL_TT)
-			return NvDmaGART;
-		return NvDmaVRAM;
-	}
-
-	if (mem->mem_type == TTM_PL_TT)
-		return chan->gart_handle;
-	return chan->vram_handle;
-}
-
 static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
-	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct nouveau_mem *old_node = old_mem->mm_node;
+	struct nouveau_mem *new_node = new_mem->mm_node;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	u64 src_offset = old_mem->start << PAGE_SHIFT;
-	u64 dst_offset = new_mem->start << PAGE_SHIFT;
 	u32 page_count = new_mem->num_pages;
+	u64 src_offset, dst_offset;
 	int ret;
 
-	if (!nvbo->no_vm) {
-		if (old_mem->mem_type == TTM_PL_VRAM)
-			src_offset  = nvbo->vma.offset;
-		else
-			src_offset += dev_priv->gart_info.aper_base;
-
-		if (new_mem->mem_type == TTM_PL_VRAM)
-			dst_offset  = nvbo->vma.offset;
-		else
-			dst_offset += dev_priv->gart_info.aper_base;
-	}
+	src_offset = old_node->tmp_vma.offset;
+	if (new_node->tmp_vma.node)
+		dst_offset = new_node->tmp_vma.offset;
+	else
+		dst_offset = nvbo->vma.offset;
 
 	page_count = new_mem->num_pages;
 	while (page_count) {
@@ -574,33 +547,18 @@ static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
-	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct nouveau_mem *old_node = old_mem->mm_node;
+	struct nouveau_mem *new_node = new_mem->mm_node;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	u64 length = (new_mem->num_pages << PAGE_SHIFT);
 	u64 src_offset, dst_offset;
 	int ret;
 
-	src_offset = old_mem->start << PAGE_SHIFT;
-	dst_offset = new_mem->start << PAGE_SHIFT;
-	if (!nvbo->no_vm) {
-		if (old_mem->mem_type == TTM_PL_VRAM)
-			src_offset  = nvbo->vma.offset;
-		else
-			src_offset += dev_priv->gart_info.aper_base;
-
-		if (new_mem->mem_type == TTM_PL_VRAM)
-			dst_offset  = nvbo->vma.offset;
-		else
-			dst_offset += dev_priv->gart_info.aper_base;
-	}
-
-	ret = RING_SPACE(chan, 3);
-	if (ret)
-		return ret;
-
-	BEGIN_RING(chan, NvSubM2MF, 0x0184, 2);
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
+	src_offset = old_node->tmp_vma.offset;
+	if (new_node->tmp_vma.node)
+		dst_offset = new_node->tmp_vma.offset;
+	else
+		dst_offset = nvbo->vma.offset;
 
 	while (length) {
 		u32 amount, stride, height;
@@ -681,6 +639,15 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 	return 0;
 }
 
+static inline uint32_t
+nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
+		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+{
+	if (mem->mem_type == TTM_PL_TT)
+		return chan->gart_handle;
+	return chan->vram_handle;
+}
+
 static int
 nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
@@ -734,15 +701,43 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct nouveau_channel *chan;
 	int ret;
 
 	chan = nvbo->channel;
-	if (!chan || nvbo->no_vm) {
+	if (!chan) {
 		chan = dev_priv->channel;
 		mutex_lock_nested(&chan->mutex, NOUVEAU_KCHANNEL_MUTEX);
 	}
 
+	/* create temporary vma for old memory, this will get cleaned
+	 * up after ttm destroys the ttm_mem_reg
+	 */
+	if (dev_priv->card_type >= NV_50) {
+		struct nouveau_mem *node = old_mem->mm_node;
+		if (!node->tmp_vma.node) {
+			u32 page_shift = nvbo->vma.node->type;
+			if (old_mem->mem_type == TTM_PL_TT)
+				page_shift = nvbo->vma.vm->spg_shift;
+
+			ret = nouveau_vm_get(chan->vm,
+					     old_mem->num_pages << PAGE_SHIFT,
+					     page_shift, NV_MEM_ACCESS_RO,
+					     &node->tmp_vma);
+			if (ret)
+				goto out;
+		}
+
+		if (old_mem->mem_type == TTM_PL_VRAM)
+			nouveau_vm_map(&node->tmp_vma, node);
+		else {
+			nouveau_vm_map_sg(&node->tmp_vma, 0,
+					  old_mem->num_pages << PAGE_SHIFT,
+					  node, node->pages);
+		}
+	}
+
 	if (dev_priv->card_type < NV_50)
 		ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
 	else
@@ -756,6 +751,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 						    no_wait_gpu, new_mem);
 	}
 
+out:
 	if (chan == dev_priv->channel)
 		mutex_unlock(&chan->mutex);
 	return ret;
@@ -766,6 +762,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		      bool no_wait_reserve, bool no_wait_gpu,
 		      struct ttm_mem_reg *new_mem)
 {
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
 	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	struct ttm_placement placement;
 	struct ttm_mem_reg tmp_mem;
@@ -785,7 +782,23 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		goto out;
 
+	if (dev_priv->card_type >= NV_50) {
+		struct nouveau_bo *nvbo = nouveau_bo(bo);
+		struct nouveau_mem *node = tmp_mem.mm_node;
+		struct nouveau_vma *vma = &nvbo->vma;
+		if (vma->node->type != vma->vm->spg_shift)
+			vma = &node->tmp_vma;
+		nouveau_vm_map_sg(vma, 0, tmp_mem.num_pages << PAGE_SHIFT,
+				  node, node->pages);
+	}
+
 	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, &tmp_mem);
+
+	if (dev_priv->card_type >= NV_50) {
+		struct nouveau_bo *nvbo = nouveau_bo(bo);
+		nouveau_vm_unmap(&nvbo->vma);
+	}
+
 	if (ret)
 		goto out;
 
@@ -828,6 +841,36 @@ out:
 	return ret;
 }
 
+static void
+nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
+{
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct nouveau_mem *node = new_mem->mm_node;
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	struct nouveau_vma *vma = &nvbo->vma;
+	struct nouveau_vm *vm = vma->vm;
+
+	if (dev_priv->card_type < NV_50)
+		return;
+
+	switch (new_mem->mem_type) {
+	case TTM_PL_VRAM:
+		nouveau_vm_map(vma, node);
+		break;
+	case TTM_PL_TT:
+		if (vma->node->type != vm->spg_shift) {
+			nouveau_vm_unmap(vma);
+			vma = &node->tmp_vma;
+		}
+		nouveau_vm_map_sg(vma, 0, new_mem->num_pages << PAGE_SHIFT,
+				  node, node->pages);
+		break;
+	default:
+		nouveau_vm_unmap(&nvbo->vma);
+		break;
+	}
+}
+
 static int
 nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
 		   struct nouveau_tile_reg **new_tile)
@@ -835,19 +878,13 @@ nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = dev_priv->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	uint64_t offset;
+	u64 offset = new_mem->start << PAGE_SHIFT;
 
-	if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
-		/* Nothing to do. */
-		*new_tile = NULL;
+	*new_tile = NULL;
+	if (new_mem->mem_type != TTM_PL_VRAM)
 		return 0;
-	}
-
-	offset = new_mem->start << PAGE_SHIFT;
 
-	if (dev_priv->chan_vm) {
-		nouveau_vm_map(&nvbo->vma, new_mem->mm_node);
-	} else if (dev_priv->card_type >= NV_10) {
+	if (dev_priv->card_type >= NV_10) {
 		*new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
 						nvbo->tile_mode,
 						nvbo->tile_flags);
@@ -864,11 +901,8 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = dev_priv->dev;
 
-	if (dev_priv->card_type >= NV_10 &&
-	    dev_priv->card_type < NV_50) {
-		nv10_mem_put_tile_region(dev, *old_tile, bo->sync_obj);
-		*old_tile = new_tile;
-	}
+	nv10_mem_put_tile_region(dev, *old_tile, bo->sync_obj);
+	*old_tile = new_tile;
 }
 
 static int
@@ -882,9 +916,11 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	struct nouveau_tile_reg *new_tile = NULL;
 	int ret = 0;
 
-	ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
-	if (ret)
-		return ret;
+	if (dev_priv->card_type < NV_50) {
+		ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+		if (ret)
+			return ret;
+	}
 
 	/* Fake bo copy. */
 	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
@@ -915,10 +951,12 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 
 out:
-	if (ret)
-		nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
-	else
-		nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+	if (dev_priv->card_type < NV_50) {
+		if (ret)
+			nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
+		else
+			nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+	}
 
 	return ret;
 }
@@ -959,7 +997,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 		break;
 	case TTM_PL_VRAM:
 	{
-		struct nouveau_vram *vram = mem->mm_node;
+		struct nouveau_mem *node = mem->mm_node;
 		u8 page_shift;
 
 		if (!dev_priv->bar1_vm) {
@@ -970,23 +1008,23 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 		}
 
 		if (dev_priv->card_type == NV_C0)
-			page_shift = vram->page_shift;
+			page_shift = node->page_shift;
 		else
 			page_shift = 12;
 
 		ret = nouveau_vm_get(dev_priv->bar1_vm, mem->bus.size,
 				     page_shift, NV_MEM_ACCESS_RW,
-				     &vram->bar_vma);
+				     &node->bar_vma);
 		if (ret)
 			return ret;
 
-		nouveau_vm_map(&vram->bar_vma, vram);
+		nouveau_vm_map(&node->bar_vma, node);
 		if (ret) {
-			nouveau_vm_put(&vram->bar_vma);
+			nouveau_vm_put(&node->bar_vma);
 			return ret;
 		}
 
-		mem->bus.offset = vram->bar_vma.offset;
+		mem->bus.offset = node->bar_vma.offset;
 		if (dev_priv->card_type == NV_50) /*XXX*/
 			mem->bus.offset -= 0x0020000000ULL;
 		mem->bus.base = pci_resource_start(dev->pdev, 1);
@@ -1003,16 +1041,16 @@ static void
 nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
-	struct nouveau_vram *vram = mem->mm_node;
+	struct nouveau_mem *node = mem->mm_node;
 
 	if (!dev_priv->bar1_vm || mem->mem_type != TTM_PL_VRAM)
 		return;
 
-	if (!vram->bar_vma.node)
+	if (!node->bar_vma.node)
 		return;
 
-	nouveau_vm_unmap(&vram->bar_vma);
-	nouveau_vm_put(&vram->bar_vma);
+	nouveau_vm_unmap(&node->bar_vma);
+	nouveau_vm_put(&node->bar_vma);
 }
 
 static int
@@ -1062,6 +1100,7 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.invalidate_caches = nouveau_bo_invalidate_caches,
 	.init_mem_type = nouveau_bo_init_mem_type,
 	.evict_flags = nouveau_bo_evict_flags,
+	.move_notify = nouveau_bo_move_ntfy,
 	.move = nouveau_bo_move,
 	.verify_access = nouveau_bo_verify_access,
 	.sync_obj_signaled = __nouveau_fence_signalled,
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 3960d66..3837090 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -35,7 +35,7 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *pb = chan->pushbuf_bo;
 	struct nouveau_gpuobj *pushbuf = NULL;
-	int ret;
+	int ret = 0;
 
 	if (dev_priv->card_type >= NV_50) {
 		if (dev_priv->card_type < NV_C0) {
@@ -90,8 +90,7 @@ nouveau_channel_user_pushbuf_alloc(struct drm_device *dev)
 	else
 		location = TTM_PL_FLAG_TT;
 
-	ret = nouveau_bo_new(dev, NULL, 65536, 0, location, 0, 0x0000, false,
-			     true, &pushbuf);
+	ret = nouveau_bo_new(dev, NULL, 65536, 0, location, 0, 0x0000, &pushbuf);
 	if (ret) {
 		NV_ERROR(dev, "error allocating DMA push buffer: %d\n", ret);
 		return NULL;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 390d82c..084c089 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -438,7 +438,7 @@ nouveau_connector_set_property(struct drm_connector *connector,
 		}
 
 		/* LVDS always needs gpu scaling */
-		if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS &&
+		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS &&
 		    value == DRM_MODE_SCALE_NONE)
 			return -EINVAL;
 
@@ -646,6 +646,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 		ret = get_slave_funcs(encoder)->get_modes(encoder, connector);
 
 	if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS ||
+	    nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG ||
 	    nv_connector->dcb->type == DCB_CONNECTOR_eDP)
 		ret += nouveau_connector_scaler_modes_add(connector);
 
@@ -806,6 +807,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 		type = DRM_MODE_CONNECTOR_HDMIA;
 		break;
 	case DCB_CONNECTOR_LVDS:
+	case DCB_CONNECTOR_LVDS_SPWG:
 		type = DRM_MODE_CONNECTOR_LVDS;
 		funcs = &nouveau_connector_funcs_lvds;
 		break;
@@ -834,7 +836,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
 
 	/* Check if we need dithering enabled */
-	if (dcb->type == DCB_CONNECTOR_LVDS) {
+	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
 		bool dummy, is_24bit = false;
 
 		ret = nouveau_bios_parse_lvds_table(dev, 0, &dummy, &is_24bit);
@@ -879,7 +881,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 				nv_connector->use_dithering ?
 				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
 
-		if (dcb->type != DCB_CONNECTOR_LVDS) {
+		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) {
 			if (dev_priv->card_type >= NV_50)
 				connector->polled = DRM_CONNECTOR_POLL_HPD;
 			else
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 505c6bf..764c15d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -32,6 +32,7 @@
 #include "nouveau_hw.h"
 #include "nouveau_crtc.h"
 #include "nouveau_dma.h"
+#include "nv50_display.h"
 
 static void
 nouveau_user_framebuffer_destroy(struct drm_framebuffer *drm_fb)
@@ -61,18 +62,59 @@ static const struct drm_framebuffer_funcs nouveau_framebuffer_funcs = {
 };
 
 int
-nouveau_framebuffer_init(struct drm_device *dev, struct nouveau_framebuffer *nouveau_fb,
-			 struct drm_mode_fb_cmd *mode_cmd, struct nouveau_bo *nvbo)
+nouveau_framebuffer_init(struct drm_device *dev,
+			 struct nouveau_framebuffer *nv_fb,
+			 struct drm_mode_fb_cmd *mode_cmd,
+			 struct nouveau_bo *nvbo)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct drm_framebuffer *fb = &nv_fb->base;
 	int ret;
 
-	ret = drm_framebuffer_init(dev, &nouveau_fb->base, &nouveau_framebuffer_funcs);
+	ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs);
 	if (ret) {
 		return ret;
 	}
 
-	drm_helper_mode_fill_fb_struct(&nouveau_fb->base, mode_cmd);
-	nouveau_fb->nvbo = nvbo;
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+	nv_fb->nvbo = nvbo;
+
+	if (dev_priv->card_type >= NV_50) {
+		u32 tile_flags = nouveau_bo_tile_layout(nvbo);
+		if (tile_flags == 0x7a00 ||
+		    tile_flags == 0xfe00)
+			nv_fb->r_dma = NvEvoFB32;
+		else
+		if (tile_flags == 0x7000)
+			nv_fb->r_dma = NvEvoFB16;
+		else
+			nv_fb->r_dma = NvEvoVRAM_LP;
+
+		switch (fb->depth) {
+		case  8: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_8; break;
+		case 15: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_15; break;
+		case 16: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_16; break;
+		case 24:
+		case 32: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_24; break;
+		case 30: nv_fb->r_format = NV50_EVO_CRTC_FB_DEPTH_30; break;
+		default:
+			 NV_ERROR(dev, "unknown depth %d\n", fb->depth);
+			 return -EINVAL;
+		}
+
+		if (dev_priv->chipset == 0x50)
+			nv_fb->r_format |= (tile_flags << 8);
+
+		if (!tile_flags)
+			nv_fb->r_pitch = 0x00100000 | fb->pitch;
+		else {
+			u32 mode = nvbo->tile_mode;
+			if (dev_priv->card_type >= NV_C0)
+				mode >>= 4;
+			nv_fb->r_pitch = ((fb->pitch / 4) << 4) | mode;
+		}
+	}
+
 	return 0;
 }
 
@@ -182,6 +224,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 		       struct nouveau_page_flip_state *s,
 		       struct nouveau_fence **pfence)
 {
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	unsigned long flags;
 	int ret;
@@ -201,9 +244,12 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 	if (ret)
 		goto fail;
 
-	BEGIN_RING(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
-	OUT_RING(chan, 0);
-	FIRE_RING(chan);
+	if (dev_priv->card_type < NV_C0)
+		BEGIN_RING(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
+	else
+		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0500, 1);
+	OUT_RING  (chan, 0);
+	FIRE_RING (chan);
 
 	ret = nouveau_fence_new(chan, pfence, true);
 	if (ret)
@@ -244,7 +290,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 
 	/* Initialize a page flip struct */
 	*s = (struct nouveau_page_flip_state)
-		{ { }, s->event, nouveau_crtc(crtc)->index,
+		{ { }, event, nouveau_crtc(crtc)->index,
 		  fb->bits_per_pixel, fb->pitch, crtc->x, crtc->y,
 		  new_bo->bo.offset };
 
@@ -255,6 +301,14 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	mutex_lock(&chan->mutex);
 
 	/* Emit a page flip */
+	if (dev_priv->card_type >= NV_50) {
+		ret = nv50_display_flip_next(crtc, fb, chan);
+		if (ret) {
+			nouveau_channel_put(&chan);
+			goto fail_unreserve;
+		}
+	}
+
 	ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
 	nouveau_channel_put(&chan);
 	if (ret)
@@ -305,7 +359,8 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
 	}
 
 	list_del(&s->head);
-	*ps = *s;
+	if (ps)
+		*ps = *s;
 	kfree(s);
 
 	spin_unlock_irqrestore(&dev->event_lock, flags);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index b368ed7..568caed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -83,7 +83,7 @@ nouveau_dma_init(struct nouveau_channel *chan)
 		return ret;
 
 	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
-	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfd0, 0x1000,
+	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
 				     &chan->m2mf_ntfy);
 	if (ret)
 		return ret;
@@ -97,13 +97,15 @@ nouveau_dma_init(struct nouveau_channel *chan)
 		OUT_RING(chan, 0);
 
 	/* Initialise NV_MEMORY_TO_MEMORY_FORMAT */
-	ret = RING_SPACE(chan, 4);
+	ret = RING_SPACE(chan, 6);
 	if (ret)
 		return ret;
 	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NAME, 1);
-	OUT_RING(chan, NvM2MF);
-	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
-	OUT_RING(chan, NvNotify0);
+	OUT_RING  (chan, NvM2MF);
+	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
+	OUT_RING  (chan, NvNotify0);
+	OUT_RING  (chan, chan->vram_handle);
+	OUT_RING  (chan, chan->gart_handle);
 
 	/* Sit back and pray the channel works.. */
 	FIRE_RING(chan);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index c36f176..23d4edf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -61,8 +61,6 @@ enum {
 	NvM2MF		= 0x80000001,
 	NvDmaFB		= 0x80000002,
 	NvDmaTT		= 0x80000003,
-	NvDmaVRAM	= 0x80000004,
-	NvDmaGART	= 0x80000005,
 	NvNotify0       = 0x80000006,
 	Nv2D		= 0x80000007,
 	NvCtxSurf2D	= 0x80000008,
@@ -73,12 +71,15 @@ enum {
 	NvImageBlit	= 0x8000000d,
 	NvSw		= 0x8000000e,
 	NvSema		= 0x8000000f,
+	NvEvoSema0	= 0x80000010,
+	NvEvoSema1	= 0x80000011,
 
 	/* G80+ display objects */
 	NvEvoVRAM	= 0x01000000,
 	NvEvoFB16	= 0x01000001,
 	NvEvoFB32	= 0x01000002,
-	NvEvoVRAM_LP	= 0x01000003
+	NvEvoVRAM_LP	= 0x01000003,
+	NvEvoSync	= 0xcafe0000
 };
 
 #define NV_MEMORY_TO_MEMORY_FORMAT                                    0x00000039
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 38d5995..7beb82a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -175,7 +175,6 @@ nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
 	struct bit_displayport_encoder_table *dpe;
 	int ret, i, dpe_headerlen, vs = 0, pre = 0;
 	uint8_t request[2];
@@ -183,7 +182,6 @@ nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config)
 	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
 	if (!dpe)
 		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
 
 	ret = auxch_rd(encoder, DP_ADJUST_REQUEST_LANE0_1, request, 2);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 982d70b..b260c55 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -57,7 +57,7 @@ struct nouveau_fpriv {
 #include "nouveau_util.h"
 
 struct nouveau_grctx;
-struct nouveau_vram;
+struct nouveau_mem;
 #include "nouveau_vm.h"
 
 #define MAX_NUM_DCB_ENTRIES 16
@@ -65,13 +65,16 @@ struct nouveau_vram;
 #define NOUVEAU_MAX_CHANNEL_NR 128
 #define NOUVEAU_MAX_TILE_NR 15
 
-struct nouveau_vram {
+struct nouveau_mem {
 	struct drm_device *dev;
 
 	struct nouveau_vma bar_vma;
+	struct nouveau_vma tmp_vma;
 	u8  page_shift;
 
+	struct drm_mm_node *tag;
 	struct list_head regions;
+	dma_addr_t *pages;
 	u32 memtype;
 	u64 offset;
 	u64 size;
@@ -90,6 +93,7 @@ struct nouveau_tile_reg {
 struct nouveau_bo {
 	struct ttm_buffer_object bo;
 	struct ttm_placement placement;
+	u32 valid_domains;
 	u32 placements[3];
 	u32 busy_placements[3];
 	struct ttm_bo_kmap_obj kmap;
@@ -104,8 +108,6 @@ struct nouveau_bo {
 	struct nouveau_channel *channel;
 
 	struct nouveau_vma vma;
-	bool mappable;
-	bool no_vm;
 
 	uint32_t tile_mode;
 	uint32_t tile_flags;
@@ -387,6 +389,7 @@ struct nouveau_pgraph_engine {
 };
 
 struct nouveau_display_engine {
+	void *priv;
 	int (*early_init)(struct drm_device *);
 	void (*late_takedown)(struct drm_device *);
 	int (*create)(struct drm_device *);
@@ -433,6 +436,7 @@ struct nouveau_pm_level {
 	u32 memory;
 	u32 shader;
 	u32 unk05;
+	u32 unk0a;
 
 	u8 voltage;
 	u8 fanspeed;
@@ -463,6 +467,7 @@ struct nouveau_pm_memtiming {
 	u32 reg_100234;
 	u32 reg_100238;
 	u32 reg_10023c;
+	u32 reg_100240;
 };
 
 struct nouveau_pm_memtimings {
@@ -509,8 +514,8 @@ struct nouveau_crypt_engine {
 struct nouveau_vram_engine {
 	int  (*init)(struct drm_device *);
 	int  (*get)(struct drm_device *, u64, u32 align, u32 size_nc,
-		    u32 type, struct nouveau_vram **);
-	void (*put)(struct drm_device *, struct nouveau_vram **);
+		    u32 type, struct nouveau_mem **);
+	void (*put)(struct drm_device *, struct nouveau_mem **);
 
 	bool (*flags_valid)(struct drm_device *, u32 tile_flags);
 };
@@ -634,6 +639,7 @@ struct drm_nouveau_private {
 	enum nouveau_card_type card_type;
 	/* exact chipset, derived from NV_PMC_BOOT_0 */
 	int chipset;
+	int stepping;
 	int flags;
 
 	void __iomem *mmio;
@@ -652,8 +658,6 @@ struct drm_nouveau_private {
 	/* interrupt handling */
 	void (*irq_handler[32])(struct drm_device *);
 	bool msi_enabled;
-	struct workqueue_struct *wq;
-	struct work_struct irq_work;
 
 	struct list_head vbl_waiting;
 
@@ -681,6 +685,9 @@ struct drm_nouveau_private {
 	/* For PFIFO and PGRAPH. */
 	spinlock_t context_switch_lock;
 
+	/* VM/PRAMIN flush, legacy PRAMIN aperture */
+	spinlock_t vm_lock;
+
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
 	struct nouveau_ramht  *ramht;
 	struct nouveau_gpuobj *ramfc;
@@ -691,15 +698,22 @@ struct drm_nouveau_private {
 	struct {
 		enum {
 			NOUVEAU_GART_NONE = 0,
-			NOUVEAU_GART_AGP,
-			NOUVEAU_GART_SGDMA
+			NOUVEAU_GART_AGP,	/* AGP */
+			NOUVEAU_GART_PDMA,	/* paged dma object */
+			NOUVEAU_GART_HW		/* on-chip gart/vm */
 		} type;
 		uint64_t aper_base;
 		uint64_t aper_size;
 		uint64_t aper_free;
 
+		struct ttm_backend_func *func;
+
+		struct {
+			struct page *page;
+			dma_addr_t   addr;
+		} dummy;
+
 		struct nouveau_gpuobj *sg_ctxdma;
-		struct nouveau_vma vma;
 	} gart_info;
 
 	/* nv10-nv40 tiling regions */
@@ -740,14 +754,6 @@ struct drm_nouveau_private {
 
 	struct backlight_device *backlight;
 
-	struct nouveau_channel *evo;
-	u32 evo_alloc;
-	struct {
-		struct dcb_entry *dcb;
-		u16 script;
-		u32 pclk;
-	} evo_irq;
-
 	struct {
 		struct dentry *channel_root;
 	} debugfs;
@@ -847,6 +853,7 @@ extern void nv10_mem_put_tile_region(struct drm_device *dev,
 				     struct nouveau_tile_reg *tile,
 				     struct nouveau_fence *fence);
 extern const struct ttm_mem_type_manager_func nouveau_vram_manager;
+extern const struct ttm_mem_type_manager_func nouveau_gart_manager;
 
 /* nouveau_notifier.c */
 extern int  nouveau_notifier_init_channel(struct nouveau_channel *);
@@ -879,17 +886,17 @@ extern void nouveau_channel_ref(struct nouveau_channel *chan,
 extern void nouveau_channel_idle(struct nouveau_channel *chan);
 
 /* nouveau_object.c */
-#define NVOBJ_CLASS(d,c,e) do {                                                \
+#define NVOBJ_CLASS(d, c, e) do {                                              \
 	int ret = nouveau_gpuobj_class_new((d), (c), NVOBJ_ENGINE_##e);        \
 	if (ret)                                                               \
 		return ret;                                                    \
-} while(0)
+} while (0)
 
-#define NVOBJ_MTHD(d,c,m,e) do {                                               \
+#define NVOBJ_MTHD(d, c, m, e) do {                                            \
 	int ret = nouveau_gpuobj_mthd_new((d), (c), (m), (e));                 \
 	if (ret)                                                               \
 		return ret;                                                    \
-} while(0)
+} while (0)
 
 extern int  nouveau_gpuobj_early_init(struct drm_device *);
 extern int  nouveau_gpuobj_init(struct drm_device *);
@@ -899,7 +906,7 @@ extern void nouveau_gpuobj_resume(struct drm_device *dev);
 extern int  nouveau_gpuobj_class_new(struct drm_device *, u32 class, u32 eng);
 extern int  nouveau_gpuobj_mthd_new(struct drm_device *, u32 class, u32 mthd,
 				    int (*exec)(struct nouveau_channel *,
-					        u32 class, u32 mthd, u32 data));
+						u32 class, u32 mthd, u32 data));
 extern int  nouveau_gpuobj_mthd_call(struct nouveau_channel *, u32, u32, u32);
 extern int  nouveau_gpuobj_mthd_call2(struct drm_device *, int, u32, u32, u32);
 extern int nouveau_gpuobj_channel_init(struct nouveau_channel *,
@@ -1076,7 +1083,7 @@ extern void nv40_fb_set_tile_region(struct drm_device *dev, int i);
 /* nv50_fb.c */
 extern int  nv50_fb_init(struct drm_device *);
 extern void nv50_fb_takedown(struct drm_device *);
-extern void nv50_fb_vm_trap(struct drm_device *, int display, const char *);
+extern void nv50_fb_vm_trap(struct drm_device *, int display);
 
 /* nvc0_fb.c */
 extern int  nvc0_fb_init(struct drm_device *);
@@ -1189,7 +1196,7 @@ extern int  nv50_graph_load_context(struct nouveau_channel *);
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern int  nv50_grctx_init(struct nouveau_grctx *);
 extern void nv50_graph_tlb_flush(struct drm_device *dev);
-extern void nv86_graph_tlb_flush(struct drm_device *dev);
+extern void nv84_graph_tlb_flush(struct drm_device *dev);
 extern struct nouveau_enum nv50_data_error_names[];
 
 /* nvc0_graph.c */
@@ -1295,7 +1302,7 @@ extern struct ttm_bo_driver nouveau_bo_driver;
 extern int nouveau_bo_new(struct drm_device *, struct nouveau_channel *,
 			  int size, int align, uint32_t flags,
 			  uint32_t tile_mode, uint32_t tile_flags,
-			  bool no_vm, bool mappable, struct nouveau_bo **);
+			  struct nouveau_bo **);
 extern int nouveau_bo_pin(struct nouveau_bo *, uint32_t flags);
 extern int nouveau_bo_unpin(struct nouveau_bo *);
 extern int nouveau_bo_map(struct nouveau_bo *);
@@ -1356,9 +1363,9 @@ static inline struct nouveau_fence *nouveau_fence_ref(struct nouveau_fence *obj)
 
 /* nouveau_gem.c */
 extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *,
-			   int size, int align, uint32_t flags,
+			   int size, int align, uint32_t domain,
 			   uint32_t tile_mode, uint32_t tile_flags,
-			   bool no_vm, bool mappable, struct nouveau_bo **);
+			   struct nouveau_bo **);
 extern int nouveau_gem_object_new(struct drm_gem_object *);
 extern void nouveau_gem_object_del(struct drm_gem_object *);
 extern int nouveau_gem_ioctl_new(struct drm_device *, void *,
@@ -1398,8 +1405,8 @@ bool nv50_gpio_irq_enable(struct drm_device *, enum dcb_gpio_tag, bool on);
 /* nv50_calc. */
 int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
 		  int *N1, int *M1, int *N2, int *M2, int *P);
-int nv50_calc_pll2(struct drm_device *, struct pll_lims *,
-		   int clk, int *N, int *fN, int *M, int *P);
+int nva3_calc_pll(struct drm_device *, struct pll_lims *,
+		  int clk, int *N, int *fN, int *M, int *P);
 
 #ifndef ioread32_native
 #ifdef __BIG_ENDIAN
diff --git a/drivers/gpu/drm/nouveau/nouveau_fb.h b/drivers/gpu/drm/nouveau/nouveau_fb.h
index d432134..a3a88ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fb.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fb.h
@@ -30,6 +30,9 @@
 struct nouveau_framebuffer {
 	struct drm_framebuffer base;
 	struct nouveau_bo *nvbo;
+	u32 r_dma;
+	u32 r_format;
+	u32 r_pitch;
 };
 
 static inline struct nouveau_framebuffer *
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 7826be0..39aee6d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -296,8 +296,8 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	size = mode_cmd.pitch * mode_cmd.height;
 	size = roundup(size, PAGE_SIZE);
 
-	ret = nouveau_gem_new(dev, dev_priv->channel, size, 0, TTM_PL_FLAG_VRAM,
-			      0, 0x0000, false, true, &nvbo);
+	ret = nouveau_gem_new(dev, dev_priv->channel, size, 0,
+			      NOUVEAU_GEM_DOMAIN_VRAM, 0, 0x0000, &nvbo);
 	if (ret) {
 		NV_ERROR(dev, "failed to allocate framebuffer\n");
 		goto out;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 221b846..4b9f449 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -27,13 +27,15 @@
 #include "drmP.h"
 #include "drm.h"
 
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
 #include "nouveau_dma.h"
 
 #define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
-#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \
-		       nouveau_private(dev)->card_type < NV_C0)
+#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17)
 
 struct nouveau_fence {
 	struct nouveau_channel *channel;
@@ -230,7 +232,8 @@ int
 __nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
 {
 	unsigned long timeout = jiffies + (3 * DRM_HZ);
-	unsigned long sleep_time = jiffies + 1;
+	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
+	ktime_t t;
 	int ret = 0;
 
 	while (1) {
@@ -244,8 +247,13 @@ __nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
 
 		__set_current_state(intr ? TASK_INTERRUPTIBLE
 			: TASK_UNINTERRUPTIBLE);
-		if (lazy && time_after_eq(jiffies, sleep_time))
-			schedule_timeout(1);
+		if (lazy) {
+			t = ktime_set(0, sleep_time);
+			schedule_hrtimeout(&t, HRTIMER_MODE_REL);
+			sleep_time *= 2;
+			if (sleep_time > NSEC_PER_MSEC)
+				sleep_time = NSEC_PER_MSEC;
+		}
 
 		if (intr && signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -259,11 +267,12 @@ __nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
 }
 
 static struct nouveau_semaphore *
-alloc_semaphore(struct drm_device *dev)
+semaphore_alloc(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_semaphore *sema;
-	int ret;
+	int size = (dev_priv->chipset < 0x84) ? 4 : 16;
+	int ret, i;
 
 	if (!USE_SEMA(dev))
 		return NULL;
@@ -277,9 +286,9 @@ alloc_semaphore(struct drm_device *dev)
 		goto fail;
 
 	spin_lock(&dev_priv->fence.lock);
-	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
+	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, size, 0, 0);
 	if (sema->mem)
-		sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
+		sema->mem = drm_mm_get_block_atomic(sema->mem, size, 0);
 	spin_unlock(&dev_priv->fence.lock);
 
 	if (!sema->mem)
@@ -287,7 +296,8 @@ alloc_semaphore(struct drm_device *dev)
 
 	kref_init(&sema->ref);
 	sema->dev = dev;
-	nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);
+	for (i = sema->mem->start; i < sema->mem->start + size; i += 4)
+		nouveau_bo_wr32(dev_priv->fence.bo, i / 4, 0);
 
 	return sema;
 fail:
@@ -296,7 +306,7 @@ fail:
 }
 
 static void
-free_semaphore(struct kref *ref)
+semaphore_free(struct kref *ref)
 {
 	struct nouveau_semaphore *sema =
 		container_of(ref, struct nouveau_semaphore, ref);
@@ -318,61 +328,107 @@ semaphore_work(void *priv, bool signalled)
 	if (unlikely(!signalled))
 		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);
 
-	kref_put(&sema->ref, free_semaphore);
+	kref_put(&sema->ref, semaphore_free);
 }
 
 static int
-emit_semaphore(struct nouveau_channel *chan, int method,
-	       struct nouveau_semaphore *sema)
+semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 {
-	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
-	struct nouveau_fence *fence;
-	bool smart = (dev_priv->card_type >= NV_50);
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_fence *fence = NULL;
 	int ret;
 
-	ret = RING_SPACE(chan, smart ? 8 : 4);
+	if (dev_priv->chipset < 0x84) {
+		ret = RING_SPACE(chan, 3);
+		if (ret)
+			return ret;
+
+		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2);
+		OUT_RING  (chan, sema->mem->start);
+		OUT_RING  (chan, 1);
+	} else
+	if (dev_priv->chipset < 0xc0) {
+		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+		u64 offset = vma->offset + sema->mem->start;
+
+		ret = RING_SPACE(chan, 5);
+		if (ret)
+			return ret;
+
+		BEGIN_RING(chan, NvSubSw, 0x0010, 4);
+		OUT_RING  (chan, upper_32_bits(offset));
+		OUT_RING  (chan, lower_32_bits(offset));
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 1); /* ACQUIRE_EQ */
+	} else {
+		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+		u64 offset = vma->offset + sema->mem->start;
+
+		ret = RING_SPACE(chan, 5);
+		if (ret)
+			return ret;
+
+		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+		OUT_RING  (chan, upper_32_bits(offset));
+		OUT_RING  (chan, lower_32_bits(offset));
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0x1001); /* ACQUIRE_EQ */
+	}
+
+	/* Delay semaphore destruction until its work is done */
+	ret = nouveau_fence_new(chan, &fence, true);
 	if (ret)
 		return ret;
 
-	if (smart) {
-		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
-		OUT_RING(chan, NvSema);
-	}
-	BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
-	OUT_RING(chan, sema->mem->start);
-
-	if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
-		/*
-		 * NV50 tries to be too smart and context-switch
-		 * between semaphores instead of doing a "first come,
-		 * first served" strategy like previous cards
-		 * do.
-		 *
-		 * That's bad because the ACQUIRE latency can get as
-		 * large as the PFIFO context time slice in the
-		 * typical DRI2 case where you have several
-		 * outstanding semaphores at the same moment.
-		 *
-		 * If we're going to ACQUIRE, force the card to
-		 * context switch before, just in case the matching
-		 * RELEASE is already scheduled to be executed in
-		 * another channel.
-		 */
-		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
-		OUT_RING(chan, 0);
-	}
+	kref_get(&sema->ref);
+	nouveau_fence_work(fence, semaphore_work, sema);
+	nouveau_fence_unref(&fence);
+	return 0;
+}
+
+static int
+semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_fence *fence = NULL;
+	int ret;
+
+	if (dev_priv->chipset < 0x84) {
+		ret = RING_SPACE(chan, 4);
+		if (ret)
+			return ret;
+
+		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
+		OUT_RING  (chan, sema->mem->start);
+		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1);
+		OUT_RING  (chan, 1);
+	} else
+	if (dev_priv->chipset < 0xc0) {
+		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+		u64 offset = vma->offset + sema->mem->start;
+
+		ret = RING_SPACE(chan, 5);
+		if (ret)
+			return ret;
+
+		BEGIN_RING(chan, NvSubSw, 0x0010, 4);
+		OUT_RING  (chan, upper_32_bits(offset));
+		OUT_RING  (chan, lower_32_bits(offset));
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 2); /* RELEASE */
+	} else {
+		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+		u64 offset = vma->offset + sema->mem->start;
 
-	BEGIN_RING(chan, NvSubSw, method, 1);
-	OUT_RING(chan, 1);
-
-	if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
-		/*
-		 * Force the card to context switch, there may be
-		 * another channel waiting for the semaphore we just
-		 * released.
-		 */
-		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
-		OUT_RING(chan, 0);
+		ret = RING_SPACE(chan, 5);
+		if (ret)
+			return ret;
+
+		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+		OUT_RING  (chan, upper_32_bits(offset));
+		OUT_RING  (chan, lower_32_bits(offset));
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0x1002); /* RELEASE */
 	}
 
 	/* Delay semaphore destruction until its work is done */
@@ -383,7 +439,6 @@ emit_semaphore(struct nouveau_channel *chan, int method,
 	kref_get(&sema->ref);
 	nouveau_fence_work(fence, semaphore_work, sema);
 	nouveau_fence_unref(&fence);
-
 	return 0;
 }
 
@@ -400,7 +455,7 @@ nouveau_fence_sync(struct nouveau_fence *fence,
 		   nouveau_fence_signalled(fence)))
 		goto out;
 
-	sema = alloc_semaphore(dev);
+	sema = semaphore_alloc(dev);
 	if (!sema) {
 		/* Early card or broken userspace, fall back to
 		 * software sync. */
@@ -418,17 +473,17 @@ nouveau_fence_sync(struct nouveau_fence *fence,
 	}
 
 	/* Make wchan wait until it gets signalled */
-	ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
+	ret = semaphore_acquire(wchan, sema);
 	if (ret)
 		goto out_unlock;
 
 	/* Signal the semaphore from chan */
-	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
+	ret = semaphore_release(chan, sema);
 
 out_unlock:
 	mutex_unlock(&chan->mutex);
 out_unref:
-	kref_put(&sema->ref, free_semaphore);
+	kref_put(&sema->ref, semaphore_free);
 out:
 	if (chan)
 		nouveau_channel_put_unlocked(&chan);
@@ -449,22 +504,23 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
 	struct nouveau_gpuobj *obj = NULL;
 	int ret;
 
+	if (dev_priv->card_type >= NV_C0)
+		goto out_initialised;
+
 	/* Create an NV_SW object for various sync purposes */
 	ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
 	if (ret)
 		return ret;
 
 	/* we leave subchannel empty for nvc0 */
-	if (dev_priv->card_type < NV_C0) {
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			return ret;
-		BEGIN_RING(chan, NvSubSw, 0, 1);
-		OUT_RING(chan, NvSw);
-	}
+	ret = RING_SPACE(chan, 2);
+	if (ret)
+		return ret;
+	BEGIN_RING(chan, NvSubSw, 0, 1);
+	OUT_RING(chan, NvSw);
 
 	/* Create a DMA object for the shared cross-channel sync area. */
-	if (USE_SEMA(dev)) {
+	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
 		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
 
 		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
@@ -484,14 +540,20 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
 			return ret;
 		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
 		OUT_RING(chan, NvSema);
+	} else {
+		ret = RING_SPACE(chan, 2);
+		if (ret)
+			return ret;
+		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, chan->vram_handle); /* whole VM */
 	}
 
 	FIRE_RING(chan);
 
+out_initialised:
 	INIT_LIST_HEAD(&chan->fence.pending);
 	spin_lock_init(&chan->fence.lock);
 	atomic_set(&chan->fence.last_sequence_irq, 0);
-
 	return 0;
 }
 
@@ -519,12 +581,13 @@ int
 nouveau_fence_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int size = (dev_priv->chipset < 0x84) ? 4096 : 16384;
 	int ret;
 
 	/* Create a shared VRAM heap for cross-channel sync. */
 	if (USE_SEMA(dev)) {
-		ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
-				     0, 0, false, true, &dev_priv->fence.bo);
+		ret = nouveau_bo_new(dev, NULL, size, 0, TTM_PL_FLAG_VRAM,
+				     0, 0, &dev_priv->fence.bo);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 506c508..e8b04f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -61,19 +61,36 @@ nouveau_gem_object_del(struct drm_gem_object *gem)
 
 int
 nouveau_gem_new(struct drm_device *dev, struct nouveau_channel *chan,
-		int size, int align, uint32_t flags, uint32_t tile_mode,
-		uint32_t tile_flags, bool no_vm, bool mappable,
-		struct nouveau_bo **pnvbo)
+		int size, int align, uint32_t domain, uint32_t tile_mode,
+		uint32_t tile_flags, struct nouveau_bo **pnvbo)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *nvbo;
+	u32 flags = 0;
 	int ret;
 
+	if (domain & NOUVEAU_GEM_DOMAIN_VRAM)
+		flags |= TTM_PL_FLAG_VRAM;
+	if (domain & NOUVEAU_GEM_DOMAIN_GART)
+		flags |= TTM_PL_FLAG_TT;
+	if (!flags || domain & NOUVEAU_GEM_DOMAIN_CPU)
+		flags |= TTM_PL_FLAG_SYSTEM;
+
 	ret = nouveau_bo_new(dev, chan, size, align, flags, tile_mode,
-			     tile_flags, no_vm, mappable, pnvbo);
+			     tile_flags, pnvbo);
 	if (ret)
 		return ret;
 	nvbo = *pnvbo;
 
+	/* we restrict allowed domains on nv50+ to only the types
+	 * that were requested at creation time.  not possibly on
+	 * earlier chips without busting the ABI.
+	 */
+	nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_VRAM |
+			      NOUVEAU_GEM_DOMAIN_GART;
+	if (dev_priv->card_type >= NV_50)
+		nvbo->valid_domains &= domain;
+
 	nvbo->gem = drm_gem_object_alloc(dev, nvbo->bo.mem.size);
 	if (!nvbo->gem) {
 		nouveau_bo_ref(NULL, pnvbo);
@@ -97,7 +114,7 @@ nouveau_gem_info(struct drm_gem_object *gem, struct drm_nouveau_gem_info *rep)
 
 	rep->size = nvbo->bo.mem.num_pages << PAGE_SHIFT;
 	rep->offset = nvbo->bo.offset;
-	rep->map_handle = nvbo->mappable ? nvbo->bo.addr_space_offset : 0;
+	rep->map_handle = nvbo->bo.addr_space_offset;
 	rep->tile_mode = nvbo->tile_mode;
 	rep->tile_flags = nvbo->tile_flags;
 	return 0;
@@ -111,19 +128,11 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 	struct drm_nouveau_gem_new *req = data;
 	struct nouveau_bo *nvbo = NULL;
 	struct nouveau_channel *chan = NULL;
-	uint32_t flags = 0;
 	int ret = 0;
 
 	if (unlikely(dev_priv->ttm.bdev.dev_mapping == NULL))
 		dev_priv->ttm.bdev.dev_mapping = dev_priv->dev->dev_mapping;
 
-	if (req->info.domain & NOUVEAU_GEM_DOMAIN_VRAM)
-		flags |= TTM_PL_FLAG_VRAM;
-	if (req->info.domain & NOUVEAU_GEM_DOMAIN_GART)
-		flags |= TTM_PL_FLAG_TT;
-	if (!flags || req->info.domain & NOUVEAU_GEM_DOMAIN_CPU)
-		flags |= TTM_PL_FLAG_SYSTEM;
-
 	if (!dev_priv->engine.vram.flags_valid(dev, req->info.tile_flags)) {
 		NV_ERROR(dev, "bad page flags: 0x%08x\n", req->info.tile_flags);
 		return -EINVAL;
@@ -135,10 +144,9 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 			return PTR_ERR(chan);
 	}
 
-	ret = nouveau_gem_new(dev, chan, req->info.size, req->align, flags,
-			      req->info.tile_mode, req->info.tile_flags, false,
-			      (req->info.domain & NOUVEAU_GEM_DOMAIN_MAPPABLE),
-			      &nvbo);
+	ret = nouveau_gem_new(dev, chan, req->info.size, req->align,
+			      req->info.domain, req->info.tile_mode,
+			      req->info.tile_flags, &nvbo);
 	if (chan)
 		nouveau_channel_put(&chan);
 	if (ret)
@@ -161,7 +169,7 @@ nouveau_gem_set_domain(struct drm_gem_object *gem, uint32_t read_domains,
 {
 	struct nouveau_bo *nvbo = gem->driver_private;
 	struct ttm_buffer_object *bo = &nvbo->bo;
-	uint32_t domains = valid_domains &
+	uint32_t domains = valid_domains & nvbo->valid_domains &
 		(write_domains ? write_domains : read_domains);
 	uint32_t pref_flags = 0, valid_flags = 0;
 
@@ -592,7 +600,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 		if (push[i].bo_index >= req->nr_buffers) {
 			NV_ERROR(dev, "push %d buffer not in list\n", i);
 			ret = -EINVAL;
-			goto out;
+			goto out_prevalid;
 		}
 
 		bo[push[i].bo_index].read_domains |= (1 << 31);
@@ -604,7 +612,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	if (ret) {
 		if (ret != -ERESTARTSYS)
 			NV_ERROR(dev, "validate: %d\n", ret);
-		goto out;
+		goto out_prevalid;
 	}
 
 	/* Apply any relocations that are required */
@@ -697,6 +705,8 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 out:
 	validate_fini(&op, fence);
 	nouveau_fence_unref(&fence);
+
+out_prevalid:
 	kfree(bo);
 	kfree(push);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.h b/drivers/gpu/drm/nouveau/nouveau_grctx.h
index 4a8ad13..86c2e37 100644
--- a/drivers/gpu/drm/nouveau/nouveau_grctx.h
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.h
@@ -87,10 +87,10 @@ _cp_bra(struct nouveau_grctx *ctx, u32 mod, int flag, int state, int name)
 	cp_out(ctx, CP_BRA | (mod << 18) | ip | flag |
 		    (state ? 0 : CP_BRA_IF_CLEAR));
 }
-#define cp_bra(c,f,s,n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#define cp_bra(c, f, s, n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
 #ifdef CP_BRA_MOD
-#define cp_cal(c,f,s,n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
-#define cp_ret(c,f,s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
+#define cp_cal(c, f, s, n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#define cp_ret(c, f, s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
 #endif
 
 static inline void
@@ -98,14 +98,14 @@ _cp_wait(struct nouveau_grctx *ctx, int flag, int state)
 {
 	cp_out(ctx, CP_WAIT | flag | (state ? CP_WAIT_SET : 0));
 }
-#define cp_wait(c,f,s) _cp_wait((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+#define cp_wait(c, f, s) _cp_wait((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
 
 static inline void
 _cp_set(struct nouveau_grctx *ctx, int flag, int state)
 {
 	cp_out(ctx, CP_SET | flag | (state ? CP_SET_1 : 0));
 }
-#define cp_set(c,f,s) _cp_set((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+#define cp_set(c, f, s) _cp_set((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
 
 static inline void
 cp_pos(struct nouveau_grctx *ctx, int offset)
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
index 053edf9..ba896e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
@@ -900,6 +900,7 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	}
 	/* NV11 and NV20 don't have this, they stop at 0x52. */
 	if (nv_gf4_disp_arch(dev)) {
+		rd_cio_state(dev, head, regp, NV_CIO_CRE_42);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_53);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_54);
 
@@ -1003,6 +1004,7 @@ nv_load_state_ext(struct drm_device *dev, int head,
 			nouveau_wait_eq(dev, 650000000, NV_PRMCIO_INP0__COLOR, 0x8, 0x0);
 		}
 
+		wr_cio_state(dev, head, regp, NV_CIO_CRE_42);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_53);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_54);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index b0fb9bd..4942294 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -152,9 +152,6 @@ nouveau_mem_vram_fini(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	nouveau_bo_unpin(dev_priv->vga_ram);
-	nouveau_bo_ref(NULL, &dev_priv->vga_ram);
-
 	ttm_bo_device_release(&dev_priv->ttm.bdev);
 
 	nouveau_ttm_global_release(dev_priv);
@@ -393,11 +390,17 @@ nouveau_mem_vram_init(struct drm_device *dev)
 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
 	int ret, dma_bits;
 
-	if (dev_priv->card_type >= NV_50 &&
-	    pci_dma_supported(dev->pdev, DMA_BIT_MASK(40)))
-		dma_bits = 40;
-	else
-		dma_bits = 32;
+	dma_bits = 32;
+	if (dev_priv->card_type >= NV_50) {
+		if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(40)))
+			dma_bits = 40;
+	} else
+	if (0 && drm_device_is_pcie(dev) &&
+	    dev_priv->chipset  > 0x40 &&
+	    dev_priv->chipset != 0x45) {
+		if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(39)))
+			dma_bits = 39;
+	}
 
 	ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits));
 	if (ret)
@@ -419,14 +422,32 @@ nouveau_mem_vram_init(struct drm_device *dev)
 	}
 
 	/* reserve space at end of VRAM for PRAMIN */
-	if (dev_priv->chipset == 0x40 || dev_priv->chipset == 0x47 ||
-	    dev_priv->chipset == 0x49 || dev_priv->chipset == 0x4b)
-		dev_priv->ramin_rsvd_vram = (2 * 1024 * 1024);
-	else
-	if (dev_priv->card_type >= NV_40)
-		dev_priv->ramin_rsvd_vram = (1 * 1024 * 1024);
-	else
-		dev_priv->ramin_rsvd_vram = (512 * 1024);
+	if (dev_priv->card_type >= NV_50) {
+		dev_priv->ramin_rsvd_vram = 1 * 1024 * 1024;
+	} else
+	if (dev_priv->card_type >= NV_40) {
+		u32 vs = hweight8((nv_rd32(dev, 0x001540) & 0x0000ff00) >> 8);
+		u32 rsvd;
+
+		/* estimate grctx size, the magics come from nv40_grctx.c */
+		if      (dev_priv->chipset == 0x40) rsvd = 0x6aa0 * vs;
+		else if (dev_priv->chipset  < 0x43) rsvd = 0x4f00 * vs;
+		else if (nv44_graph_class(dev))	    rsvd = 0x4980 * vs;
+		else				    rsvd = 0x4a40 * vs;
+		rsvd += 16 * 1024;
+		rsvd *= dev_priv->engine.fifo.channels;
+
+		/* pciegart table */
+		if (drm_device_is_pcie(dev))
+			rsvd += 512 * 1024;
+
+		/* object storage */
+		rsvd += 512 * 1024;
+
+		dev_priv->ramin_rsvd_vram = round_up(rsvd, 4096);
+	} else {
+		dev_priv->ramin_rsvd_vram = 512 * 1024;
+	}
 
 	ret = dev_priv->engine.vram.init(dev);
 	if (ret)
@@ -455,13 +476,17 @@ nouveau_mem_vram_init(struct drm_device *dev)
 		return ret;
 	}
 
-	ret = nouveau_bo_new(dev, NULL, 256*1024, 0, TTM_PL_FLAG_VRAM,
-			     0, 0, true, true, &dev_priv->vga_ram);
-	if (ret == 0)
-		ret = nouveau_bo_pin(dev_priv->vga_ram, TTM_PL_FLAG_VRAM);
-	if (ret) {
-		NV_WARN(dev, "failed to reserve VGA memory\n");
-		nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+	if (dev_priv->card_type < NV_50) {
+		ret = nouveau_bo_new(dev, NULL, 256*1024, 0, TTM_PL_FLAG_VRAM,
+				     0, 0, &dev_priv->vga_ram);
+		if (ret == 0)
+			ret = nouveau_bo_pin(dev_priv->vga_ram,
+					     TTM_PL_FLAG_VRAM);
+
+		if (ret) {
+			NV_WARN(dev, "failed to reserve VGA memory\n");
+			nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+		}
 	}
 
 	dev_priv->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 1),
@@ -525,6 +550,7 @@ nouveau_mem_timing_init(struct drm_device *dev)
 	u8 tRC;		/* Byte 9 */
 	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
 	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
+	u8 magic_number = 0; /* Yeah... sorry*/
 	u8 *mem = NULL, *entry;
 	int i, recordlen, entries;
 
@@ -569,6 +595,12 @@ nouveau_mem_timing_init(struct drm_device *dev)
 	if (!memtimings->timing)
 		return;
 
+	/* Get "some number" from the timing reg for NV_40 and NV_50
+	 * Used in calculations later */
+	if (dev_priv->card_type >= NV_40 && dev_priv->chipset < 0x98) {
+		magic_number = (nv_rd32(dev, 0x100228) & 0x0f000000) >> 24;
+	}
+
 	entry = mem + mem[1];
 	for (i = 0; i < entries; i++, entry += recordlen) {
 		struct nouveau_pm_memtiming *timing = &pm->memtimings.timing[i];
@@ -608,36 +640,67 @@ nouveau_mem_timing_init(struct drm_device *dev)
 
 		/* XXX: I don't trust the -1's and +1's... they must come
 		 *      from somewhere! */
-		timing->reg_100224 = ((tUNK_0 + tUNK_19 + 1) << 24 |
-				      tUNK_18 << 16 |
-				      (tUNK_1 + tUNK_19 + 1) << 8 |
-				      (tUNK_2 - 1));
+		timing->reg_100224 = (tUNK_0 + tUNK_19 + 1 + magic_number) << 24 |
+				      max(tUNK_18, (u8) 1) << 16 |
+				      (tUNK_1 + tUNK_19 + 1 + magic_number) << 8;
+		if (dev_priv->chipset == 0xa8) {
+			timing->reg_100224 |= (tUNK_2 - 1);
+		} else {
+			timing->reg_100224 |= (tUNK_2 + 2 - magic_number);
+		}
 
 		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
-		if(recordlen > 19) {
-			timing->reg_100228 += (tUNK_19 - 1) << 24;
-		}/* I cannot back-up this else-statement right now
-			 else {
-			timing->reg_100228 += tUNK_12 << 24;
-		}*/
-
-		/* XXX: reg_10022c */
-		timing->reg_10022c = tUNK_2 - 1;
-
-		timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
-				      tUNK_13 << 8  | tUNK_13);
-
-		/* XXX: +6? */
-		timing->reg_100234 = (tRAS << 24 | (tUNK_19 + 6) << 8 | tRC);
-		timing->reg_100234 += max(tUNK_10,tUNK_11) << 16;
-
-		/* XXX; reg_100238, reg_10023c
-		 * reg: 0x00??????
-		 * reg_10023c:
-		 *      0 for pre-NV50 cards
-		 *      0x????0202 for NV50+ cards (empirical evidence) */
-		if(dev_priv->card_type >= NV_50) {
+		if (dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa)
+			timing->reg_100228 |= (tUNK_19 - 1) << 24;
+		else
+			timing->reg_100228 |= magic_number << 24;
+
+		if (dev_priv->card_type == NV_40) {
+			/* NV40: don't know what the rest of the regs are..
+			 * And don't need to know either */
+			timing->reg_100228 |= 0x20200000;
+		} else if (dev_priv->card_type >= NV_50) {
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
+				timing->reg_10022c = (0x14 + tUNK_2) << 24 |
+						     0x16 << 16 |
+						     (tUNK_2 - 1) << 8 |
+						     (tUNK_2 - 1);
+			} else {
+				/* XXX: reg_10022c for recentish cards */
+				timing->reg_10022c = tUNK_2 - 1;
+			}
+
+			timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
+						  tUNK_13 << 8  | tUNK_13);
+
+			timing->reg_100234 = (tRAS << 24 | tRC);
+			timing->reg_100234 += max(tUNK_10, tUNK_11) << 16;
+
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
+				timing->reg_100234 |= (tUNK_2 + 2) << 8;
+			} else {
+				/* XXX: +6? */
+				timing->reg_100234 |= (tUNK_19 + 6) << 8;
+			}
+
+			/* XXX; reg_100238
+			 * reg_100238: 0x00?????? */
 			timing->reg_10023c = 0x202;
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
+				timing->reg_10023c |= 0x4000000 | (tUNK_2 - 1) << 16;
+			} else {
+				/* XXX: reg_10023c
+				 * currently unknown
+				 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
+			}
+
+			/* XXX: reg_100240? */
 		}
 
 		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
@@ -646,9 +709,10 @@ nouveau_mem_timing_init(struct drm_device *dev)
 		NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
 			 timing->reg_100230, timing->reg_100234,
 			 timing->reg_100238, timing->reg_10023c);
+		NV_DEBUG(dev, "         240: %08x\n", timing->reg_100240);
 	}
 
-	memtimings->nr_timing  = entries;
+	memtimings->nr_timing = entries;
 	memtimings->supported = true;
 }
 
@@ -666,13 +730,14 @@ nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long p_size
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
 	struct nouveau_mm *mm;
-	u32 b_size;
+	u64 size, block, rsvd;
 	int ret;
 
-	p_size = (p_size << PAGE_SHIFT) >> 12;
-	b_size = dev_priv->vram_rblock_size >> 12;
+	rsvd  = (256 * 1024); /* vga memory */
+	size  = (p_size << PAGE_SHIFT) - rsvd;
+	block = dev_priv->vram_rblock_size;
 
-	ret = nouveau_mm_init(&mm, 0, p_size, b_size);
+	ret = nouveau_mm_init(&mm, rsvd >> 12, size >> 12, block >> 12);
 	if (ret)
 		return ret;
 
@@ -700,9 +765,15 @@ nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
 	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
+	struct nouveau_mem *node = mem->mm_node;
 	struct drm_device *dev = dev_priv->dev;
 
-	vram->put(dev, (struct nouveau_vram **)&mem->mm_node);
+	if (node->tmp_vma.node) {
+		nouveau_vm_unmap(&node->tmp_vma);
+		nouveau_vm_put(&node->tmp_vma);
+	}
+
+	vram->put(dev, (struct nouveau_mem **)&mem->mm_node);
 }
 
 static int
@@ -715,7 +786,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
 	struct drm_device *dev = dev_priv->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct nouveau_vram *node;
+	struct nouveau_mem *node;
 	u32 size_nc = 0;
 	int ret;
 
@@ -724,7 +795,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 
 	ret = vram->get(dev, mem->num_pages << PAGE_SHIFT,
 			mem->page_alignment << PAGE_SHIFT, size_nc,
-			(nvbo->tile_flags >> 8) & 0xff, &node);
+			(nvbo->tile_flags >> 8) & 0x3ff, &node);
 	if (ret) {
 		mem->mm_node = NULL;
 		return (ret == -ENOSPC) ? 0 : ret;
@@ -771,3 +842,84 @@ const struct ttm_mem_type_manager_func nouveau_vram_manager = {
 	nouveau_vram_manager_del,
 	nouveau_vram_manager_debug
 };
+
+static int
+nouveau_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
+{
+	return 0;
+}
+
+static int
+nouveau_gart_manager_fini(struct ttm_mem_type_manager *man)
+{
+	return 0;
+}
+
+static void
+nouveau_gart_manager_del(struct ttm_mem_type_manager *man,
+			 struct ttm_mem_reg *mem)
+{
+	struct nouveau_mem *node = mem->mm_node;
+
+	if (node->tmp_vma.node) {
+		nouveau_vm_unmap(&node->tmp_vma);
+		nouveau_vm_put(&node->tmp_vma);
+	}
+	mem->mm_node = NULL;
+}
+
+static int
+nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
+			 struct ttm_buffer_object *bo,
+			 struct ttm_placement *placement,
+			 struct ttm_mem_reg *mem)
+{
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	struct nouveau_vma *vma = &nvbo->vma;
+	struct nouveau_vm *vm = vma->vm;
+	struct nouveau_mem *node;
+	int ret;
+
+	if (unlikely((mem->num_pages << PAGE_SHIFT) >=
+		     dev_priv->gart_info.aper_size))
+		return -ENOMEM;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	/* This node must be for evicting large-paged VRAM
+	 * to system memory.  Due to a nv50 limitation of
+	 * not being able to mix large/small pages within
+	 * the same PDE, we need to create a temporary
+	 * small-paged VMA for the eviction.
+	 */
+	if (vma->node->type != vm->spg_shift) {
+		ret = nouveau_vm_get(vm, (u64)vma->node->length << 12,
+				     vm->spg_shift, NV_MEM_ACCESS_RW,
+				     &node->tmp_vma);
+		if (ret) {
+			kfree(node);
+			return ret;
+		}
+	}
+
+	node->page_shift = nvbo->vma.node->type;
+	mem->mm_node = node;
+	mem->start   = 0;
+	return 0;
+}
+
+void
+nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix)
+{
+}
+
+const struct ttm_mem_type_manager_func nouveau_gart_manager = {
+	nouveau_gart_manager_init,
+	nouveau_gart_manager_fini,
+	nouveau_gart_manager_new,
+	nouveau_gart_manager_del,
+	nouveau_gart_manager_debug
+};
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h
index 798eaf3..1f7483a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.h
@@ -53,13 +53,13 @@ void nouveau_mm_put(struct nouveau_mm *, struct nouveau_mm_node *);
 
 int  nv50_vram_init(struct drm_device *);
 int  nv50_vram_new(struct drm_device *, u64 size, u32 align, u32 size_nc,
-		    u32 memtype, struct nouveau_vram **);
-void nv50_vram_del(struct drm_device *, struct nouveau_vram **);
+		    u32 memtype, struct nouveau_mem **);
+void nv50_vram_del(struct drm_device *, struct nouveau_mem **);
 bool nv50_vram_flags_valid(struct drm_device *, u32 tile_flags);
 
 int  nvc0_vram_init(struct drm_device *);
 int  nvc0_vram_new(struct drm_device *, u64 size, u32 align, u32 ncmin,
-		    u32 memtype, struct nouveau_vram **);
+		    u32 memtype, struct nouveau_mem **);
 bool nvc0_vram_flags_valid(struct drm_device *, u32 tile_flags);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index 5ea1676..5b39718 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -35,20 +35,22 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct nouveau_bo *ntfy = NULL;
-	uint32_t flags;
+	uint32_t flags, ttmpl;
 	int ret;
 
-	if (nouveau_vram_notify)
-		flags = TTM_PL_FLAG_VRAM;
-	else
-		flags = TTM_PL_FLAG_TT;
+	if (nouveau_vram_notify) {
+		flags = NOUVEAU_GEM_DOMAIN_VRAM;
+		ttmpl = TTM_PL_FLAG_VRAM;
+	} else {
+		flags = NOUVEAU_GEM_DOMAIN_GART;
+		ttmpl = TTM_PL_FLAG_TT;
+	}
 
-	ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags,
-			      0, 0x0000, false, true, &ntfy);
+	ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags, 0, 0, &ntfy);
 	if (ret)
 		return ret;
 
-	ret = nouveau_bo_pin(ntfy, flags);
+	ret = nouveau_bo_pin(ntfy, ttmpl);
 	if (ret)
 		goto out_err;
 
@@ -100,6 +102,7 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
 		       uint32_t *b_offset)
 {
 	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *nobj = NULL;
 	struct drm_mm_node *mem;
 	uint32_t offset;
@@ -114,11 +117,16 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
 		return -ENOMEM;
 	}
 
-	if (chan->notifier_bo->bo.mem.mem_type == TTM_PL_VRAM)
-		target = NV_MEM_TARGET_VRAM;
-	else
-		target = NV_MEM_TARGET_GART;
-	offset  = chan->notifier_bo->bo.mem.start << PAGE_SHIFT;
+	if (dev_priv->card_type < NV_50) {
+		if (chan->notifier_bo->bo.mem.mem_type == TTM_PL_VRAM)
+			target = NV_MEM_TARGET_VRAM;
+		else
+			target = NV_MEM_TARGET_GART;
+		offset  = chan->notifier_bo->bo.mem.start << PAGE_SHIFT;
+	} else {
+		target = NV_MEM_TARGET_VM;
+		offset = chan->notifier_bo->vma.offset;
+	}
 	offset += mem->start;
 
 	ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, offset,
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 30b6544..59b446e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -36,6 +36,7 @@
 #include "nouveau_drm.h"
 #include "nouveau_ramht.h"
 #include "nouveau_vm.h"
+#include "nv50_display.h"
 
 struct nouveau_gpuobj_method {
 	struct list_head head;
@@ -490,16 +491,22 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class, u64 base,
 	}
 
 	if (target == NV_MEM_TARGET_GART) {
-		if (dev_priv->gart_info.type == NOUVEAU_GART_AGP) {
-			target = NV_MEM_TARGET_PCI_NOSNOOP;
-			base  += dev_priv->gart_info.aper_base;
-		} else
-		if (base != 0) {
-			base = nouveau_sgdma_get_physical(dev, base);
+		struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma;
+
+		if (dev_priv->gart_info.type == NOUVEAU_GART_PDMA) {
+			if (base == 0) {
+				nouveau_gpuobj_ref(gart, pobj);
+				return 0;
+			}
+
+			base   = nouveau_sgdma_get_physical(dev, base);
 			target = NV_MEM_TARGET_PCI;
 		} else {
-			nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, pobj);
-			return 0;
+			base += dev_priv->gart_info.aper_base;
+			if (dev_priv->gart_info.type == NOUVEAU_GART_AGP)
+				target = NV_MEM_TARGET_PCI_NOSNOOP;
+			else
+				target = NV_MEM_TARGET_PCI;
 		}
 	}
 
@@ -776,7 +783,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *vram = NULL, *tt = NULL;
-	int ret;
+	int ret, i;
 
 	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
 
@@ -841,6 +848,25 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 		nouveau_gpuobj_ref(NULL, &ramht);
 		if (ret)
 			return ret;
+
+		/* dma objects for display sync channel semaphore blocks */
+		for (i = 0; i < 2; i++) {
+			struct nouveau_gpuobj *sem = NULL;
+			struct nv50_display_crtc *dispc =
+				&nv50_display(dev)->crtc[i];
+			u64 offset = dispc->sem.bo->bo.mem.start << PAGE_SHIFT;
+
+			ret = nouveau_gpuobj_dma_new(chan, 0x3d, offset, 0xfff,
+						     NV_MEM_ACCESS_RW,
+						     NV_MEM_TARGET_VRAM, &sem);
+			if (ret)
+				return ret;
+
+			ret = nouveau_ramht_insert(chan, NvEvoSema0 + i, sem);
+			nouveau_gpuobj_ref(NULL, &sem);
+			if (ret)
+				return ret;
+		}
 	}
 
 	/* VRAM ctxdma */
@@ -1013,19 +1039,20 @@ nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
 {
 	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 	struct drm_device *dev = gpuobj->dev;
+	unsigned long flags;
 
 	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
 		u64  ptr = gpuobj->vinst + offset;
 		u32 base = ptr >> 16;
 		u32  val;
 
-		spin_lock(&dev_priv->ramin_lock);
+		spin_lock_irqsave(&dev_priv->vm_lock, flags);
 		if (dev_priv->ramin_base != base) {
 			dev_priv->ramin_base = base;
 			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
 		}
 		val = nv_rd32(dev, 0x700000 + (ptr & 0xffff));
-		spin_unlock(&dev_priv->ramin_lock);
+		spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 		return val;
 	}
 
@@ -1037,18 +1064,19 @@ nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
 {
 	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 	struct drm_device *dev = gpuobj->dev;
+	unsigned long flags;
 
 	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
 		u64  ptr = gpuobj->vinst + offset;
 		u32 base = ptr >> 16;
 
-		spin_lock(&dev_priv->ramin_lock);
+		spin_lock_irqsave(&dev_priv->vm_lock, flags);
 		if (dev_priv->ramin_base != base) {
 			dev_priv->ramin_base = base;
 			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
 		}
 		nv_wr32(dev, 0x700000 + (ptr & 0xffff), val);
-		spin_unlock(&dev_priv->ramin_lock);
+		spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index ac62a1b..3045566 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -134,7 +134,7 @@ nouveau_perf_init(struct drm_device *dev)
 		case 0x13:
 		case 0x15:
 			perflvl->fanspeed = entry[55];
-			perflvl->voltage = entry[56];
+			perflvl->voltage = (recordlen > 56) ? entry[56] : 0;
 			perflvl->core = ROM32(entry[1]) * 10;
 			perflvl->memory = ROM32(entry[5]) * 20;
 			break;
@@ -174,9 +174,21 @@ nouveau_perf_init(struct drm_device *dev)
 #define subent(n) entry[perf[2] + ((n) * perf[3])]
 			perflvl->fanspeed = 0; /*XXX*/
 			perflvl->voltage = entry[2];
-			perflvl->core = (ROM16(subent(0)) & 0xfff) * 1000;
-			perflvl->shader = (ROM16(subent(1)) & 0xfff) * 1000;
-			perflvl->memory = (ROM16(subent(2)) & 0xfff) * 1000;
+			if (dev_priv->card_type == NV_50) {
+				perflvl->core = ROM16(subent(0)) & 0xfff;
+				perflvl->shader = ROM16(subent(1)) & 0xfff;
+				perflvl->memory = ROM16(subent(2)) & 0xfff;
+			} else {
+				perflvl->shader = ROM16(subent(3)) & 0xfff;
+				perflvl->core   = perflvl->shader / 2;
+				perflvl->unk0a  = ROM16(subent(4)) & 0xfff;
+				perflvl->memory = ROM16(subent(5)) & 0xfff;
+			}
+
+			perflvl->core *= 1000;
+			perflvl->shader *= 1000;
+			perflvl->memory *= 1000;
+			perflvl->unk0a *= 1000;
 			break;
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index 4399e2f..0b1caeb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -490,6 +490,7 @@ nouveau_pm_init(struct drm_device *dev)
 	/* determine current ("boot") performance level */
 	ret = nouveau_pm_perflvl_get(dev, &pm->boot);
 	if (ret == 0) {
+		strncpy(pm->boot.name, "boot", 4);
 		pm->cur = &pm->boot;
 
 		nouveau_pm_perflvl_info(&pm->boot, info, sizeof(info));
diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.c b/drivers/gpu/drm/nouveau/nouveau_ramht.c
index bef3e69..a24a81f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ramht.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ramht.c
@@ -114,7 +114,9 @@ nouveau_ramht_insert(struct nouveau_channel *chan, u32 handle,
 		      (gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
 	} else {
 		if (gpuobj->engine == NVOBJ_ENGINE_DISPLAY) {
-			ctx = (gpuobj->cinst << 10) | chan->id;
+			ctx = (gpuobj->cinst << 10) |
+			      (chan->id << 28) |
+			      chan->id; /* HASH_TAG */
 		} else {
 			ctx = (gpuobj->cinst >> 4) |
 			      ((gpuobj->engine <<
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index 04e8fb7..f18cdfc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -639,9 +639,9 @@
 #    define NV50_PCONNECTOR_I2C_PORT_4                      0x0000e240
 #    define NV50_PCONNECTOR_I2C_PORT_5                      0x0000e258
 
-#define NV50_AUXCH_DATA_OUT(i,n)             ((n) * 4 + (i) * 0x50 + 0x0000e4c0)
+#define NV50_AUXCH_DATA_OUT(i, n)            ((n) * 4 + (i) * 0x50 + 0x0000e4c0)
 #define NV50_AUXCH_DATA_OUT__SIZE                                             4
-#define NV50_AUXCH_DATA_IN(i,n)              ((n) * 4 + (i) * 0x50 + 0x0000e4d0)
+#define NV50_AUXCH_DATA_IN(i, n)             ((n) * 4 + (i) * 0x50 + 0x0000e4d0)
 #define NV50_AUXCH_DATA_IN__SIZE                                              4
 #define NV50_AUXCH_ADDR(i)                             ((i) * 0x50 + 0x0000e4e0)
 #define NV50_AUXCH_CTRL(i)                             ((i) * 0x50 + 0x0000e4e4)
@@ -829,7 +829,7 @@
 #define NV50_PDISPLAY_SOR_BACKLIGHT                                  0x0061c084
 #define NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE                           0x80000000
 #define NV50_PDISPLAY_SOR_BACKLIGHT_LEVEL                            0x00000fff
-#define NV50_SOR_DP_CTRL(i,l)            (0x0061c10c + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_CTRL(i, l)           (0x0061c10c + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_CTRL_ENABLED                                     0x00000001
 #define NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED                      0x00004000
 #define NV50_SOR_DP_CTRL_LANE_MASK                                   0x001f0000
@@ -841,10 +841,10 @@
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_DISABLED                   0x00000000
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_1                          0x01000000
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_2                          0x02000000
-#define NV50_SOR_DP_UNK118(i,l)          (0x0061c118 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK120(i,l)          (0x0061c120 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK128(i,l)          (0x0061c128 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK130(i,l)          (0x0061c130 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK118(i, l)         (0x0061c118 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK120(i, l)         (0x0061c120 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK128(i, l)         (0x0061c128 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK130(i, l)         (0x0061c130 + (i) * 0x800 + (l) * 0x80)
 
 #define NV50_PDISPLAY_USER(i)                        ((i) * 0x1000 + 0x00640000)
 #define NV50_PDISPLAY_USER_PUT(i)                    ((i) * 0x1000 + 0x00640000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 9a250eb..2bf9686 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -74,8 +74,24 @@ nouveau_sgdma_clear(struct ttm_backend *be)
 	}
 }
 
+static void
+nouveau_sgdma_destroy(struct ttm_backend *be)
+{
+	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+
+	if (be) {
+		NV_DEBUG(nvbe->dev, "\n");
+
+		if (nvbe) {
+			if (nvbe->pages)
+				be->func->clear(be);
+			kfree(nvbe);
+		}
+	}
+}
+
 static int
-nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_device *dev = nvbe->dev;
@@ -102,7 +118,7 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
 }
 
 static int
-nouveau_sgdma_unbind(struct ttm_backend *be)
+nv04_sgdma_unbind(struct ttm_backend *be)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_device *dev = nvbe->dev;
@@ -125,59 +141,245 @@ nouveau_sgdma_unbind(struct ttm_backend *be)
 	return 0;
 }
 
+static struct ttm_backend_func nv04_sgdma_backend = {
+	.populate		= nouveau_sgdma_populate,
+	.clear			= nouveau_sgdma_clear,
+	.bind			= nv04_sgdma_bind,
+	.unbind			= nv04_sgdma_unbind,
+	.destroy		= nouveau_sgdma_destroy
+};
+
 static void
-nouveau_sgdma_destroy(struct ttm_backend *be)
+nv41_sgdma_flush(struct nouveau_sgdma_be *nvbe)
+{
+	struct drm_device *dev = nvbe->dev;
+
+	nv_wr32(dev, 0x100810, 0x00000022);
+	if (!nv_wait(dev, 0x100810, 0x00000100, 0x00000100))
+		NV_ERROR(dev, "vm flush timeout: 0x%08x\n",
+			 nv_rd32(dev, 0x100810));
+	nv_wr32(dev, 0x100810, 0x00000000);
+}
+
+static int
+nv41_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
+	struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
+	dma_addr_t *list = nvbe->pages;
+	u32 pte = mem->start << 2;
+	u32 cnt = nvbe->nr_pages;
 
-	if (be) {
-		NV_DEBUG(nvbe->dev, "\n");
+	nvbe->offset = mem->start << PAGE_SHIFT;
 
-		if (nvbe) {
-			if (nvbe->pages)
-				be->func->clear(be);
-			kfree(nvbe);
+	while (cnt--) {
+		nv_wo32(pgt, pte, (*list++ >> 7) | 1);
+		pte += 4;
+	}
+
+	nv41_sgdma_flush(nvbe);
+	nvbe->bound = true;
+	return 0;
+}
+
+static int
+nv41_sgdma_unbind(struct ttm_backend *be)
+{
+	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
+	struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
+	u32 pte = (nvbe->offset >> 12) << 2;
+	u32 cnt = nvbe->nr_pages;
+
+	while (cnt--) {
+		nv_wo32(pgt, pte, 0x00000000);
+		pte += 4;
+	}
+
+	nv41_sgdma_flush(nvbe);
+	nvbe->bound = false;
+	return 0;
+}
+
+static struct ttm_backend_func nv41_sgdma_backend = {
+	.populate		= nouveau_sgdma_populate,
+	.clear			= nouveau_sgdma_clear,
+	.bind			= nv41_sgdma_bind,
+	.unbind			= nv41_sgdma_unbind,
+	.destroy		= nouveau_sgdma_destroy
+};
+
+static void
+nv44_sgdma_flush(struct nouveau_sgdma_be *nvbe)
+{
+	struct drm_device *dev = nvbe->dev;
+
+	nv_wr32(dev, 0x100814, (nvbe->nr_pages - 1) << 12);
+	nv_wr32(dev, 0x100808, nvbe->offset | 0x20);
+	if (!nv_wait(dev, 0x100808, 0x00000001, 0x00000001))
+		NV_ERROR(dev, "gart flush timeout: 0x%08x\n",
+			 nv_rd32(dev, 0x100808));
+	nv_wr32(dev, 0x100808, 0x00000000);
+}
+
+static void
+nv44_sgdma_fill(struct nouveau_gpuobj *pgt, dma_addr_t *list, u32 base, u32 cnt)
+{
+	struct drm_nouveau_private *dev_priv = pgt->dev->dev_private;
+	dma_addr_t dummy = dev_priv->gart_info.dummy.addr;
+	u32 pte, tmp[4];
+
+	pte   = base >> 2;
+	base &= ~0x0000000f;
+
+	tmp[0] = nv_ro32(pgt, base + 0x0);
+	tmp[1] = nv_ro32(pgt, base + 0x4);
+	tmp[2] = nv_ro32(pgt, base + 0x8);
+	tmp[3] = nv_ro32(pgt, base + 0xc);
+	while (cnt--) {
+		u32 addr = list ? (*list++ >> 12) : (dummy >> 12);
+		switch (pte++ & 0x3) {
+		case 0:
+			tmp[0] &= ~0x07ffffff;
+			tmp[0] |= addr;
+			break;
+		case 1:
+			tmp[0] &= ~0xf8000000;
+			tmp[0] |= addr << 27;
+			tmp[1] &= ~0x003fffff;
+			tmp[1] |= addr >> 5;
+			break;
+		case 2:
+			tmp[1] &= ~0xffc00000;
+			tmp[1] |= addr << 22;
+			tmp[2] &= ~0x0001ffff;
+			tmp[2] |= addr >> 10;
+			break;
+		case 3:
+			tmp[2] &= ~0xfffe0000;
+			tmp[2] |= addr << 17;
+			tmp[3] &= ~0x00000fff;
+			tmp[3] |= addr >> 15;
+			break;
 		}
 	}
+
+	tmp[3] |= 0x40000000;
+
+	nv_wo32(pgt, base + 0x0, tmp[0]);
+	nv_wo32(pgt, base + 0x4, tmp[1]);
+	nv_wo32(pgt, base + 0x8, tmp[2]);
+	nv_wo32(pgt, base + 0xc, tmp[3]);
 }
 
 static int
-nv50_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv44_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
+	struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
+	dma_addr_t *list = nvbe->pages;
+	u32 pte = mem->start << 2, tmp[4];
+	u32 cnt = nvbe->nr_pages;
+	int i;
 
 	nvbe->offset = mem->start << PAGE_SHIFT;
 
-	nouveau_vm_map_sg(&dev_priv->gart_info.vma, nvbe->offset,
-			  nvbe->nr_pages << PAGE_SHIFT, nvbe->pages);
+	if (pte & 0x0000000c) {
+		u32  max = 4 - ((pte >> 2) & 0x3);
+		u32 part = (cnt > max) ? max : cnt;
+		nv44_sgdma_fill(pgt, list, pte, part);
+		pte  += (part << 2);
+		list += part;
+		cnt  -= part;
+	}
+
+	while (cnt >= 4) {
+		for (i = 0; i < 4; i++)
+			tmp[i] = *list++ >> 12;
+		nv_wo32(pgt, pte + 0x0, tmp[0] >>  0 | tmp[1] << 27);
+		nv_wo32(pgt, pte + 0x4, tmp[1] >>  5 | tmp[2] << 22);
+		nv_wo32(pgt, pte + 0x8, tmp[2] >> 10 | tmp[3] << 17);
+		nv_wo32(pgt, pte + 0xc, tmp[3] >> 15 | 0x40000000);
+		pte  += 0x10;
+		cnt  -= 4;
+	}
+
+	if (cnt)
+		nv44_sgdma_fill(pgt, list, pte, cnt);
+
+	nv44_sgdma_flush(nvbe);
 	nvbe->bound = true;
 	return 0;
 }
 
 static int
-nv50_sgdma_unbind(struct ttm_backend *be)
+nv44_sgdma_unbind(struct ttm_backend *be)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
+	struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
+	u32 pte = (nvbe->offset >> 12) << 2;
+	u32 cnt = nvbe->nr_pages;
+
+	if (pte & 0x0000000c) {
+		u32  max = 4 - ((pte >> 2) & 0x3);
+		u32 part = (cnt > max) ? max : cnt;
+		nv44_sgdma_fill(pgt, NULL, pte, part);
+		pte  += (part << 2);
+		cnt  -= part;
+	}
 
-	if (!nvbe->bound)
-		return 0;
+	while (cnt >= 4) {
+		nv_wo32(pgt, pte + 0x0, 0x00000000);
+		nv_wo32(pgt, pte + 0x4, 0x00000000);
+		nv_wo32(pgt, pte + 0x8, 0x00000000);
+		nv_wo32(pgt, pte + 0xc, 0x00000000);
+		pte  += 0x10;
+		cnt  -= 4;
+	}
 
-	nouveau_vm_unmap_at(&dev_priv->gart_info.vma, nvbe->offset,
-			    nvbe->nr_pages << PAGE_SHIFT);
+	if (cnt)
+		nv44_sgdma_fill(pgt, NULL, pte, cnt);
+
+	nv44_sgdma_flush(nvbe);
 	nvbe->bound = false;
 	return 0;
 }
 
-static struct ttm_backend_func nouveau_sgdma_backend = {
+static struct ttm_backend_func nv44_sgdma_backend = {
 	.populate		= nouveau_sgdma_populate,
 	.clear			= nouveau_sgdma_clear,
-	.bind			= nouveau_sgdma_bind,
-	.unbind			= nouveau_sgdma_unbind,
+	.bind			= nv44_sgdma_bind,
+	.unbind			= nv44_sgdma_unbind,
 	.destroy		= nouveau_sgdma_destroy
 };
 
+static int
+nv50_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+{
+	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+	struct nouveau_mem *node = mem->mm_node;
+	/* noop: bound in move_notify() */
+	node->pages = nvbe->pages;
+	nvbe->pages = (dma_addr_t *)node;
+	nvbe->bound = true;
+	return 0;
+}
+
+static int
+nv50_sgdma_unbind(struct ttm_backend *be)
+{
+	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+	struct nouveau_mem *node = (struct nouveau_mem *)nvbe->pages;
+	/* noop: unbound in move_notify() */
+	nvbe->pages = node->pages;
+	node->pages = NULL;
+	nvbe->bound = false;
+	return 0;
+}
+
 static struct ttm_backend_func nv50_sgdma_backend = {
 	.populate		= nouveau_sgdma_populate,
 	.clear			= nouveau_sgdma_clear,
@@ -198,10 +400,7 @@ nouveau_sgdma_init_ttm(struct drm_device *dev)
 
 	nvbe->dev = dev;
 
-	if (dev_priv->card_type < NV_50)
-		nvbe->backend.func = &nouveau_sgdma_backend;
-	else
-		nvbe->backend.func = &nv50_sgdma_backend;
+	nvbe->backend.func = dev_priv->gart_info.func;
 	return &nvbe->backend;
 }
 
@@ -210,21 +409,64 @@ nouveau_sgdma_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *gpuobj = NULL;
-	uint32_t aper_size, obj_size;
-	int i, ret;
+	u32 aper_size, align;
+	int ret;
 
-	if (dev_priv->card_type < NV_50) {
-		if(dev_priv->ramin_rsvd_vram < 2 * 1024 * 1024)
-			aper_size = 64 * 1024 * 1024;
-		else
-			aper_size = 512 * 1024 * 1024;
+	if (dev_priv->card_type >= NV_40 && drm_device_is_pcie(dev))
+		aper_size = 512 * 1024 * 1024;
+	else
+		aper_size = 64 * 1024 * 1024;
+
+	/* Dear NVIDIA, NV44+ would like proper present bits in PTEs for
+	 * christmas.  The cards before it have them, the cards after
+	 * it have them, why is NV44 so unloved?
+	 */
+	dev_priv->gart_info.dummy.page = alloc_page(GFP_DMA32 | GFP_KERNEL);
+	if (!dev_priv->gart_info.dummy.page)
+		return -ENOMEM;
 
-		obj_size  = (aper_size >> NV_CTXDMA_PAGE_SHIFT) * 4;
-		obj_size += 8; /* ctxdma header */
+	dev_priv->gart_info.dummy.addr =
+		pci_map_page(dev->pdev, dev_priv->gart_info.dummy.page,
+			     0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(dev->pdev, dev_priv->gart_info.dummy.addr)) {
+		NV_ERROR(dev, "error mapping dummy page\n");
+		__free_page(dev_priv->gart_info.dummy.page);
+		dev_priv->gart_info.dummy.page = NULL;
+		return -ENOMEM;
+	}
 
-		ret = nouveau_gpuobj_new(dev, NULL, obj_size, 16,
-					      NVOBJ_FLAG_ZERO_ALLOC |
-					      NVOBJ_FLAG_ZERO_FREE, &gpuobj);
+	if (dev_priv->card_type >= NV_50) {
+		dev_priv->gart_info.aper_base = 0;
+		dev_priv->gart_info.aper_size = aper_size;
+		dev_priv->gart_info.type = NOUVEAU_GART_HW;
+		dev_priv->gart_info.func = &nv50_sgdma_backend;
+	} else
+	if (0 && drm_device_is_pcie(dev) &&
+	    dev_priv->chipset > 0x40 && dev_priv->chipset != 0x45) {
+		if (nv44_graph_class(dev)) {
+			dev_priv->gart_info.func = &nv44_sgdma_backend;
+			align = 512 * 1024;
+		} else {
+			dev_priv->gart_info.func = &nv41_sgdma_backend;
+			align = 16;
+		}
+
+		ret = nouveau_gpuobj_new(dev, NULL, aper_size / 1024, align,
+					 NVOBJ_FLAG_ZERO_ALLOC |
+					 NVOBJ_FLAG_ZERO_FREE, &gpuobj);
+		if (ret) {
+			NV_ERROR(dev, "Error creating sgdma object: %d\n", ret);
+			return ret;
+		}
+
+		dev_priv->gart_info.sg_ctxdma = gpuobj;
+		dev_priv->gart_info.aper_base = 0;
+		dev_priv->gart_info.aper_size = aper_size;
+		dev_priv->gart_info.type = NOUVEAU_GART_HW;
+	} else {
+		ret = nouveau_gpuobj_new(dev, NULL, (aper_size / 1024) + 8, 16,
+					 NVOBJ_FLAG_ZERO_ALLOC |
+					 NVOBJ_FLAG_ZERO_FREE, &gpuobj);
 		if (ret) {
 			NV_ERROR(dev, "Error creating sgdma object: %d\n", ret);
 			return ret;
@@ -236,25 +478,14 @@ nouveau_sgdma_init(struct drm_device *dev)
 				   (0 << 14) /* RW */ |
 				   (2 << 16) /* PCI */);
 		nv_wo32(gpuobj, 4, aper_size - 1);
-		for (i = 2; i < 2 + (aper_size >> 12); i++)
-			nv_wo32(gpuobj, i * 4, 0x00000000);
 
 		dev_priv->gart_info.sg_ctxdma = gpuobj;
 		dev_priv->gart_info.aper_base = 0;
 		dev_priv->gart_info.aper_size = aper_size;
-	} else
-	if (dev_priv->chan_vm) {
-		ret = nouveau_vm_get(dev_priv->chan_vm, 512 * 1024 * 1024,
-				     12, NV_MEM_ACCESS_RW,
-				     &dev_priv->gart_info.vma);
-		if (ret)
-			return ret;
-
-		dev_priv->gart_info.aper_base = dev_priv->gart_info.vma.offset;
-		dev_priv->gart_info.aper_size = 512 * 1024 * 1024;
+		dev_priv->gart_info.type = NOUVEAU_GART_PDMA;
+		dev_priv->gart_info.func = &nv04_sgdma_backend;
 	}
 
-	dev_priv->gart_info.type      = NOUVEAU_GART_SGDMA;
 	return 0;
 }
 
@@ -264,7 +495,13 @@ nouveau_sgdma_takedown(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
 	nouveau_gpuobj_ref(NULL, &dev_priv->gart_info.sg_ctxdma);
-	nouveau_vm_put(&dev_priv->gart_info.vma);
+
+	if (dev_priv->gart_info.dummy.page) {
+		pci_unmap_page(dev->pdev, dev_priv->gart_info.dummy.addr,
+			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		__free_page(dev_priv->gart_info.dummy.page);
+		dev_priv->gart_info.dummy.page = NULL;
+	}
 }
 
 uint32_t
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index a54fc43..adf6dac 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -376,15 +376,11 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.destroy_context	= nv50_graph_destroy_context;
 		engine->graph.load_context	= nv50_graph_load_context;
 		engine->graph.unload_context	= nv50_graph_unload_context;
-		if (dev_priv->chipset != 0x86)
+		if (dev_priv->chipset == 0x50 ||
+		    dev_priv->chipset == 0xac)
 			engine->graph.tlb_flush	= nv50_graph_tlb_flush;
-		else {
-			/* from what i can see nvidia do this on every
-			 * pre-NVA3 board except NVAC, but, we've only
-			 * ever seen problems on NV86
-			 */
-			engine->graph.tlb_flush	= nv86_graph_tlb_flush;
-		}
+		else
+			engine->graph.tlb_flush	= nv84_graph_tlb_flush;
 		engine->fifo.channels		= 128;
 		engine->fifo.init		= nv50_fifo_init;
 		engine->fifo.takedown		= nv50_fifo_takedown;
@@ -513,6 +509,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->vram.get		= nvc0_vram_new;
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nvc0_vram_flags_valid;
+		engine->pm.temp_get		= nv84_temp_get;
 		break;
 	default:
 		NV_ERROR(dev, "NV%02x unsupported\n", dev_priv->chipset);
@@ -544,7 +541,6 @@ static int
 nouveau_card_init_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *gpuobj = NULL;
 	int ret;
 
 	ret = nouveau_channel_alloc(dev, &dev_priv->channel,
@@ -552,41 +548,8 @@ nouveau_card_init_channel(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	/* no dma objects on fermi... */
-	if (dev_priv->card_type >= NV_C0)
-		goto out_done;
-
-	ret = nouveau_gpuobj_dma_new(dev_priv->channel, NV_CLASS_DMA_IN_MEMORY,
-				     0, dev_priv->vram_size,
-				     NV_MEM_ACCESS_RW, NV_MEM_TARGET_VRAM,
-				     &gpuobj);
-	if (ret)
-		goto out_err;
-
-	ret = nouveau_ramht_insert(dev_priv->channel, NvDmaVRAM, gpuobj);
-	nouveau_gpuobj_ref(NULL, &gpuobj);
-	if (ret)
-		goto out_err;
-
-	ret = nouveau_gpuobj_dma_new(dev_priv->channel, NV_CLASS_DMA_IN_MEMORY,
-				     0, dev_priv->gart_info.aper_size,
-				     NV_MEM_ACCESS_RW, NV_MEM_TARGET_GART,
-				     &gpuobj);
-	if (ret)
-		goto out_err;
-
-	ret = nouveau_ramht_insert(dev_priv->channel, NvDmaGART, gpuobj);
-	nouveau_gpuobj_ref(NULL, &gpuobj);
-	if (ret)
-		goto out_err;
-
-out_done:
 	mutex_unlock(&dev_priv->channel->mutex);
 	return 0;
-
-out_err:
-	nouveau_channel_put(&dev_priv->channel);
-	return ret;
 }
 
 static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
@@ -646,6 +609,7 @@ nouveau_card_init(struct drm_device *dev)
 	spin_lock_init(&dev_priv->channels.lock);
 	spin_lock_init(&dev_priv->tile.lock);
 	spin_lock_init(&dev_priv->context_switch_lock);
+	spin_lock_init(&dev_priv->vm_lock);
 
 	/* Make the CRTCs and I2C buses accessible */
 	ret = engine->display.early_init(dev);
@@ -811,6 +775,11 @@ static void nouveau_card_takedown(struct drm_device *dev)
 	engine->mc.takedown(dev);
 	engine->display.late_takedown(dev);
 
+	if (dev_priv->vga_ram) {
+		nouveau_bo_unpin(dev_priv->vga_ram);
+		nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+	}
+
 	mutex_lock(&dev->struct_mutex);
 	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT);
@@ -904,7 +873,7 @@ static int nouveau_remove_conflicting_drivers(struct drm_device *dev)
 #ifdef CONFIG_X86
 	primary = dev->pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
 #endif
-	
+
 	remove_conflicting_framebuffers(dev_priv->apertures, "nouveaufb", primary);
 	return 0;
 }
@@ -929,12 +898,6 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	NV_DEBUG(dev, "vendor: 0x%X device: 0x%X class: 0x%X\n",
 		 dev->pci_vendor, dev->pci_device, dev->pdev->class);
 
-	dev_priv->wq = create_workqueue("nouveau");
-	if (!dev_priv->wq) {
-		ret = -EINVAL;
-		goto err_priv;
-	}
-
 	/* resource 0 is mmio regs */
 	/* resource 1 is linear FB */
 	/* resource 2 is RAMIN (mmio regs + 0x1000000) */
@@ -947,7 +910,7 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 		NV_ERROR(dev, "Unable to initialize the mmio mapping. "
 			 "Please report your setup to " DRIVER_EMAIL "\n");
 		ret = -EINVAL;
-		goto err_wq;
+		goto err_priv;
 	}
 	NV_DEBUG(dev, "regs mapped ok at 0x%llx\n",
 					(unsigned long long)mmio_start_offs);
@@ -962,11 +925,13 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 
 	/* Time to determine the card architecture */
 	reg0 = nv_rd32(dev, NV03_PMC_BOOT_0);
+	dev_priv->stepping = 0; /* XXX: add stepping for pre-NV10? */
 
 	/* We're dealing with >=NV10 */
 	if ((reg0 & 0x0f000000) > 0) {
 		/* Bit 27-20 contain the architecture in hex */
 		dev_priv->chipset = (reg0 & 0xff00000) >> 20;
+		dev_priv->stepping = (reg0 & 0xff);
 	/* NV04 or NV05 */
 	} else if ((reg0 & 0xff00fff0) == 0x20004000) {
 		if (reg0 & 0x00f00000)
@@ -1054,8 +1019,6 @@ err_ramin:
 	iounmap(dev_priv->ramin);
 err_mmio:
 	iounmap(dev_priv->mmio);
-err_wq:
-	destroy_workqueue(dev_priv->wq);
 err_priv:
 	kfree(dev_priv);
 	dev->dev_private = NULL;
@@ -1126,7 +1089,7 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 		getparam->value = 1;
 		break;
 	case NOUVEAU_GETPARAM_HAS_PAGEFLIP:
-		getparam->value = (dev_priv->card_type < NV_50);
+		getparam->value = 1;
 		break;
 	case NOUVEAU_GETPARAM_GRAPH_UNITS:
 		/* NV40 and NV50 versions are quite different, but register
diff --git a/drivers/gpu/drm/nouveau/nouveau_temp.c b/drivers/gpu/drm/nouveau/nouveau_temp.c
index 8d9968e..649b041 100644
--- a/drivers/gpu/drm/nouveau/nouveau_temp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_temp.c
@@ -239,11 +239,9 @@ static bool
 probe_monitoring_device(struct nouveau_i2c_chan *i2c,
 			struct i2c_board_info *info)
 {
-	char modalias[16] = "i2c:";
 	struct i2c_client *client;
 
-	strlcat(modalias, info->type, sizeof(modalias));
-	request_module(modalias);
+	request_module("%s%s", I2C_MODULE_PREFIX, info->type);
 
 	client = i2c_new_device(&i2c->adapter, info);
 	if (!client)
diff --git a/drivers/gpu/drm/nouveau/nouveau_util.c b/drivers/gpu/drm/nouveau/nouveau_util.c
index fbe0fb1..e51b515 100644
--- a/drivers/gpu/drm/nouveau/nouveau_util.c
+++ b/drivers/gpu/drm/nouveau/nouveau_util.c
@@ -47,18 +47,27 @@ nouveau_bitfield_print(const struct nouveau_bitfield *bf, u32 value)
 		printk(" (unknown bits 0x%08x)", value);
 }
 
-void
-nouveau_enum_print(const struct nouveau_enum *en, u32 value)
+const struct nouveau_enum *
+nouveau_enum_find(const struct nouveau_enum *en, u32 value)
 {
 	while (en->name) {
-		if (value == en->value) {
-			printk("%s", en->name);
-			return;
-		}
-
+		if (en->value == value)
+			return en;
 		en++;
 	}
 
+	return NULL;
+}
+
+void
+nouveau_enum_print(const struct nouveau_enum *en, u32 value)
+{
+	en = nouveau_enum_find(en, value);
+	if (en) {
+		printk("%s", en->name);
+		return;
+	}
+
 	printk("(unknown enum 0x%08x)", value);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_util.h b/drivers/gpu/drm/nouveau/nouveau_util.h
index d9ceaea..b97719f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_util.h
+++ b/drivers/gpu/drm/nouveau/nouveau_util.h
@@ -36,10 +36,14 @@ struct nouveau_bitfield {
 struct nouveau_enum {
 	u32 value;
 	const char *name;
+	void *data;
 };
 
 void nouveau_bitfield_print(const struct nouveau_bitfield *, u32 value);
 void nouveau_enum_print(const struct nouveau_enum *, u32 value);
+const struct nouveau_enum *
+nouveau_enum_find(const struct nouveau_enum *, u32 value);
+
 int nouveau_ratelimit(void);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c
index 97d82ae..0059e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.c
@@ -28,7 +28,7 @@
 #include "nouveau_vm.h"
 
 void
-nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram)
+nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_mem *node)
 {
 	struct nouveau_vm *vm = vma->vm;
 	struct nouveau_mm_node *r;
@@ -40,7 +40,8 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram)
 	u32 max  = 1 << (vm->pgt_bits - bits);
 	u32 end, len;
 
-	list_for_each_entry(r, &vram->regions, rl_entry) {
+	delta = 0;
+	list_for_each_entry(r, &node->regions, rl_entry) {
 		u64 phys = (u64)r->offset << 12;
 		u32 num  = r->length >> bits;
 
@@ -52,7 +53,7 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram)
 				end = max;
 			len = end - pte;
 
-			vm->map(vma, pgt, vram, pte, len, phys);
+			vm->map(vma, pgt, node, pte, len, phys, delta);
 
 			num -= len;
 			pte += len;
@@ -60,6 +61,8 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram)
 				pde++;
 				pte = 0;
 			}
+
+			delta += (u64)len << vma->node->type;
 		}
 	}
 
@@ -67,14 +70,14 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram)
 }
 
 void
-nouveau_vm_map(struct nouveau_vma *vma, struct nouveau_vram *vram)
+nouveau_vm_map(struct nouveau_vma *vma, struct nouveau_mem *node)
 {
-	nouveau_vm_map_at(vma, 0, vram);
+	nouveau_vm_map_at(vma, 0, node);
 }
 
 void
 nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
-		  dma_addr_t *list)
+		  struct nouveau_mem *mem, dma_addr_t *list)
 {
 	struct nouveau_vm *vm = vma->vm;
 	int big = vma->node->type != vm->spg_shift;
@@ -94,7 +97,7 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
 			end = max;
 		len = end - pte;
 
-		vm->map_sg(vma, pgt, pte, list, len);
+		vm->map_sg(vma, pgt, mem, pte, len, list);
 
 		num  -= len;
 		pte  += len;
@@ -311,18 +314,7 @@ nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset,
 		vm->spg_shift = 12;
 		vm->lpg_shift = 17;
 		pgt_bits = 27;
-
-		/* Should be 4096 everywhere, this is a hack that's
-		 * currently necessary to avoid an elusive bug that
-		 * causes corruption when mixing small/large pages
-		 */
-		if (length < (1ULL << 40))
-			block = 4096;
-		else {
-			block = (1 << pgt_bits);
-			if (length < block)
-				block = length;
-		}
+		block = 4096;
 	} else {
 		kfree(vm);
 		return -ENOSYS;
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h
index e119351..2e06b55 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.h
@@ -67,9 +67,10 @@ struct nouveau_vm {
 	void (*map_pgt)(struct nouveau_gpuobj *pgd, u32 pde,
 			struct nouveau_gpuobj *pgt[2]);
 	void (*map)(struct nouveau_vma *, struct nouveau_gpuobj *,
-		    struct nouveau_vram *, u32 pte, u32 cnt, u64 phys);
+		    struct nouveau_mem *, u32 pte, u32 cnt,
+		    u64 phys, u64 delta);
 	void (*map_sg)(struct nouveau_vma *, struct nouveau_gpuobj *,
-		       u32 pte, dma_addr_t *, u32 cnt);
+		       struct nouveau_mem *, u32 pte, u32 cnt, dma_addr_t *);
 	void (*unmap)(struct nouveau_gpuobj *pgt, u32 pte, u32 cnt);
 	void (*flush)(struct nouveau_vm *);
 };
@@ -82,20 +83,20 @@ int  nouveau_vm_ref(struct nouveau_vm *, struct nouveau_vm **,
 int  nouveau_vm_get(struct nouveau_vm *, u64 size, u32 page_shift,
 		    u32 access, struct nouveau_vma *);
 void nouveau_vm_put(struct nouveau_vma *);
-void nouveau_vm_map(struct nouveau_vma *, struct nouveau_vram *);
-void nouveau_vm_map_at(struct nouveau_vma *, u64 offset, struct nouveau_vram *);
+void nouveau_vm_map(struct nouveau_vma *, struct nouveau_mem *);
+void nouveau_vm_map_at(struct nouveau_vma *, u64 offset, struct nouveau_mem *);
 void nouveau_vm_unmap(struct nouveau_vma *);
 void nouveau_vm_unmap_at(struct nouveau_vma *, u64 offset, u64 length);
 void nouveau_vm_map_sg(struct nouveau_vma *, u64 offset, u64 length,
-		       dma_addr_t *);
+		       struct nouveau_mem *, dma_addr_t *);
 
 /* nv50_vm.c */
 void nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde,
 		     struct nouveau_gpuobj *pgt[2]);
 void nv50_vm_map(struct nouveau_vma *, struct nouveau_gpuobj *,
-		 struct nouveau_vram *, u32 pte, u32 cnt, u64 phys);
+		 struct nouveau_mem *, u32 pte, u32 cnt, u64 phys, u64 delta);
 void nv50_vm_map_sg(struct nouveau_vma *, struct nouveau_gpuobj *,
-		    u32 pte, dma_addr_t *, u32 cnt);
+		    struct nouveau_mem *, u32 pte, u32 cnt, dma_addr_t *);
 void nv50_vm_unmap(struct nouveau_gpuobj *, u32 pte, u32 cnt);
 void nv50_vm_flush(struct nouveau_vm *);
 void nv50_vm_flush_engine(struct drm_device *, int engine);
@@ -104,9 +105,9 @@ void nv50_vm_flush_engine(struct drm_device *, int engine);
 void nvc0_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde,
 		     struct nouveau_gpuobj *pgt[2]);
 void nvc0_vm_map(struct nouveau_vma *, struct nouveau_gpuobj *,
-		 struct nouveau_vram *, u32 pte, u32 cnt, u64 phys);
+		 struct nouveau_mem *, u32 pte, u32 cnt, u64 phys, u64 delta);
 void nvc0_vm_map_sg(struct nouveau_vma *, struct nouveau_gpuobj *,
-		    u32 pte, dma_addr_t *, u32 cnt);
+		    struct nouveau_mem *, u32 pte, u32 cnt, dma_addr_t *);
 void nvc0_vm_unmap(struct nouveau_gpuobj *, u32 pte, u32 cnt);
 void nvc0_vm_flush(struct nouveau_vm *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_volt.c b/drivers/gpu/drm/nouveau/nouveau_volt.c
index 04fdc00..75e87274 100644
--- a/drivers/gpu/drm/nouveau/nouveau_volt.c
+++ b/drivers/gpu/drm/nouveau/nouveau_volt.c
@@ -159,8 +159,16 @@ nouveau_volt_init(struct drm_device *dev)
 		headerlen = volt[1];
 		recordlen = volt[2];
 		entries   = volt[3];
-		vidshift  = hweight8(volt[5]);
 		vidmask   = volt[4];
+		/* no longer certain what volt[5] is, if it's related to
+		 * the vid shift then it's definitely not a function of
+		 * how many bits are set.
+		 *
+		 * after looking at a number of nva3+ vbios images, they
+		 * all seem likely to have a static shift of 2.. lets
+		 * go with that for now until proven otherwise.
+		 */
+		vidshift  = 2;
 		break;
 	default:
 		NV_WARN(dev, "voltage table 0x%02x unknown\n", volt[0]);
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index 297505e..9eaafcc 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -376,7 +376,10 @@ nv_crtc_mode_set_vga(struct drm_crtc *crtc, struct drm_display_mode *mode)
 	 */
 
 	/* framebuffer can be larger than crtc scanout area. */
-	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] = XLATE(fb->pitch / 8, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] =
+		XLATE(fb->pitch / 8, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_42] =
+		XLATE(fb->pitch / 8, 11, NV_CIO_CRE_42_OFFSET_11);
 	regp->CRTC[NV_CIO_CRE_RPC1_INDEX] = mode->crtc_hdisplay < 1280 ?
 					    MASK(NV_CIO_CRE_RPC1_LARGE) : 0x00;
 	regp->CRTC[NV_CIO_CRE_LSR_INDEX] = XLATE(horizBlankEnd, 6, NV_CIO_CRE_LSR_HBE_6) |
@@ -790,8 +793,7 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		drm_fb = passed_fb;
 		fb = nouveau_framebuffer(passed_fb);
-	}
-	else {
+	} else {
 		/* If not atomic, we can go ahead and pin, and unpin the
 		 * old fb we were passed.
 		 */
@@ -825,8 +827,11 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	regp->CRTC[NV_CIO_CR_OFFSET_INDEX] = drm_fb->pitch >> 3;
 	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] =
 		XLATE(drm_fb->pitch >> 3, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_42] =
+		XLATE(drm_fb->pitch / 8, 11, NV_CIO_CRE_42_OFFSET_11);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_RPC0_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CR_OFFSET_INDEX);
+	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_42);
 
 	/* Update the framebuffer location. */
 	regp->fb_start = nv_crtc->fb.offset & ~3;
@@ -944,14 +949,14 @@ nv04_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	struct drm_gem_object *gem;
 	int ret = 0;
 
-	if (width != 64 || height != 64)
-		return -EINVAL;
-
 	if (!buffer_handle) {
 		nv_crtc->cursor.hide(nv_crtc, true);
 		return 0;
 	}
 
+	if (width != 64 || height != 64)
+		return -EINVAL;
+
 	gem = drm_gem_object_lookup(dev, file_priv, buffer_handle);
 	if (!gem)
 		return -ENOENT;
@@ -1031,7 +1036,7 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
 	ret = nouveau_bo_new(dev, NULL, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, false, true, &nv_crtc->cursor.nvbo);
+			     0, 0x0000, &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
 		if (!ret)
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index c82db37..12098bf 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -581,12 +581,13 @@ static void nv04_dfp_restore(struct drm_encoder *encoder)
 	int head = nv_encoder->restore.head;
 
 	if (nv_encoder->dcb->type == OUTPUT_LVDS) {
-		struct drm_display_mode *native_mode = nouveau_encoder_connector_get(nv_encoder)->native_mode;
-		if (native_mode)
-			call_lvds_script(dev, nv_encoder->dcb, head, LVDS_PANEL_ON,
-					 native_mode->clock);
-		else
-			NV_ERROR(dev, "Not restoring LVDS without native mode\n");
+		struct nouveau_connector *connector =
+			nouveau_encoder_connector_get(nv_encoder);
+
+		if (connector && connector->native_mode)
+			call_lvds_script(dev, nv_encoder->dcb, head,
+					 LVDS_PANEL_ON,
+					 connector->native_mode->clock);
 
 	} else if (nv_encoder->dcb->type == OUTPUT_TMDS) {
 		int clock = nouveau_hw_pllvals_to_clk
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index f89d104..db465a3 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -379,6 +379,15 @@ out:
 	return handled;
 }
 
+static const char *nv_dma_state_err(u32 state)
+{
+	static const char * const desc[] = {
+		"NONE", "CALL_SUBR_ACTIVE", "INVALID_MTHD", "RET_SUBR_INACTIVE",
+		"INVALID_CMD", "IB_EMPTY"/* NV50+ */, "MEM_FAULT", "UNK"
+	};
+	return desc[(state >> 29) & 0x7];
+}
+
 void
 nv04_fifo_isr(struct drm_device *dev)
 {
@@ -460,9 +469,10 @@ nv04_fifo_isr(struct drm_device *dev)
 				if (nouveau_ratelimit())
 					NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x "
 					     "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x "
-					     "State 0x%08x Push 0x%08x\n",
+					     "State 0x%08x (err: %s) Push 0x%08x\n",
 						chid, ho_get, dma_get, ho_put,
 						dma_put, ib_get, ib_put, state,
+						nv_dma_state_err(state),
 						push);
 
 				/* METHOD_COUNT, in DMA_STATE on earlier chipsets */
@@ -476,8 +486,9 @@ nv04_fifo_isr(struct drm_device *dev)
 				}
 			} else {
 				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x "
-					     "Put 0x%08x State 0x%08x Push 0x%08x\n",
-					chid, dma_get, dma_put, state, push);
+					     "Put 0x%08x State 0x%08x (err: %s) Push 0x%08x\n",
+					chid, dma_get, dma_put, state,
+					nv_dma_state_err(state), push);
 
 				if (dma_get != dma_put)
 					nv_wr32(dev, 0x003244, dma_put);
@@ -505,7 +516,7 @@ nv04_fifo_isr(struct drm_device *dev)
 
 		if (dev_priv->card_type == NV_50) {
 			if (status & 0x00000010) {
-				nv50_fb_vm_trap(dev, 1, "PFIFO_BAR_FAULT");
+				nv50_fb_vm_trap(dev, nouveau_ratelimit());
 				status &= ~0x00000010;
 				nv_wr32(dev, 0x002100, 0x00000010);
 			}
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index af75015..055677a 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -507,7 +507,7 @@ int nv04_graph_init(struct drm_device *dev)
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_0, 0x1231c000);
 	/*1231C000 blob, 001 haiku*/
-	//*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/
+	/*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_1, 0x72111100);
 	/*0x72111100 blob , 01 haiku*/
 	/*nv_wr32(dev, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/
@@ -1232,8 +1232,7 @@ static struct nouveau_bitfield nv04_graph_intr[] = {
 	{}
 };
 
-static struct nouveau_bitfield nv04_graph_nstatus[] =
-{
+static struct nouveau_bitfield nv04_graph_nstatus[] = {
 	{ NV04_PGRAPH_NSTATUS_STATE_IN_USE,       "STATE_IN_USE" },
 	{ NV04_PGRAPH_NSTATUS_INVALID_STATE,      "INVALID_STATE" },
 	{ NV04_PGRAPH_NSTATUS_BAD_ARGUMENT,       "BAD_ARGUMENT" },
@@ -1241,8 +1240,7 @@ static struct nouveau_bitfield nv04_graph_nstatus[] =
 	{}
 };
 
-struct nouveau_bitfield nv04_graph_nsource[] =
-{
+struct nouveau_bitfield nv04_graph_nsource[] = {
 	{ NV03_PGRAPH_NSOURCE_NOTIFICATION,       "NOTIFICATION" },
 	{ NV03_PGRAPH_NSOURCE_DATA_ERROR,         "DATA_ERROR" },
 	{ NV03_PGRAPH_NSOURCE_PROTECTION_ERROR,   "PROTECTION_ERROR" },
diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c
index b8e3edb..0671b7f 100644
--- a/drivers/gpu/drm/nouveau/nv04_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv04_instmem.c
@@ -95,6 +95,9 @@ nv04_instmem_takedown(struct drm_device *dev)
 	nouveau_ramht_ref(NULL, &dev_priv->ramht, NULL);
 	nouveau_gpuobj_ref(NULL, &dev_priv->ramro);
 	nouveau_gpuobj_ref(NULL, &dev_priv->ramfc);
+
+	if (dev_priv->ramin_heap.free_stack.next)
+		drm_mm_takedown(&dev_priv->ramin_heap);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 8c92edb..531d7ba 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -1117,8 +1117,7 @@ struct nouveau_bitfield nv10_graph_intr[] = {
 	{}
 };
 
-struct nouveau_bitfield nv10_graph_nstatus[] =
-{
+struct nouveau_bitfield nv10_graph_nstatus[] = {
 	{ NV10_PGRAPH_NSTATUS_STATE_IN_USE,       "STATE_IN_USE" },
 	{ NV10_PGRAPH_NSTATUS_INVALID_STATE,      "INVALID_STATE" },
 	{ NV10_PGRAPH_NSTATUS_BAD_ARGUMENT,       "BAD_ARGUMENT" },
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c
index f3d9c05..f0ac2a7 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -24,6 +24,53 @@ nv40_fb_set_tile_region(struct drm_device *dev, int i)
 	}
 }
 
+static void
+nv40_fb_init_gart(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma;
+
+	if (dev_priv->gart_info.type != NOUVEAU_GART_HW) {
+		nv_wr32(dev, 0x100800, 0x00000001);
+		return;
+	}
+
+	nv_wr32(dev, 0x100800, gart->pinst | 0x00000002);
+	nv_mask(dev, 0x10008c, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x100820, 0x00000000);
+}
+
+static void
+nv44_fb_init_gart(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma;
+	u32 vinst;
+
+	if (dev_priv->gart_info.type != NOUVEAU_GART_HW) {
+		nv_wr32(dev, 0x100850, 0x80000000);
+		nv_wr32(dev, 0x100800, 0x00000001);
+		return;
+	}
+
+	/* calculate vram address of this PRAMIN block, object
+	 * must be allocated on 512KiB alignment, and not exceed
+	 * a total size of 512KiB for this to work correctly
+	 */
+	vinst  = nv_rd32(dev, 0x10020c);
+	vinst -= ((gart->pinst >> 19) + 1) << 19;
+
+	nv_wr32(dev, 0x100850, 0x80000000);
+	nv_wr32(dev, 0x100818, dev_priv->gart_info.dummy.addr);
+
+	nv_wr32(dev, 0x100804, dev_priv->gart_info.aper_size);
+	nv_wr32(dev, 0x100850, 0x00008000);
+	nv_mask(dev, 0x10008c, 0x00000200, 0x00000200);
+	nv_wr32(dev, 0x100820, 0x00000000);
+	nv_wr32(dev, 0x10082c, 0x00000001);
+	nv_wr32(dev, 0x100800, vinst | 0x00000010);
+}
+
 int
 nv40_fb_init(struct drm_device *dev)
 {
@@ -32,12 +79,12 @@ nv40_fb_init(struct drm_device *dev)
 	uint32_t tmp;
 	int i;
 
-	/* This is strictly a NV4x register (don't know about NV5x). */
-	/* The blob sets these to all kinds of values, and they mess up our setup. */
-	/* I got value 0x52802 instead. For some cards the blob even sets it back to 0x1. */
-	/* Note: the blob doesn't read this value, so i'm pretty sure this is safe for all cards. */
-	/* Any idea what this is? */
-	nv_wr32(dev, NV40_PFB_UNK_800, 0x1);
+	if (dev_priv->chipset != 0x40 && dev_priv->chipset != 0x45) {
+		if (nv44_graph_class(dev))
+			nv44_fb_init_gart(dev);
+		else
+			nv40_fb_init_gart(dev);
+	}
 
 	switch (dev_priv->chipset) {
 	case 0x40:
diff --git a/drivers/gpu/drm/nouveau/nv50_calc.c b/drivers/gpu/drm/nouveau/nv50_calc.c
index de81151..8cf63a8 100644
--- a/drivers/gpu/drm/nouveau/nv50_calc.c
+++ b/drivers/gpu/drm/nouveau/nv50_calc.c
@@ -23,7 +23,6 @@
  */
 
 #include "drmP.h"
-#include "drm_fixed.h"
 #include "nouveau_drv.h"
 #include "nouveau_hw.h"
 
@@ -47,45 +46,52 @@ nv50_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
 }
 
 int
-nv50_calc_pll2(struct drm_device *dev, struct pll_lims *pll, int clk,
-	       int *N, int *fN, int *M, int *P)
+nva3_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
+	      int *pN, int *pfN, int *pM, int *P)
 {
-	fixed20_12 fb_div, a, b;
-	u32 refclk = pll->refclk / 10;
-	u32 max_vco_freq = pll->vco1.maxfreq / 10;
-	u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10;
-	clk /= 10;
+	u32 best_err = ~0, err;
+	int M, lM, hM, N, fN;
 
-	*P = max_vco_freq / clk;
+	*P = pll->vco1.maxfreq / clk;
 	if (*P > pll->max_p)
 		*P = pll->max_p;
 	if (*P < pll->min_p)
 		*P = pll->min_p;
 
-	/* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */
-	a.full = dfixed_const(refclk + max_vco_inputfreq);
-	b.full = dfixed_const(max_vco_inputfreq);
-	a.full = dfixed_div(a, b);
-	a.full = dfixed_floor(a);
-	*M = dfixed_trunc(a);
+	lM = (pll->refclk + pll->vco1.max_inputfreq) / pll->vco1.max_inputfreq;
+	lM = max(lM, (int)pll->vco1.min_m);
+	hM = (pll->refclk + pll->vco1.min_inputfreq) / pll->vco1.min_inputfreq;
+	hM = min(hM, (int)pll->vco1.max_m);
 
-	/* fb_div = (vco * *M) / refclk; */
-	fb_div.full = dfixed_const(clk * *P);
-	fb_div.full = dfixed_mul(fb_div, a);
-	a.full = dfixed_const(refclk);
-	fb_div.full = dfixed_div(fb_div, a);
+	for (M = lM; M <= hM; M++) {
+		u32 tmp = clk * *P * M;
+		N  = tmp / pll->refclk;
+		fN = tmp % pll->refclk;
+		if (!pfN && fN >= pll->refclk / 2)
+			N++;
 
-	/* *N = floor(fb_div); */
-	a.full = dfixed_floor(fb_div);
-	*N = dfixed_trunc(fb_div);
+		if (N < pll->vco1.min_n)
+			continue;
+		if (N > pll->vco1.max_n)
+			break;
 
-	/* *fN = (fmod(fb_div, 1.0) * 8192) - 4096; */
-	b.full = dfixed_const(8192);
-	a.full = dfixed_mul(a, b);
-	fb_div.full = dfixed_mul(fb_div, b);
-	fb_div.full = fb_div.full - a.full;
-	*fN = dfixed_trunc(fb_div) - 4096;
-	*fN &= 0xffff;
+		err = abs(clk - (pll->refclk * N / M / *P));
+		if (err < best_err) {
+			best_err = err;
+			*pN = N;
+			*pM = M;
+		}
 
-	return clk;
+		if (pfN) {
+			*pfN = (((fN << 13) / pll->refclk) - 4096) & 0xffff;
+			return clk;
+		}
+	}
+
+	if (unlikely(best_err == ~0)) {
+		NV_ERROR(dev, "unable to find matching pll values\n");
+		return -EINVAL;
+	}
+
+	return pll->refclk * *pN / *pM / *P;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 9023c4d..ebabacf 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -65,7 +65,7 @@ nv50_crtc_blank(struct nouveau_crtc *nv_crtc, bool blanked)
 {
 	struct drm_device *dev = nv_crtc->base.dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int index = nv_crtc->index, ret;
 
 	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
@@ -135,8 +135,7 @@ static int
 nv50_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool on, bool update)
 {
 	struct drm_device *dev = nv_crtc->base.dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int ret;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -186,8 +185,7 @@ nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, int scaling_mode, bool update)
 	struct nouveau_connector *nv_connector =
 		nouveau_crtc_connector_get(nv_crtc);
 	struct drm_device *dev = nv_crtc->base.dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	struct drm_display_mode *native_mode = NULL;
 	struct drm_display_mode *mode = &nv_crtc->base.mode;
 	uint32_t outX, outY, horiz, vert;
@@ -288,7 +286,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2);
 	} else
 	if (dev_priv->chipset < NV_C0) {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
@@ -300,7 +298,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1);
 		nv_wr32(dev, pll.reg + 8, N2);
 	} else {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
@@ -351,14 +349,14 @@ nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	struct drm_gem_object *gem;
 	int ret = 0, i;
 
-	if (width != 64 || height != 64)
-		return -EINVAL;
-
 	if (!buffer_handle) {
 		nv_crtc->cursor.hide(nv_crtc, true);
 		return 0;
 	}
 
+	if (width != 64 || height != 64)
+		return -EINVAL;
+
 	gem = drm_gem_object_lookup(dev, file_priv, buffer_handle);
 	if (!gem)
 		return -ENOENT;
@@ -445,6 +443,39 @@ nv50_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
 }
 
+static int
+nv50_crtc_wait_complete(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+	struct nv50_display *disp = nv50_display(dev);
+	struct nouveau_channel *evo = disp->master;
+	u64 start;
+	int ret;
+
+	ret = RING_SPACE(evo, 6);
+	if (ret)
+		return ret;
+	BEGIN_RING(evo, 0, 0x0084, 1);
+	OUT_RING  (evo, 0x80000000);
+	BEGIN_RING(evo, 0, 0x0080, 1);
+	OUT_RING  (evo, 0);
+	BEGIN_RING(evo, 0, 0x0084, 1);
+	OUT_RING  (evo, 0x00000000);
+
+	nv_wo32(disp->ntfy, 0x000, 0x00000000);
+	FIRE_RING (evo);
+
+	start = ptimer->read(dev);
+	do {
+		if (nv_ro32(disp->ntfy, 0x000))
+			return 0;
+	} while (ptimer->read(dev) - start < 2000000000ULL);
+
+	return -EBUSY;
+}
+
 static void
 nv50_crtc_prepare(struct drm_crtc *crtc)
 {
@@ -453,6 +484,7 @@ nv50_crtc_prepare(struct drm_crtc *crtc)
 
 	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
+	nv50_display_flip_stop(crtc);
 	drm_vblank_pre_modeset(dev, nv_crtc->index);
 	nv50_crtc_blank(nv_crtc, true);
 }
@@ -461,24 +493,14 @@ static void
 nv50_crtc_commit(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	int ret;
 
 	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
 	nv50_crtc_blank(nv_crtc, false);
 	drm_vblank_post_modeset(dev, nv_crtc->index);
-
-	ret = RING_SPACE(evo, 2);
-	if (ret) {
-		NV_ERROR(dev, "no space while committing crtc\n");
-		return;
-	}
-	BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
-	OUT_RING  (evo, 0);
-	FIRE_RING (evo);
+	nv50_crtc_wait_complete(crtc);
+	nv50_display_flip_next(crtc, crtc->fb, NULL);
 }
 
 static bool
@@ -491,15 +513,15 @@ nv50_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *mode,
 static int
 nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 			   struct drm_framebuffer *passed_fb,
-			   int x, int y, bool update, bool atomic)
+			   int x, int y, bool atomic)
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct drm_device *dev = nv_crtc->base.dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	struct drm_framebuffer *drm_fb = nv_crtc->base.fb;
 	struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
-	int ret, format;
+	int ret;
 
 	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
@@ -510,8 +532,7 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		drm_fb = passed_fb;
 		fb = nouveau_framebuffer(passed_fb);
-	}
-	else {
+	} else {
 		/* If not atomic, we can go ahead and pin, and unpin the
 		 * old fb we were passed.
 		 */
@@ -525,28 +546,6 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 		}
 	}
 
-	switch (drm_fb->depth) {
-	case  8:
-		format = NV50_EVO_CRTC_FB_DEPTH_8;
-		break;
-	case 15:
-		format = NV50_EVO_CRTC_FB_DEPTH_15;
-		break;
-	case 16:
-		format = NV50_EVO_CRTC_FB_DEPTH_16;
-		break;
-	case 24:
-	case 32:
-		format = NV50_EVO_CRTC_FB_DEPTH_24;
-		break;
-	case 30:
-		format = NV50_EVO_CRTC_FB_DEPTH_30;
-		break;
-	default:
-		 NV_ERROR(dev, "unknown depth %d\n", drm_fb->depth);
-		 return -EINVAL;
-	}
-
 	nv_crtc->fb.offset = fb->nvbo->bo.mem.start << PAGE_SHIFT;
 	nv_crtc->fb.tile_flags = nouveau_bo_tile_layout(fb->nvbo);
 	nv_crtc->fb.cpp = drm_fb->bits_per_pixel / 8;
@@ -556,14 +555,7 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 			return ret;
 
 		BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_DMA), 1);
-		if (nv_crtc->fb.tile_flags == 0x7a00 ||
-		    nv_crtc->fb.tile_flags == 0xfe00)
-			OUT_RING(evo, NvEvoFB32);
-		else
-		if (nv_crtc->fb.tile_flags == 0x7000)
-			OUT_RING(evo, NvEvoFB16);
-		else
-			OUT_RING(evo, NvEvoVRAM_LP);
+		OUT_RING  (evo, fb->r_dma);
 	}
 
 	ret = RING_SPACE(evo, 12);
@@ -571,45 +563,26 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 		return ret;
 
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_OFFSET), 5);
-	OUT_RING(evo, nv_crtc->fb.offset >> 8);
-	OUT_RING(evo, 0);
-	OUT_RING(evo, (drm_fb->height << 16) | drm_fb->width);
-	if (!nv_crtc->fb.tile_flags) {
-		OUT_RING(evo, drm_fb->pitch | (1 << 20));
-	} else {
-		u32 tile_mode = fb->nvbo->tile_mode;
-		if (dev_priv->card_type >= NV_C0)
-			tile_mode >>= 4;
-		OUT_RING(evo, ((drm_fb->pitch / 4) << 4) | tile_mode);
-	}
-	if (dev_priv->chipset == 0x50)
-		OUT_RING(evo, (nv_crtc->fb.tile_flags << 8) | format);
-	else
-		OUT_RING(evo, format);
+	OUT_RING  (evo, nv_crtc->fb.offset >> 8);
+	OUT_RING  (evo, 0);
+	OUT_RING  (evo, (drm_fb->height << 16) | drm_fb->width);
+	OUT_RING  (evo, fb->r_pitch);
+	OUT_RING  (evo, fb->r_format);
 
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CLUT_MODE), 1);
-	OUT_RING(evo, fb->base.depth == 8 ?
-		 NV50_EVO_CRTC_CLUT_MODE_OFF : NV50_EVO_CRTC_CLUT_MODE_ON);
+	OUT_RING  (evo, fb->base.depth == 8 ?
+		   NV50_EVO_CRTC_CLUT_MODE_OFF : NV50_EVO_CRTC_CLUT_MODE_ON);
 
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, COLOR_CTRL), 1);
-	OUT_RING(evo, NV50_EVO_CRTC_COLOR_CTRL_COLOR);
+	OUT_RING  (evo, NV50_EVO_CRTC_COLOR_CTRL_COLOR);
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_POS), 1);
-	OUT_RING(evo, (y << 16) | x);
+	OUT_RING  (evo, (y << 16) | x);
 
 	if (nv_crtc->lut.depth != fb->base.depth) {
 		nv_crtc->lut.depth = fb->base.depth;
 		nv50_crtc_lut_load(crtc);
 	}
 
-	if (update) {
-		ret = RING_SPACE(evo, 2);
-		if (ret)
-			return ret;
-		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING(evo, 0);
-		FIRE_RING(evo);
-	}
-
 	return 0;
 }
 
@@ -619,8 +592,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
 		   struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nouveau_connector *nv_connector = NULL;
 	uint32_t hsync_dur,  vsync_dur, hsync_start_to_end, vsync_start_to_end;
@@ -700,14 +672,25 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
 	nv_crtc->set_dither(nv_crtc, nv_connector->use_dithering, false);
 	nv_crtc->set_scale(nv_crtc, nv_connector->scaling_mode, false);
 
-	return nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, false, false);
+	return nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
 }
 
 static int
 nv50_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 			struct drm_framebuffer *old_fb)
 {
-	return nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, true, false);
+	int ret;
+
+	nv50_display_flip_stop(crtc);
+	ret = nv50_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
+	if (ret)
+		return ret;
+
+	ret = nv50_crtc_wait_complete(crtc);
+	if (ret)
+		return ret;
+
+	return nv50_display_flip_next(crtc, crtc->fb, NULL);
 }
 
 static int
@@ -715,7 +698,14 @@ nv50_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
 			       struct drm_framebuffer *fb,
 			       int x, int y, enum mode_set_atomic state)
 {
-	return nv50_crtc_do_mode_set_base(crtc, fb, x, y, true, true);
+	int ret;
+
+	nv50_display_flip_stop(crtc);
+	ret = nv50_crtc_do_mode_set_base(crtc, fb, x, y, true);
+	if (ret)
+		return ret;
+
+	return nv50_crtc_wait_complete(crtc);
 }
 
 static const struct drm_crtc_helper_funcs nv50_crtc_helper_funcs = {
@@ -758,7 +748,7 @@ nv50_crtc_create(struct drm_device *dev, int index)
 	nv_crtc->lut.depth = 0;
 
 	ret = nouveau_bo_new(dev, NULL, 4096, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, false, true, &nv_crtc->lut.nvbo);
+			     0, 0x0000, &nv_crtc->lut.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM);
 		if (!ret)
@@ -784,7 +774,7 @@ nv50_crtc_create(struct drm_device *dev, int index)
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
 	ret = nouveau_bo_new(dev, NULL, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, false, true, &nv_crtc->cursor.nvbo);
+			     0, 0x0000, &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
 		if (!ret)
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
index 1b9ce30..9752c35 100644
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
@@ -36,9 +36,9 @@
 static void
 nv50_cursor_show(struct nouveau_crtc *nv_crtc, bool update)
 {
-	struct drm_nouveau_private *dev_priv = nv_crtc->base.dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
 	struct drm_device *dev = nv_crtc->base.dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int ret;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -71,9 +71,9 @@ nv50_cursor_show(struct nouveau_crtc *nv_crtc, bool update)
 static void
 nv50_cursor_hide(struct nouveau_crtc *nv_crtc, bool update)
 {
-	struct drm_nouveau_private *dev_priv = nv_crtc->base.dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
 	struct drm_device *dev = nv_crtc->base.dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int ret;
 
 	NV_DEBUG_KMS(dev, "\n");
diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
index 875414b..808f3ec 100644
--- a/drivers/gpu/drm/nouveau/nv50_dac.c
+++ b/drivers/gpu/drm/nouveau/nv50_dac.c
@@ -41,8 +41,7 @@ nv50_dac_disconnect(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int ret;
 
 	if (!nv_encoder->crtc)
@@ -216,8 +215,7 @@ nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
 	uint32_t mode_ctl = 0, mode_ctl2 = 0;
 	int ret;
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7cc94ed..74a3f68 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -24,6 +24,7 @@
  *
  */
 
+#define NOUVEAU_DMA_DEBUG (nouveau_reg_debug & NOUVEAU_REG_DEBUG_EVO)
 #include "nv50_display.h"
 #include "nouveau_crtc.h"
 #include "nouveau_encoder.h"
@@ -34,6 +35,7 @@
 #include "drm_crtc_helper.h"
 
 static void nv50_display_isr(struct drm_device *);
+static void nv50_display_bh(unsigned long);
 
 static inline int
 nv50_sor_nr(struct drm_device *dev)
@@ -172,16 +174,16 @@ nv50_display_init(struct drm_device *dev)
 	ret = nv50_evo_init(dev);
 	if (ret)
 		return ret;
-	evo = dev_priv->evo;
+	evo = nv50_display(dev)->master;
 
 	nv_wr32(dev, NV50_PDISPLAY_OBJECTS, (evo->ramin->vinst >> 8) | 9);
 
-	ret = RING_SPACE(evo, 11);
+	ret = RING_SPACE(evo, 15);
 	if (ret)
 		return ret;
 	BEGIN_RING(evo, 0, NV50_EVO_UNK84, 2);
 	OUT_RING(evo, NV50_EVO_UNK84_NOTIFY_DISABLED);
-	OUT_RING(evo, NV50_EVO_DMA_NOTIFY_HANDLE_NONE);
+	OUT_RING(evo, NvEvoSync);
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, FB_DMA), 1);
 	OUT_RING(evo, NV50_EVO_CRTC_FB_DMA_HANDLE_NONE);
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, UNK0800), 1);
@@ -190,6 +192,11 @@ nv50_display_init(struct drm_device *dev)
 	OUT_RING(evo, 0);
 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, UNK082C), 1);
 	OUT_RING(evo, 0);
+	/* required to make display sync channels not hate life */
+	BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, UNK900), 1);
+	OUT_RING  (evo, 0x00000311);
+	BEGIN_RING(evo, 0, NV50_EVO_CRTC(1, UNK900), 1);
+	OUT_RING  (evo, 0x00000311);
 	FIRE_RING(evo);
 	if (!nv_wait(dev, 0x640004, 0xffffffff, evo->dma.put << 2))
 		NV_ERROR(dev, "evo pushbuf stalled\n");
@@ -201,6 +208,8 @@ nv50_display_init(struct drm_device *dev)
 static int nv50_display_disable(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
+	struct nouveau_channel *evo = disp->master;
 	struct drm_crtc *drm_crtc;
 	int ret, i;
 
@@ -212,12 +221,12 @@ static int nv50_display_disable(struct drm_device *dev)
 		nv50_crtc_blank(crtc, true);
 	}
 
-	ret = RING_SPACE(dev_priv->evo, 2);
+	ret = RING_SPACE(evo, 2);
 	if (ret == 0) {
-		BEGIN_RING(dev_priv->evo, 0, NV50_EVO_UPDATE, 1);
-		OUT_RING(dev_priv->evo, 0);
+		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+		OUT_RING(evo, 0);
 	}
-	FIRE_RING(dev_priv->evo);
+	FIRE_RING(evo);
 
 	/* Almost like ack'ing a vblank interrupt, maybe in the spirit of
 	 * cleaning up?
@@ -267,10 +276,16 @@ int nv50_display_create(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 	struct drm_connector *connector, *ct;
+	struct nv50_display *priv;
 	int ret, i;
 
 	NV_DEBUG_KMS(dev, "\n");
 
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	dev_priv->engine.display.priv = priv;
+
 	/* init basic kernel modesetting */
 	drm_mode_config_init(dev);
 
@@ -330,7 +345,7 @@ int nv50_display_create(struct drm_device *dev)
 		}
 	}
 
-	INIT_WORK(&dev_priv->irq_work, nv50_display_irq_handler_bh);
+	tasklet_init(&priv->tasklet, nv50_display_bh, (unsigned long)dev);
 	nouveau_irq_register(dev, 26, nv50_display_isr);
 
 	ret = nv50_display_init(dev);
@@ -345,12 +360,131 @@ int nv50_display_create(struct drm_device *dev)
 void
 nv50_display_destroy(struct drm_device *dev)
 {
+	struct nv50_display *disp = nv50_display(dev);
+
 	NV_DEBUG_KMS(dev, "\n");
 
 	drm_mode_config_cleanup(dev);
 
 	nv50_display_disable(dev);
 	nouveau_irq_unregister(dev, 26);
+	kfree(disp);
+}
+
+void
+nv50_display_flip_stop(struct drm_crtc *crtc)
+{
+	struct nv50_display *disp = nv50_display(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_display_crtc *dispc = &disp->crtc[nv_crtc->index];
+	struct nouveau_channel *evo = dispc->sync;
+	int ret;
+
+	ret = RING_SPACE(evo, 8);
+	if (ret) {
+		WARN_ON(1);
+		return;
+	}
+
+	BEGIN_RING(evo, 0, 0x0084, 1);
+	OUT_RING  (evo, 0x00000000);
+	BEGIN_RING(evo, 0, 0x0094, 1);
+	OUT_RING  (evo, 0x00000000);
+	BEGIN_RING(evo, 0, 0x00c0, 1);
+	OUT_RING  (evo, 0x00000000);
+	BEGIN_RING(evo, 0, 0x0080, 1);
+	OUT_RING  (evo, 0x00000000);
+	FIRE_RING (evo);
+}
+
+int
+nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		       struct nouveau_channel *chan)
+{
+	struct drm_nouveau_private *dev_priv = crtc->dev->dev_private;
+	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
+	struct nv50_display *disp = nv50_display(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_display_crtc *dispc = &disp->crtc[nv_crtc->index];
+	struct nouveau_channel *evo = dispc->sync;
+	int ret;
+
+	ret = RING_SPACE(evo, 24);
+	if (unlikely(ret))
+		return ret;
+
+	/* synchronise with the rendering channel, if necessary */
+	if (likely(chan)) {
+		u64 offset = dispc->sem.bo->vma.offset + dispc->sem.offset;
+
+		ret = RING_SPACE(chan, 10);
+		if (ret) {
+			WIND_RING(evo);
+			return ret;
+		}
+
+		if (dev_priv->chipset < 0xc0) {
+			BEGIN_RING(chan, NvSubSw, 0x0060, 2);
+			OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
+			OUT_RING  (chan, dispc->sem.offset);
+			BEGIN_RING(chan, NvSubSw, 0x006c, 1);
+			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
+			BEGIN_RING(chan, NvSubSw, 0x0064, 2);
+			OUT_RING  (chan, dispc->sem.offset ^ 0x10);
+			OUT_RING  (chan, 0x74b1e000);
+			BEGIN_RING(chan, NvSubSw, 0x0060, 1);
+			if (dev_priv->chipset < 0x84)
+				OUT_RING  (chan, NvSema);
+			else
+				OUT_RING  (chan, chan->vram_handle);
+		} else {
+			BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+			OUT_RING  (chan, upper_32_bits(offset));
+			OUT_RING  (chan, lower_32_bits(offset));
+			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
+			OUT_RING  (chan, 0x1002);
+			BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+			OUT_RING  (chan, upper_32_bits(offset));
+			OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
+			OUT_RING  (chan, 0x74b1e000);
+			OUT_RING  (chan, 0x1001);
+		}
+		FIRE_RING (chan);
+	} else {
+		nouveau_bo_wr32(dispc->sem.bo, dispc->sem.offset / 4,
+				0xf00d0000 | dispc->sem.value);
+	}
+
+	/* queue the flip on the crtc's "display sync" channel */
+	BEGIN_RING(evo, 0, 0x0100, 1);
+	OUT_RING  (evo, 0xfffe0000);
+	BEGIN_RING(evo, 0, 0x0084, 5);
+	OUT_RING  (evo, chan ? 0x00000100 : 0x00000010);
+	OUT_RING  (evo, dispc->sem.offset);
+	OUT_RING  (evo, 0xf00d0000 | dispc->sem.value);
+	OUT_RING  (evo, 0x74b1e000);
+	OUT_RING  (evo, NvEvoSync);
+	BEGIN_RING(evo, 0, 0x00a0, 2);
+	OUT_RING  (evo, 0x00000000);
+	OUT_RING  (evo, 0x00000000);
+	BEGIN_RING(evo, 0, 0x00c0, 1);
+	OUT_RING  (evo, nv_fb->r_dma);
+	BEGIN_RING(evo, 0, 0x0110, 2);
+	OUT_RING  (evo, 0x00000000);
+	OUT_RING  (evo, 0x00000000);
+	BEGIN_RING(evo, 0, 0x0800, 5);
+	OUT_RING  (evo, (nv_fb->nvbo->bo.mem.start << PAGE_SHIFT) >> 8);
+	OUT_RING  (evo, 0);
+	OUT_RING  (evo, (fb->height << 16) | fb->width);
+	OUT_RING  (evo, nv_fb->r_pitch);
+	OUT_RING  (evo, nv_fb->r_format);
+	BEGIN_RING(evo, 0, 0x0080, 1);
+	OUT_RING  (evo, 0x00000000);
+	FIRE_RING (evo);
+
+	dispc->sem.offset ^= 0x10;
+	dispc->sem.value++;
+	return 0;
 }
 
 static u16
@@ -383,13 +517,25 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcb,
 			if (bios->fp.if_is_24bit)
 				script |= 0x0200;
 		} else {
+			/* determine number of lvds links */
+			if (nv_connector && nv_connector->edid &&
+			    nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG) {
+				/* http://www.spwg.org */
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					script |= 0x0100;
+			} else
 			if (pxclk >= bios->fp.duallink_transition_clk) {
 				script |= 0x0100;
+			}
+
+			/* determine panel depth */
+			if (script & 0x0100) {
 				if (bios->fp.strapless_is_24bit & 2)
 					script |= 0x0200;
-			} else
-			if (bios->fp.strapless_is_24bit & 1)
-				script |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					script |= 0x0200;
+			}
 
 			if (nv_connector && nv_connector->edid &&
 			    (nv_connector->edid->revision >= 4) &&
@@ -466,11 +612,12 @@ static void
 nv50_display_unk10_handler(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
 	u32 unk30 = nv_rd32(dev, 0x610030), mc;
 	int i, crtc, or, type = OUTPUT_ANY;
 
 	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
-	dev_priv->evo_irq.dcb = NULL;
+	disp->irq.dcb = NULL;
 
 	nv_wr32(dev, 0x619494, nv_rd32(dev, 0x619494) & ~8);
 
@@ -541,7 +688,7 @@ nv50_display_unk10_handler(struct drm_device *dev)
 
 		if (dcb->type == type && (dcb->or & (1 << or))) {
 			nouveau_bios_run_display_table(dev, dcb, 0, -1);
-			dev_priv->evo_irq.dcb = dcb;
+			disp->irq.dcb = dcb;
 			goto ack;
 		}
 	}
@@ -587,15 +734,16 @@ static void
 nv50_display_unk20_handler(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	u32 unk30 = nv_rd32(dev, 0x610030), tmp, pclk, script, mc;
+	struct nv50_display *disp = nv50_display(dev);
+	u32 unk30 = nv_rd32(dev, 0x610030), tmp, pclk, script, mc = 0;
 	struct dcb_entry *dcb;
 	int i, crtc, or, type = OUTPUT_ANY;
 
 	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
-	dcb = dev_priv->evo_irq.dcb;
+	dcb = disp->irq.dcb;
 	if (dcb) {
 		nouveau_bios_run_display_table(dev, dcb, 0, -2);
-		dev_priv->evo_irq.dcb = NULL;
+		disp->irq.dcb = NULL;
 	}
 
 	/* CRTC clock change requested? */
@@ -692,9 +840,9 @@ nv50_display_unk20_handler(struct drm_device *dev)
 		nv_wr32(dev, NV50_PDISPLAY_DAC_CLK_CTRL2(or), 0);
 	}
 
-	dev_priv->evo_irq.dcb = dcb;
-	dev_priv->evo_irq.pclk = pclk;
-	dev_priv->evo_irq.script = script;
+	disp->irq.dcb = dcb;
+	disp->irq.pclk = pclk;
+	disp->irq.script = script;
 
 ack:
 	nv_wr32(dev, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK20);
@@ -735,13 +883,13 @@ nv50_display_unk40_dp_set_tmds(struct drm_device *dev, struct dcb_entry *dcb)
 static void
 nv50_display_unk40_handler(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct dcb_entry *dcb = dev_priv->evo_irq.dcb;
-	u16 script = dev_priv->evo_irq.script;
-	u32 unk30 = nv_rd32(dev, 0x610030), pclk = dev_priv->evo_irq.pclk;
+	struct nv50_display *disp = nv50_display(dev);
+	struct dcb_entry *dcb = disp->irq.dcb;
+	u16 script = disp->irq.script;
+	u32 unk30 = nv_rd32(dev, 0x610030), pclk = disp->irq.pclk;
 
 	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
-	dev_priv->evo_irq.dcb = NULL;
+	disp->irq.dcb = NULL;
 	if (!dcb)
 		goto ack;
 
@@ -754,12 +902,10 @@ ack:
 	nv_wr32(dev, 0x619494, nv_rd32(dev, 0x619494) | 8);
 }
 
-void
-nv50_display_irq_handler_bh(struct work_struct *work)
+static void
+nv50_display_bh(unsigned long data)
 {
-	struct drm_nouveau_private *dev_priv =
-		container_of(work, struct drm_nouveau_private, irq_work);
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_device *dev = (struct drm_device *)data;
 
 	for (;;) {
 		uint32_t intr0 = nv_rd32(dev, NV50_PDISPLAY_INTR_0);
@@ -807,7 +953,7 @@ nv50_display_error_handler(struct drm_device *dev)
 static void
 nv50_display_isr(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
 	uint32_t delayed = 0;
 
 	while (nv_rd32(dev, NV50_PMC_INTR_0) & NV50_PMC_INTR_0_DISPLAY) {
@@ -835,8 +981,7 @@ nv50_display_isr(struct drm_device *dev)
 				  NV50_PDISPLAY_INTR_1_CLK_UNK40));
 		if (clock) {
 			nv_wr32(dev, NV03_PMC_INTR_EN_0, 0);
-			if (!work_pending(&dev_priv->irq_work))
-				queue_work(dev_priv->wq, &dev_priv->irq_work);
+			tasklet_schedule(&disp->tasklet);
 			delayed |= clock;
 			intr1 &= ~clock;
 		}
diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h
index f0e30b78..c2da503 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.h
+++ b/drivers/gpu/drm/nouveau/nv50_display.h
@@ -35,7 +35,36 @@
 #include "nouveau_crtc.h"
 #include "nv50_evo.h"
 
-void nv50_display_irq_handler_bh(struct work_struct *work);
+struct nv50_display_crtc {
+	struct nouveau_channel *sync;
+	struct {
+		struct nouveau_bo *bo;
+		u32 offset;
+		u16 value;
+	} sem;
+};
+
+struct nv50_display {
+	struct nouveau_channel *master;
+	struct nouveau_gpuobj *ntfy;
+
+	struct nv50_display_crtc crtc[2];
+
+	struct tasklet_struct tasklet;
+	struct {
+		struct dcb_entry *dcb;
+		u16 script;
+		u32 pclk;
+	} irq;
+};
+
+static inline struct nv50_display *
+nv50_display(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	return dev_priv->engine.display.priv;
+}
+
 int nv50_display_early_init(struct drm_device *dev);
 void nv50_display_late_takedown(struct drm_device *dev);
 int nv50_display_create(struct drm_device *dev);
@@ -44,4 +73,15 @@ void nv50_display_destroy(struct drm_device *dev);
 int nv50_crtc_blank(struct nouveau_crtc *, bool blank);
 int nv50_crtc_set_clock(struct drm_device *, int head, int pclk);
 
+int  nv50_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
+			    struct nouveau_channel *chan);
+void nv50_display_flip_stop(struct drm_crtc *);
+
+int  nv50_evo_init(struct drm_device *dev);
+void nv50_evo_fini(struct drm_device *dev);
+void nv50_evo_dmaobj_init(struct nouveau_gpuobj *, u32 memtype, u64 base,
+			  u64 size);
+int  nv50_evo_dmaobj_new(struct nouveau_channel *, u32 handle, u32 memtype,
+			 u64 base, u64 size, struct nouveau_gpuobj **);
+
 #endif /* __NV50_DISPLAY_H__ */
diff --git a/drivers/gpu/drm/nouveau/nv50_evo.c b/drivers/gpu/drm/nouveau/nv50_evo.c
index 0ea090f..c8e83c1 100644
--- a/drivers/gpu/drm/nouveau/nv50_evo.c
+++ b/drivers/gpu/drm/nouveau/nv50_evo.c
@@ -27,20 +27,17 @@
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
 #include "nouveau_ramht.h"
+#include "nv50_display.h"
 
 static void
 nv50_evo_channel_del(struct nouveau_channel **pevo)
 {
-	struct drm_nouveau_private *dev_priv;
 	struct nouveau_channel *evo = *pevo;
 
 	if (!evo)
 		return;
 	*pevo = NULL;
 
-	dev_priv = evo->dev->dev_private;
-	dev_priv->evo_alloc &= ~(1 << evo->id);
-
 	nouveau_gpuobj_channel_takedown(evo);
 	nouveau_bo_unmap(evo->pushbuf_bo);
 	nouveau_bo_ref(NULL, &evo->pushbuf_bo);
@@ -51,42 +48,61 @@ nv50_evo_channel_del(struct nouveau_channel **pevo)
 	kfree(evo);
 }
 
+void
+nv50_evo_dmaobj_init(struct nouveau_gpuobj *obj, u32 memtype, u64 base, u64 size)
+{
+	struct drm_nouveau_private *dev_priv = obj->dev->dev_private;
+	u32 flags5;
+
+	if (dev_priv->chipset < 0xc0) {
+		/* not supported on 0x50, specified in format mthd */
+		if (dev_priv->chipset == 0x50)
+			memtype = 0;
+		flags5 = 0x00010000;
+	} else {
+		if (memtype & 0x80000000)
+			flags5 = 0x00000000; /* large pages */
+		else
+			flags5 = 0x00020000;
+	}
+
+	nv50_gpuobj_dma_init(obj, 0, 0x3d, base, size, NV_MEM_TARGET_VRAM,
+			     NV_MEM_ACCESS_RW, (memtype >> 8) & 0xff, 0);
+	nv_wo32(obj, 0x14, flags5);
+	dev_priv->engine.instmem.flush(obj->dev);
+}
+
 int
-nv50_evo_dmaobj_new(struct nouveau_channel *evo, u32 class, u32 name,
-		    u32 tile_flags, u32 magic_flags, u32 offset, u32 limit,
-		    u32 flags5)
+nv50_evo_dmaobj_new(struct nouveau_channel *evo, u32 handle, u32 memtype,
+		    u64 base, u64 size, struct nouveau_gpuobj **pobj)
 {
-	struct drm_nouveau_private *dev_priv = evo->dev->dev_private;
-	struct drm_device *dev = evo->dev;
+	struct nv50_display *disp = nv50_display(evo->dev);
 	struct nouveau_gpuobj *obj = NULL;
 	int ret;
 
-	ret = nouveau_gpuobj_new(dev, dev_priv->evo, 6*4, 32, 0, &obj);
+	ret = nouveau_gpuobj_new(evo->dev, disp->master, 6*4, 32, 0, &obj);
 	if (ret)
 		return ret;
 	obj->engine = NVOBJ_ENGINE_DISPLAY;
 
-	nv_wo32(obj,  0, (tile_flags << 22) | (magic_flags << 16) | class);
-	nv_wo32(obj,  4, limit);
-	nv_wo32(obj,  8, offset);
-	nv_wo32(obj, 12, 0x00000000);
-	nv_wo32(obj, 16, 0x00000000);
-	nv_wo32(obj, 20, flags5);
-	dev_priv->engine.instmem.flush(dev);
+	nv50_evo_dmaobj_init(obj, memtype, base, size);
 
-	ret = nouveau_ramht_insert(evo, name, obj);
-	nouveau_gpuobj_ref(NULL, &obj);
-	if (ret) {
-		return ret;
-	}
+	ret = nouveau_ramht_insert(evo, handle, obj);
+	if (ret)
+		goto out;
 
-	return 0;
+	if (pobj)
+		nouveau_gpuobj_ref(obj, pobj);
+out:
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
 }
 
 static int
-nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pevo)
+nv50_evo_channel_new(struct drm_device *dev, int chid,
+		     struct nouveau_channel **pevo)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
 	struct nouveau_channel *evo;
 	int ret;
 
@@ -95,25 +111,13 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pevo)
 		return -ENOMEM;
 	*pevo = evo;
 
-	for (evo->id = 0; evo->id < 5; evo->id++) {
-		if (dev_priv->evo_alloc & (1 << evo->id))
-			continue;
-
-		dev_priv->evo_alloc |= (1 << evo->id);
-		break;
-	}
-
-	if (evo->id == 5) {
-		kfree(evo);
-		return -ENODEV;
-	}
-
+	evo->id = chid;
 	evo->dev = dev;
 	evo->user_get = 4;
 	evo->user_put = 0;
 
 	ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM, 0, 0,
-			     false, true, &evo->pushbuf_bo);
+			     &evo->pushbuf_bo);
 	if (ret == 0)
 		ret = nouveau_bo_pin(evo->pushbuf_bo, TTM_PL_FLAG_VRAM);
 	if (ret) {
@@ -138,8 +142,8 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pevo)
 	}
 
 	/* bind primary evo channel's ramht to the channel */
-	if (dev_priv->evo && evo != dev_priv->evo)
-		nouveau_ramht_ref(dev_priv->evo->ramht, &evo->ramht, NULL);
+	if (disp->master && evo != disp->master)
+		nouveau_ramht_ref(disp->master->ramht, &evo->ramht, NULL);
 
 	return 0;
 }
@@ -182,6 +186,7 @@ nv50_evo_channel_init(struct nouveau_channel *evo)
 	nv_mask(dev, 0x610028, 0x00000000, 0x00010001 << id);
 
 	evo->dma.max = (4096/4) - 2;
+	evo->dma.max &= ~7;
 	evo->dma.put = 0;
 	evo->dma.cur = evo->dma.put;
 	evo->dma.free = evo->dma.max - evo->dma.cur;
@@ -212,21 +217,39 @@ nv50_evo_channel_fini(struct nouveau_channel *evo)
 	}
 }
 
+static void
+nv50_evo_destroy(struct drm_device *dev)
+{
+	struct nv50_display *disp = nv50_display(dev);
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		if (disp->crtc[i].sem.bo) {
+			nouveau_bo_unmap(disp->crtc[i].sem.bo);
+			nouveau_bo_ref(NULL, &disp->crtc[i].sem.bo);
+		}
+		nv50_evo_channel_del(&disp->crtc[i].sync);
+	}
+	nouveau_gpuobj_ref(NULL, &disp->ntfy);
+	nv50_evo_channel_del(&disp->master);
+}
+
 static int
 nv50_evo_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
 	struct nouveau_gpuobj *ramht = NULL;
 	struct nouveau_channel *evo;
-	int ret;
+	int ret, i, j;
 
 	/* create primary evo channel, the one we use for modesetting
 	 * purporses
 	 */
-	ret = nv50_evo_channel_new(dev, &dev_priv->evo);
+	ret = nv50_evo_channel_new(dev, 0, &disp->master);
 	if (ret)
 		return ret;
-	evo = dev_priv->evo;
+	evo = disp->master;
 
 	/* setup object management on it, any other evo channel will
 	 * use this also as there's no per-channel support on the
@@ -236,109 +259,167 @@ nv50_evo_create(struct drm_device *dev)
 				 NVOBJ_FLAG_ZERO_ALLOC, &evo->ramin);
 	if (ret) {
 		NV_ERROR(dev, "Error allocating EVO channel memory: %d\n", ret);
-		nv50_evo_channel_del(&dev_priv->evo);
-		return ret;
+		goto err;
 	}
 
 	ret = drm_mm_init(&evo->ramin_heap, 0, 32768);
 	if (ret) {
 		NV_ERROR(dev, "Error initialising EVO PRAMIN heap: %d\n", ret);
-		nv50_evo_channel_del(&dev_priv->evo);
-		return ret;
+		goto err;
 	}
 
 	ret = nouveau_gpuobj_new(dev, evo, 4096, 16, 0, &ramht);
 	if (ret) {
 		NV_ERROR(dev, "Unable to allocate EVO RAMHT: %d\n", ret);
-		nv50_evo_channel_del(&dev_priv->evo);
-		return ret;
+		goto err;
 	}
 
 	ret = nouveau_ramht_new(dev, ramht, &evo->ramht);
 	nouveau_gpuobj_ref(NULL, &ramht);
-	if (ret) {
-		nv50_evo_channel_del(&dev_priv->evo);
-		return ret;
-	}
+	if (ret)
+		goto err;
+
+	/* not sure exactly what this is..
+	 *
+	 * the first dword of the structure is used by nvidia to wait on
+	 * full completion of an EVO "update" command.
+	 *
+	 * method 0x8c on the master evo channel will fill a lot more of
+	 * this structure with some undefined info
+	 */
+	ret = nouveau_gpuobj_new(dev, disp->master, 0x1000, 0,
+				 NVOBJ_FLAG_ZERO_ALLOC, &disp->ntfy);
+	if (ret)
+		goto err;
+
+	ret = nv50_evo_dmaobj_new(disp->master, NvEvoSync, 0x0000,
+				  disp->ntfy->vinst, disp->ntfy->size, NULL);
+	if (ret)
+		goto err;
 
 	/* create some default objects for the scanout memtypes we support */
-	if (dev_priv->card_type >= NV_C0) {
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoFB32, 0xfe, 0x19,
-					  0, 0xffffffff, 0x00000000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
+	ret = nv50_evo_dmaobj_new(disp->master, NvEvoVRAM, 0x0000,
+				  0, dev_priv->vram_size, NULL);
+	if (ret)
+		goto err;
 
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoVRAM, 0, 0x19,
-					  0, dev_priv->vram_size, 0x00020000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
+	ret = nv50_evo_dmaobj_new(disp->master, NvEvoVRAM_LP, 0x80000000,
+				  0, dev_priv->vram_size, NULL);
+	if (ret)
+		goto err;
 
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoVRAM_LP, 0, 0x19,
-					  0, dev_priv->vram_size, 0x00000000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
-	} else {
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoFB16, 0x70, 0x19,
-					  0, 0xffffffff, 0x00010000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
+	ret = nv50_evo_dmaobj_new(disp->master, NvEvoFB32, 0x80000000 |
+				  (dev_priv->chipset < 0xc0 ? 0x7a00 : 0xfe00),
+				  0, dev_priv->vram_size, NULL);
+	if (ret)
+		goto err;
 
+	ret = nv50_evo_dmaobj_new(disp->master, NvEvoFB16, 0x80000000 |
+				  (dev_priv->chipset < 0xc0 ? 0x7000 : 0xfe00),
+				  0, dev_priv->vram_size, NULL);
+	if (ret)
+		goto err;
 
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoFB32, 0x7a, 0x19,
-					  0, 0xffffffff, 0x00010000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
+	/* create "display sync" channels and other structures we need
+	 * to implement page flipping
+	 */
+	for (i = 0; i < 2; i++) {
+		struct nv50_display_crtc *dispc = &disp->crtc[i];
+		u64 offset;
 
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoVRAM, 0, 0x19,
-					  0, dev_priv->vram_size, 0x00010000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
+		ret = nv50_evo_channel_new(dev, 1 + i, &dispc->sync);
+		if (ret)
+			goto err;
+
+		ret = nouveau_bo_new(dev, NULL, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+				     0, 0x0000, &dispc->sem.bo);
+		if (!ret) {
+			offset = dispc->sem.bo->bo.mem.start << PAGE_SHIFT;
+
+			ret = nouveau_bo_pin(dispc->sem.bo, TTM_PL_FLAG_VRAM);
+			if (!ret)
+				ret = nouveau_bo_map(dispc->sem.bo);
+			if (ret)
+				nouveau_bo_ref(NULL, &dispc->sem.bo);
 		}
 
-		ret = nv50_evo_dmaobj_new(evo, 0x3d, NvEvoVRAM_LP, 0, 0x19,
-					  0, dev_priv->vram_size, 0x00010000);
-		if (ret) {
-			nv50_evo_channel_del(&dev_priv->evo);
-			return ret;
-		}
+		if (ret)
+			goto err;
+
+		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoSync, 0x0000,
+					  offset, 4096, NULL);
+		if (ret)
+			goto err;
+
+		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoVRAM_LP, 0x80000000,
+					  0, dev_priv->vram_size, NULL);
+		if (ret)
+			goto err;
+
+		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoFB32, 0x80000000 |
+					  (dev_priv->chipset < 0xc0 ?
+					  0x7a00 : 0xfe00),
+					  0, dev_priv->vram_size, NULL);
+		if (ret)
+			goto err;
+
+		ret = nv50_evo_dmaobj_new(dispc->sync, NvEvoFB16, 0x80000000 |
+					  (dev_priv->chipset < 0xc0 ?
+					  0x7000 : 0xfe00),
+					  0, dev_priv->vram_size, NULL);
+		if (ret)
+			goto err;
+
+		for (j = 0; j < 4096; j += 4)
+			nouveau_bo_wr32(dispc->sem.bo, j / 4, 0x74b1e000);
+		dispc->sem.offset = 0;
 	}
 
 	return 0;
+
+err:
+	nv50_evo_destroy(dev);
+	return ret;
 }
 
 int
 nv50_evo_init(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int ret;
+	struct nv50_display *disp = nv50_display(dev);
+	int ret, i;
 
-	if (!dev_priv->evo) {
+	if (!disp->master) {
 		ret = nv50_evo_create(dev);
 		if (ret)
 			return ret;
 	}
 
-	return nv50_evo_channel_init(dev_priv->evo);
+	ret = nv50_evo_channel_init(disp->master);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 2; i++) {
+		ret = nv50_evo_channel_init(disp->crtc[i].sync);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 void
 nv50_evo_fini(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_display *disp = nv50_display(dev);
+	int i;
 
-	if (dev_priv->evo) {
-		nv50_evo_channel_fini(dev_priv->evo);
-		nv50_evo_channel_del(&dev_priv->evo);
+	for (i = 0; i < 2; i++) {
+		if (disp->crtc[i].sync)
+			nv50_evo_channel_fini(disp->crtc[i].sync);
 	}
+
+	if (disp->master)
+		nv50_evo_channel_fini(disp->master);
+
+	nv50_evo_destroy(dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_evo.h b/drivers/gpu/drm/nouveau/nv50_evo.h
index aa4f0d3..3860ca6 100644
--- a/drivers/gpu/drm/nouveau/nv50_evo.h
+++ b/drivers/gpu/drm/nouveau/nv50_evo.h
@@ -27,12 +27,6 @@
 #ifndef __NV50_EVO_H__
 #define __NV50_EVO_H__
 
-int  nv50_evo_init(struct drm_device *dev);
-void nv50_evo_fini(struct drm_device *dev);
-int  nv50_evo_dmaobj_new(struct nouveau_channel *, u32 class, u32 name,
-			 u32 tile_flags, u32 magic_flags,
-			 u32 offset, u32 limit);
-
 #define NV50_EVO_UPDATE                                              0x00000080
 #define NV50_EVO_UNK84                                               0x00000084
 #define NV50_EVO_UNK84_NOTIFY                                        0x40000000
@@ -119,5 +113,7 @@ int  nv50_evo_dmaobj_new(struct nouveau_channel *, u32 class, u32 name,
 /* Both of these are needed, otherwise nothing happens. */
 #define NV50_EVO_CRTC_SCALE_RES1                                     0x000008d8
 #define NV50_EVO_CRTC_SCALE_RES2                                     0x000008dc
+#define NV50_EVO_CRTC_UNK900                                         0x00000900
+#define NV50_EVO_CRTC_UNK904                                         0x00000904
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nv50_fb.c b/drivers/gpu/drm/nouveau/nv50_fb.c
index 50290de..ddebd71 100644
--- a/drivers/gpu/drm/nouveau/nv50_fb.c
+++ b/drivers/gpu/drm/nouveau/nv50_fb.c
@@ -8,31 +8,61 @@ struct nv50_fb_priv {
 	dma_addr_t r100c08;
 };
 
+static void
+nv50_fb_destroy(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	struct nv50_fb_priv *priv = pfb->priv;
+
+	if (pfb->tag_heap.free_stack.next)
+		drm_mm_takedown(&pfb->tag_heap);
+
+	if (priv->r100c08_page) {
+		pci_unmap_page(dev->pdev, priv->r100c08, PAGE_SIZE,
+			       PCI_DMA_BIDIRECTIONAL);
+		__free_page(priv->r100c08_page);
+	}
+
+	kfree(priv);
+	pfb->priv = NULL;
+}
+
 static int
 nv50_fb_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	struct nv50_fb_priv *priv;
+	u32 tagmem;
+	int ret;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	pfb->priv = priv;
 
 	priv->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!priv->r100c08_page) {
-		kfree(priv);
+		nv50_fb_destroy(dev);
 		return -ENOMEM;
 	}
 
 	priv->r100c08 = pci_map_page(dev->pdev, priv->r100c08_page, 0,
 				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 	if (pci_dma_mapping_error(dev->pdev, priv->r100c08)) {
-		__free_page(priv->r100c08_page);
-		kfree(priv);
+		nv50_fb_destroy(dev);
 		return -EFAULT;
 	}
 
-	dev_priv->engine.fb.priv = priv;
+	tagmem = nv_rd32(dev, 0x100320);
+	NV_DEBUG(dev, "%d tags available\n", tagmem);
+	ret = drm_mm_init(&pfb->tag_heap, 0, tagmem);
+	if (ret) {
+		nv50_fb_destroy(dev);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -81,26 +111,112 @@ nv50_fb_init(struct drm_device *dev)
 void
 nv50_fb_takedown(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nv50_fb_priv *priv;
+	nv50_fb_destroy(dev);
+}
 
-	priv = dev_priv->engine.fb.priv;
-	if (!priv)
-		return;
-	dev_priv->engine.fb.priv = NULL;
+static struct nouveau_enum vm_dispatch_subclients[] = {
+	{ 0x00000000, "GRCTX", NULL },
+	{ 0x00000001, "NOTIFY", NULL },
+	{ 0x00000002, "QUERY", NULL },
+	{ 0x00000003, "COND", NULL },
+	{ 0x00000004, "M2M_IN", NULL },
+	{ 0x00000005, "M2M_OUT", NULL },
+	{ 0x00000006, "M2M_NOTIFY", NULL },
+	{}
+};
 
-	pci_unmap_page(dev->pdev, priv->r100c08, PAGE_SIZE,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_page(priv->r100c08_page);
-	kfree(priv);
-}
+static struct nouveau_enum vm_ccache_subclients[] = {
+	{ 0x00000000, "CB", NULL },
+	{ 0x00000001, "TIC", NULL },
+	{ 0x00000002, "TSC", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_prop_subclients[] = {
+	{ 0x00000000, "RT0", NULL },
+	{ 0x00000001, "RT1", NULL },
+	{ 0x00000002, "RT2", NULL },
+	{ 0x00000003, "RT3", NULL },
+	{ 0x00000004, "RT4", NULL },
+	{ 0x00000005, "RT5", NULL },
+	{ 0x00000006, "RT6", NULL },
+	{ 0x00000007, "RT7", NULL },
+	{ 0x00000008, "ZETA", NULL },
+	{ 0x00000009, "LOCAL", NULL },
+	{ 0x0000000a, "GLOBAL", NULL },
+	{ 0x0000000b, "STACK", NULL },
+	{ 0x0000000c, "DST2D", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_pfifo_subclients[] = {
+	{ 0x00000000, "PUSHBUF", NULL },
+	{ 0x00000001, "SEMAPHORE", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_bar_subclients[] = {
+	{ 0x00000000, "FB", NULL },
+	{ 0x00000001, "IN", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_client[] = {
+	{ 0x00000000, "STRMOUT", NULL },
+	{ 0x00000003, "DISPATCH", vm_dispatch_subclients },
+	{ 0x00000004, "PFIFO_WRITE", NULL },
+	{ 0x00000005, "CCACHE", vm_ccache_subclients },
+	{ 0x00000006, "PPPP", NULL },
+	{ 0x00000007, "CLIPID", NULL },
+	{ 0x00000008, "PFIFO_READ", NULL },
+	{ 0x00000009, "VFETCH", NULL },
+	{ 0x0000000a, "TEXTURE", NULL },
+	{ 0x0000000b, "PROP", vm_prop_subclients },
+	{ 0x0000000c, "PVP", NULL },
+	{ 0x0000000d, "PBSP", NULL },
+	{ 0x0000000e, "PCRYPT", NULL },
+	{ 0x0000000f, "PCOUNTER", NULL },
+	{ 0x00000011, "PDAEMON", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_engine[] = {
+	{ 0x00000000, "PGRAPH", NULL },
+	{ 0x00000001, "PVP", NULL },
+	{ 0x00000004, "PEEPHOLE", NULL },
+	{ 0x00000005, "PFIFO", vm_pfifo_subclients },
+	{ 0x00000006, "BAR", vm_bar_subclients },
+	{ 0x00000008, "PPPP", NULL },
+	{ 0x00000009, "PBSP", NULL },
+	{ 0x0000000a, "PCRYPT", NULL },
+	{ 0x0000000b, "PCOUNTER", NULL },
+	{ 0x0000000c, "SEMAPHORE_BG", NULL },
+	{ 0x0000000d, "PCOPY", NULL },
+	{ 0x0000000e, "PDAEMON", NULL },
+	{}
+};
+
+static struct nouveau_enum vm_fault[] = {
+	{ 0x00000000, "PT_NOT_PRESENT", NULL },
+	{ 0x00000001, "PT_TOO_SHORT", NULL },
+	{ 0x00000002, "PAGE_NOT_PRESENT", NULL },
+	{ 0x00000003, "PAGE_SYSTEM_ONLY", NULL },
+	{ 0x00000004, "PAGE_READ_ONLY", NULL },
+	{ 0x00000006, "NULL_DMAOBJ", NULL },
+	{ 0x00000007, "WRONG_MEMTYPE", NULL },
+	{ 0x0000000b, "VRAM_LIMIT", NULL },
+	{ 0x0000000f, "DMAOBJ_LIMIT", NULL },
+	{}
+};
 
 void
-nv50_fb_vm_trap(struct drm_device *dev, int display, const char *name)
+nv50_fb_vm_trap(struct drm_device *dev, int display)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	const struct nouveau_enum *en, *cl;
 	unsigned long flags;
 	u32 trap[6], idx, chinst;
+	u8 st0, st1, st2, st3;
 	int i, ch;
 
 	idx = nv_rd32(dev, 0x100c90);
@@ -117,8 +233,8 @@ nv50_fb_vm_trap(struct drm_device *dev, int display, const char *name)
 	if (!display)
 		return;
 
+	/* lookup channel id */
 	chinst = (trap[2] << 16) | trap[1];
-
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
 	for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) {
 		struct nouveau_channel *chan = dev_priv->channels.ptr[ch];
@@ -131,9 +247,48 @@ nv50_fb_vm_trap(struct drm_device *dev, int display, const char *name)
 	}
 	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
 
-	NV_INFO(dev, "%s - VM: Trapped %s at %02x%04x%04x status %08x "
-		     "channel %d (0x%08x)\n",
-		name, (trap[5] & 0x100 ? "read" : "write"),
-		trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff,
-		trap[0], ch, chinst);
+	/* decode status bits into something more useful */
+	if (dev_priv->chipset  < 0xa3 ||
+	    dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) {
+		st0 = (trap[0] & 0x0000000f) >> 0;
+		st1 = (trap[0] & 0x000000f0) >> 4;
+		st2 = (trap[0] & 0x00000f00) >> 8;
+		st3 = (trap[0] & 0x0000f000) >> 12;
+	} else {
+		st0 = (trap[0] & 0x000000ff) >> 0;
+		st1 = (trap[0] & 0x0000ff00) >> 8;
+		st2 = (trap[0] & 0x00ff0000) >> 16;
+		st3 = (trap[0] & 0xff000000) >> 24;
+	}
+
+	NV_INFO(dev, "VM: trapped %s at 0x%02x%04x%04x on ch %d [0x%08x] ",
+		(trap[5] & 0x00000100) ? "read" : "write",
+		trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff, ch, chinst);
+
+	en = nouveau_enum_find(vm_engine, st0);
+	if (en)
+		printk("%s/", en->name);
+	else
+		printk("%02x/", st0);
+
+	cl = nouveau_enum_find(vm_client, st2);
+	if (cl)
+		printk("%s/", cl->name);
+	else
+		printk("%02x/", st2);
+
+	if      (cl && cl->data) cl = nouveau_enum_find(cl->data, st3);
+	else if (en && en->data) cl = nouveau_enum_find(en->data, st3);
+	else                     cl = NULL;
+	if (cl)
+		printk("%s", cl->name);
+	else
+		printk("%02x", st3);
+
+	printk(" reason: ");
+	en = nouveau_enum_find(vm_fault, st1);
+	if (en)
+		printk("%s\n", en->name);
+	else
+		printk("0x%08x\n", st1);
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 8dd04c5..c34a074 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -149,6 +149,7 @@ nv50_fifo_init_regs(struct drm_device *dev)
 	nv_wr32(dev, 0x3204, 0);
 	nv_wr32(dev, 0x3210, 0);
 	nv_wr32(dev, 0x3270, 0);
+	nv_wr32(dev, 0x2044, 0x01003fff);
 
 	/* Enable dummy channels setup by nv50_instmem.c */
 	nv50_fifo_channel_enable(dev, 0);
@@ -273,7 +274,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 	nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
 			     (4 << 24) /* SEARCH_FULL */ |
 			     (chan->ramht->gpuobj->cinst >> 4));
-	nv_wo32(ramfc, 0x44, 0x2101ffff);
+	nv_wo32(ramfc, 0x44, 0x01003fff);
 	nv_wo32(ramfc, 0x60, 0x7fffffff);
 	nv_wo32(ramfc, 0x40, 0x00000000);
 	nv_wo32(ramfc, 0x7c, 0x30000001);
diff --git a/drivers/gpu/drm/nouveau/nv50_gpio.c b/drivers/gpu/drm/nouveau/nv50_gpio.c
index 6b149c0..d4f4206 100644
--- a/drivers/gpu/drm/nouveau/nv50_gpio.c
+++ b/drivers/gpu/drm/nouveau/nv50_gpio.c
@@ -137,6 +137,7 @@ nv50_gpio_irq_unregister(struct drm_device *dev, enum dcb_gpio_tag tag,
 	struct nv50_gpio_priv *priv = pgpio->priv;
 	struct nv50_gpio_handler *gpioh, *tmp;
 	struct dcb_gpio_entry *gpio;
+	LIST_HEAD(tofree);
 	unsigned long flags;
 
 	gpio = nouveau_bios_gpio_entry(dev, tag);
@@ -149,10 +150,14 @@ nv50_gpio_irq_unregister(struct drm_device *dev, enum dcb_gpio_tag tag,
 		    gpioh->handler != handler ||
 		    gpioh->data != data)
 			continue;
-		list_del(&gpioh->head);
-		kfree(gpioh);
+		list_move(&gpioh->head, &tofree);
 	}
 	spin_unlock_irqrestore(&priv->lock, flags);
+
+	list_for_each_entry_safe(gpioh, tmp, &tofree, head) {
+		flush_work_sync(&gpioh->work);
+		kfree(gpioh);
+	}
 }
 
 bool
@@ -205,7 +210,6 @@ nv50_gpio_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
-	struct nv50_gpio_priv *priv;
 	int ret;
 
 	if (!pgpio->priv) {
@@ -213,7 +217,6 @@ nv50_gpio_init(struct drm_device *dev)
 		if (ret)
 			return ret;
 	}
-	priv = pgpio->priv;
 
 	/* disable, and ack any pending gpio interrupts */
 	nv_wr32(dev, 0xe050, 0x00000000);
@@ -293,7 +296,7 @@ nv50_gpio_isr(struct drm_device *dev)
 			continue;
 		gpioh->inhibit = true;
 
-		queue_work(dev_priv->wq, &gpioh->work);
+		schedule_work(&gpioh->work);
 	}
 	spin_unlock(&priv->lock);
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 37e21d2..7950bac 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -95,13 +95,41 @@ nv50_graph_init_regs__nv(struct drm_device *dev)
 }
 
 static void
-nv50_graph_init_regs(struct drm_device *dev)
+nv50_graph_init_zcull(struct drm_device *dev)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int i;
+
 	NV_DEBUG(dev, "\n");
 
-	nv_wr32(dev, NV04_PGRAPH_DEBUG_3,
-				(1 << 2) /* HW_CONTEXT_SWITCH_ENABLED */);
-	nv_wr32(dev, 0x402ca8, 0x800);
+	switch (dev_priv->chipset & 0xf0) {
+	case 0x50:
+	case 0x80:
+	case 0x90:
+		nv_wr32(dev, 0x402ca8, 0x00000800);
+		break;
+	case 0xa0:
+	default:
+		nv_wr32(dev, 0x402cc0, 0x00000000);
+		if (dev_priv->chipset == 0xa0 ||
+		    dev_priv->chipset == 0xaa ||
+		    dev_priv->chipset == 0xac) {
+			nv_wr32(dev, 0x402ca8, 0x00000802);
+		} else {
+			nv_wr32(dev, 0x402cc0, 0x00000000);
+			nv_wr32(dev, 0x402ca8, 0x00000002);
+		}
+
+		break;
+	}
+
+	/* zero out zcull regions */
+	for (i = 0; i < 8; i++) {
+		nv_wr32(dev, 0x402c20 + (i * 8), 0x00000000);
+		nv_wr32(dev, 0x402c24 + (i * 8), 0x00000000);
+		nv_wr32(dev, 0x402c28 + (i * 8), 0x00000000);
+		nv_wr32(dev, 0x402c2c + (i * 8), 0x00000000);
+	}
 }
 
 static int
@@ -136,6 +164,7 @@ nv50_graph_init_ctxctl(struct drm_device *dev)
 	}
 	kfree(cp);
 
+	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
 	nv_wr32(dev, 0x400320, 4);
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
 	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, 0);
@@ -151,7 +180,7 @@ nv50_graph_init(struct drm_device *dev)
 
 	nv50_graph_init_reset(dev);
 	nv50_graph_init_regs__nv(dev);
-	nv50_graph_init_regs(dev);
+	nv50_graph_init_zcull(dev);
 
 	ret = nv50_graph_init_ctxctl(dev);
 	if (ret)
@@ -409,12 +438,7 @@ static int
 nv50_graph_nvsw_mthd_page_flip(struct nouveau_channel *chan,
 			       u32 class, u32 mthd, u32 data)
 {
-	struct nouveau_page_flip_state s;
-
-	if (!nouveau_finish_page_flip(chan, &s)) {
-		/* XXX - Do something here */
-	}
-
+	nouveau_finish_page_flip(chan, NULL);
 	return 0;
 }
 
@@ -479,7 +503,7 @@ nv50_graph_tlb_flush(struct drm_device *dev)
 }
 
 void
-nv86_graph_tlb_flush(struct drm_device *dev)
+nv84_graph_tlb_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
@@ -524,13 +548,12 @@ nv86_graph_tlb_flush(struct drm_device *dev)
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 }
 
-static struct nouveau_enum nv50_mp_exec_error_names[] =
-{
-	{ 3, "STACK_UNDERFLOW" },
-	{ 4, "QUADON_ACTIVE" },
-	{ 8, "TIMEOUT" },
-	{ 0x10, "INVALID_OPCODE" },
-	{ 0x40, "BREAKPOINT" },
+static struct nouveau_enum nv50_mp_exec_error_names[] = {
+	{ 3, "STACK_UNDERFLOW", NULL },
+	{ 4, "QUADON_ACTIVE", NULL },
+	{ 8, "TIMEOUT", NULL },
+	{ 0x10, "INVALID_OPCODE", NULL },
+	{ 0x40, "BREAKPOINT", NULL },
 	{}
 };
 
@@ -558,47 +581,47 @@ static struct nouveau_bitfield nv50_graph_trap_ccache[] = {
 
 /* There must be a *lot* of these. Will take some time to gather them up. */
 struct nouveau_enum nv50_data_error_names[] = {
-	{ 0x00000003, "INVALID_QUERY_OR_TEXTURE" },
-	{ 0x00000004, "INVALID_VALUE" },
-	{ 0x00000005, "INVALID_ENUM" },
-	{ 0x00000008, "INVALID_OBJECT" },
-	{ 0x00000009, "READ_ONLY_OBJECT" },
-	{ 0x0000000a, "SUPERVISOR_OBJECT" },
-	{ 0x0000000b, "INVALID_ADDRESS_ALIGNMENT" },
-	{ 0x0000000c, "INVALID_BITFIELD" },
-	{ 0x0000000d, "BEGIN_END_ACTIVE" },
-	{ 0x0000000e, "SEMANTIC_COLOR_BACK_OVER_LIMIT" },
-	{ 0x0000000f, "VIEWPORT_ID_NEEDS_GP" },
-	{ 0x00000010, "RT_DOUBLE_BIND" },
-	{ 0x00000011, "RT_TYPES_MISMATCH" },
-	{ 0x00000012, "RT_LINEAR_WITH_ZETA" },
-	{ 0x00000015, "FP_TOO_FEW_REGS" },
-	{ 0x00000016, "ZETA_FORMAT_CSAA_MISMATCH" },
-	{ 0x00000017, "RT_LINEAR_WITH_MSAA" },
-	{ 0x00000018, "FP_INTERPOLANT_START_OVER_LIMIT" },
-	{ 0x00000019, "SEMANTIC_LAYER_OVER_LIMIT" },
-	{ 0x0000001a, "RT_INVALID_ALIGNMENT" },
-	{ 0x0000001b, "SAMPLER_OVER_LIMIT" },
-	{ 0x0000001c, "TEXTURE_OVER_LIMIT" },
-	{ 0x0000001e, "GP_TOO_MANY_OUTPUTS" },
-	{ 0x0000001f, "RT_BPP128_WITH_MS8" },
-	{ 0x00000021, "Z_OUT_OF_BOUNDS" },
-	{ 0x00000023, "XY_OUT_OF_BOUNDS" },
-	{ 0x00000027, "CP_MORE_PARAMS_THAN_SHARED" },
-	{ 0x00000028, "CP_NO_REG_SPACE_STRIPED" },
-	{ 0x00000029, "CP_NO_REG_SPACE_PACKED" },
-	{ 0x0000002a, "CP_NOT_ENOUGH_WARPS" },
-	{ 0x0000002b, "CP_BLOCK_SIZE_MISMATCH" },
-	{ 0x0000002c, "CP_NOT_ENOUGH_LOCAL_WARPS" },
-	{ 0x0000002d, "CP_NOT_ENOUGH_STACK_WARPS" },
-	{ 0x0000002e, "CP_NO_BLOCKDIM_LATCH" },
-	{ 0x00000031, "ENG2D_FORMAT_MISMATCH" },
-	{ 0x0000003f, "PRIMITIVE_ID_NEEDS_GP" },
-	{ 0x00000044, "SEMANTIC_VIEWPORT_OVER_LIMIT" },
-	{ 0x00000045, "SEMANTIC_COLOR_FRONT_OVER_LIMIT" },
-	{ 0x00000046, "LAYER_ID_NEEDS_GP" },
-	{ 0x00000047, "SEMANTIC_CLIP_OVER_LIMIT" },
-	{ 0x00000048, "SEMANTIC_PTSZ_OVER_LIMIT" },
+	{ 0x00000003, "INVALID_QUERY_OR_TEXTURE", NULL },
+	{ 0x00000004, "INVALID_VALUE", NULL },
+	{ 0x00000005, "INVALID_ENUM", NULL },
+	{ 0x00000008, "INVALID_OBJECT", NULL },
+	{ 0x00000009, "READ_ONLY_OBJECT", NULL },
+	{ 0x0000000a, "SUPERVISOR_OBJECT", NULL },
+	{ 0x0000000b, "INVALID_ADDRESS_ALIGNMENT", NULL },
+	{ 0x0000000c, "INVALID_BITFIELD", NULL },
+	{ 0x0000000d, "BEGIN_END_ACTIVE", NULL },
+	{ 0x0000000e, "SEMANTIC_COLOR_BACK_OVER_LIMIT", NULL },
+	{ 0x0000000f, "VIEWPORT_ID_NEEDS_GP", NULL },
+	{ 0x00000010, "RT_DOUBLE_BIND", NULL },
+	{ 0x00000011, "RT_TYPES_MISMATCH", NULL },
+	{ 0x00000012, "RT_LINEAR_WITH_ZETA", NULL },
+	{ 0x00000015, "FP_TOO_FEW_REGS", NULL },
+	{ 0x00000016, "ZETA_FORMAT_CSAA_MISMATCH", NULL },
+	{ 0x00000017, "RT_LINEAR_WITH_MSAA", NULL },
+	{ 0x00000018, "FP_INTERPOLANT_START_OVER_LIMIT", NULL },
+	{ 0x00000019, "SEMANTIC_LAYER_OVER_LIMIT", NULL },
+	{ 0x0000001a, "RT_INVALID_ALIGNMENT", NULL },
+	{ 0x0000001b, "SAMPLER_OVER_LIMIT", NULL },
+	{ 0x0000001c, "TEXTURE_OVER_LIMIT", NULL },
+	{ 0x0000001e, "GP_TOO_MANY_OUTPUTS", NULL },
+	{ 0x0000001f, "RT_BPP128_WITH_MS8", NULL },
+	{ 0x00000021, "Z_OUT_OF_BOUNDS", NULL },
+	{ 0x00000023, "XY_OUT_OF_BOUNDS", NULL },
+	{ 0x00000027, "CP_MORE_PARAMS_THAN_SHARED", NULL },
+	{ 0x00000028, "CP_NO_REG_SPACE_STRIPED", NULL },
+	{ 0x00000029, "CP_NO_REG_SPACE_PACKED", NULL },
+	{ 0x0000002a, "CP_NOT_ENOUGH_WARPS", NULL },
+	{ 0x0000002b, "CP_BLOCK_SIZE_MISMATCH", NULL },
+	{ 0x0000002c, "CP_NOT_ENOUGH_LOCAL_WARPS", NULL },
+	{ 0x0000002d, "CP_NOT_ENOUGH_STACK_WARPS", NULL },
+	{ 0x0000002e, "CP_NO_BLOCKDIM_LATCH", NULL },
+	{ 0x00000031, "ENG2D_FORMAT_MISMATCH", NULL },
+	{ 0x0000003f, "PRIMITIVE_ID_NEEDS_GP", NULL },
+	{ 0x00000044, "SEMANTIC_VIEWPORT_OVER_LIMIT", NULL },
+	{ 0x00000045, "SEMANTIC_COLOR_FRONT_OVER_LIMIT", NULL },
+	{ 0x00000046, "LAYER_ID_NEEDS_GP", NULL },
+	{ 0x00000047, "SEMANTIC_CLIP_OVER_LIMIT", NULL },
+	{ 0x00000048, "SEMANTIC_PTSZ_OVER_LIMIT", NULL },
 	{}
 };
 
@@ -639,7 +662,7 @@ nv50_pgraph_mp_trap(struct drm_device *dev, int tpid, int display)
 			nv_rd32(dev, addr + 0x20);
 			pc = nv_rd32(dev, addr + 0x24);
 			oplow = nv_rd32(dev, addr + 0x70);
-			ophigh= nv_rd32(dev, addr + 0x74);
+			ophigh = nv_rd32(dev, addr + 0x74);
 			NV_INFO(dev, "PGRAPH_TRAP_MP_EXEC - "
 					"TP %d MP %d: ", tpid, i);
 			nouveau_enum_print(nv50_mp_exec_error_names, status);
@@ -678,7 +701,6 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
 		tps++;
 		switch (type) {
 		case 6: /* texture error... unknown for now */
-			nv50_fb_vm_trap(dev, display, name);
 			if (display) {
 				NV_ERROR(dev, "magic set %d:\n", i);
 				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
@@ -701,7 +723,6 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
 			uint32_t e1c = nv_rd32(dev, ustatus_addr + 0x14);
 			uint32_t e20 = nv_rd32(dev, ustatus_addr + 0x18);
 			uint32_t e24 = nv_rd32(dev, ustatus_addr + 0x1c);
-			nv50_fb_vm_trap(dev, display, name);
 			/* 2d engine destination */
 			if (ustatus & 0x00000010) {
 				if (display) {
@@ -912,10 +933,10 @@ nv50_pgraph_trap_handler(struct drm_device *dev, u32 display, u64 inst, u32 chid
 			printk("\n");
 			NV_INFO(dev, "PGRAPH - TRAP_CCACHE %08x %08x %08x %08x"
 				     " %08x %08x %08x\n",
-				nv_rd32(dev, 0x405800), nv_rd32(dev, 0x405804),
-				nv_rd32(dev, 0x405808), nv_rd32(dev, 0x40580c),
-				nv_rd32(dev, 0x405810), nv_rd32(dev, 0x405814),
-				nv_rd32(dev, 0x40581c));
+				nv_rd32(dev, 0x405000), nv_rd32(dev, 0x405004),
+				nv_rd32(dev, 0x405008), nv_rd32(dev, 0x40500c),
+				nv_rd32(dev, 0x405010), nv_rd32(dev, 0x405014),
+				nv_rd32(dev, 0x40501c));
 
 		}
 
@@ -1044,6 +1065,7 @@ nv50_graph_isr(struct drm_device *dev)
 			NV_INFO(dev, "PGRAPH - ch %d (0x%010llx) subc %d "
 				     "class 0x%04x mthd 0x%04x data 0x%08x\n",
 				chid, inst, subc, class, mthd, data);
+			nv50_fb_vm_trap(dev, 1);
 		}
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
index 336aab2..de9abff 100644
--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -747,7 +747,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
 				gr_def(ctx, offset + 0x64, 0x0000001f);
 				gr_def(ctx, offset + 0x68, 0x0000000f);
 				gr_def(ctx, offset + 0x6c, 0x0000000f);
-			} else if(dev_priv->chipset < 0xa0) {
+			} else if (dev_priv->chipset < 0xa0) {
 				cp_ctx(ctx, offset + 0x50, 1);
 				cp_ctx(ctx, offset + 0x70, 1);
 			} else {
@@ -924,7 +924,7 @@ nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx)
 		dd_emit(ctx, 1, 0);	/* 0000007f MULTISAMPLE_SAMPLES_LOG2 */
 	} else {
 		dd_emit(ctx, 1, 0);	/* 0000000f MULTISAMPLE_SAMPLES_LOG2 */
-	} 
+	}
 	dd_emit(ctx, 1, 0xc);		/* 000000ff SEMANTIC_COLOR.BFC0_ID */
 	if (dev_priv->chipset != 0x50)
 		dd_emit(ctx, 1, 0);	/* 00000001 SEMANTIC_COLOR.CLMP_EN */
@@ -1803,9 +1803,7 @@ nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx)
 		xf_emit(ctx, 1, 0);	/* 1ff */
 		xf_emit(ctx, 8, 0);	/* 0? */
 		xf_emit(ctx, 9, 0);	/* ffffffff, 7ff */
-	}
-	else
-	{
+	} else {
 		xf_emit(ctx, 0xc, 0);	/* RO */
 		/* SEEK */
 		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
@@ -2836,7 +2834,7 @@ nv50_graph_construct_xfer_tprop(struct nouveau_grctx *ctx)
 	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
 	if (IS_NVA3F(dev_priv->chipset))
 		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
-	if(dev_priv->chipset == 0x50)
+	if (dev_priv->chipset == 0x50)
 		xf_emit(ctx, 1, 0);	/* ff */
 	else
 		xf_emit(ctx, 3, 0);	/* 1, 7, 3ff */
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index e57caa2..993ad3f 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -300,7 +300,7 @@ nv50_instmem_resume(struct drm_device *dev)
 }
 
 struct nv50_gpuobj_node {
-	struct nouveau_vram *vram;
+	struct nouveau_mem *vram;
 	struct nouveau_vma chan_vma;
 	u32 align;
 };
@@ -404,23 +404,25 @@ void
 nv50_instmem_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x00330c, 0x00000001);
 	if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
 		NV_ERROR(dev, "PRAMIN flush timeout\n");
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
 
 void
 nv84_instmem_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x070000, 0x00000001);
 	if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
 		NV_ERROR(dev, "PRAMIN flush timeout\n");
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index b4a5ecb..c25c593 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -41,8 +41,7 @@ nv50_sor_disconnect(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(dev)->master;
 	int ret;
 
 	if (!nv_encoder->crtc)
@@ -184,8 +183,7 @@ static void
 nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		  struct drm_display_mode *adjusted_mode)
 {
-	struct drm_nouveau_private *dev_priv = encoder->dev->dev_private;
-	struct nouveau_channel *evo = dev_priv->evo;
+	struct nouveau_channel *evo = nv50_display(encoder->dev)->master;
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 6144156..6c26944 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -31,7 +31,6 @@ void
 nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde,
 		struct nouveau_gpuobj *pgt[2])
 {
-	struct drm_nouveau_private *dev_priv = pgd->dev->dev_private;
 	u64 phys = 0xdeadcafe00000000ULL;
 	u32 coverage = 0;
 
@@ -58,10 +57,9 @@ nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde,
 }
 
 static inline u64
-nv50_vm_addr(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
-	     u64 phys, u32 memtype, u32 target)
+nv50_vm_addr(struct nouveau_vma *vma, u64 phys, u32 memtype, u32 target)
 {
-	struct drm_nouveau_private *dev_priv = pgt->dev->dev_private;
+	struct drm_nouveau_private *dev_priv = vma->vm->dev->dev_private;
 
 	phys |= 1; /* present */
 	phys |= (u64)memtype << 40;
@@ -85,12 +83,13 @@ nv50_vm_addr(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
 
 void
 nv50_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
-	    struct nouveau_vram *mem, u32 pte, u32 cnt, u64 phys)
+	    struct nouveau_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta)
 {
+	u32 comp = (mem->memtype & 0x180) >> 7;
 	u32 block;
 	int i;
 
-	phys  = nv50_vm_addr(vma, pgt, phys, mem->memtype, 0);
+	phys  = nv50_vm_addr(vma, phys, mem->memtype, 0);
 	pte <<= 3;
 	cnt <<= 3;
 
@@ -107,6 +106,11 @@ nv50_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
 
 		phys += block << (vma->node->type - 3);
 		cnt  -= block;
+		if (comp) {
+			u32 tag = mem->tag->start + ((delta >> 16) * comp);
+			offset_h |= (tag << 17);
+			delta    += block << (vma->node->type - 3);
+		}
 
 		while (block) {
 			nv_wo32(pgt, pte + 0, offset_l);
@@ -119,11 +123,11 @@ nv50_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
 
 void
 nv50_vm_map_sg(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
-	       u32 pte, dma_addr_t *list, u32 cnt)
+	       struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list)
 {
 	pte <<= 3;
 	while (cnt--) {
-		u64 phys = nv50_vm_addr(vma, pgt, (u64)*list++, 0, 2);
+		u64 phys = nv50_vm_addr(vma, (u64)*list++, mem->memtype, 2);
 		nv_wo32(pgt, pte + 0, lower_32_bits(phys));
 		nv_wo32(pgt, pte + 4, upper_32_bits(phys));
 		pte += 8;
@@ -170,10 +174,11 @@ void
 nv50_vm_flush_engine(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x100c80, (engine << 16) | 1);
 	if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
 		NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_vram.c b/drivers/gpu/drm/nouveau/nv50_vram.c
index 58e98ad..ffbc3d8 100644
--- a/drivers/gpu/drm/nouveau/nv50_vram.c
+++ b/drivers/gpu/drm/nouveau/nv50_vram.c
@@ -48,42 +48,49 @@ nv50_vram_flags_valid(struct drm_device *dev, u32 tile_flags)
 }
 
 void
-nv50_vram_del(struct drm_device *dev, struct nouveau_vram **pvram)
+nv50_vram_del(struct drm_device *dev, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
 	struct nouveau_mm *mm = man->priv;
 	struct nouveau_mm_node *this;
-	struct nouveau_vram *vram;
+	struct nouveau_mem *mem;
 
-	vram = *pvram;
-	*pvram = NULL;
-	if (unlikely(vram == NULL))
+	mem = *pmem;
+	*pmem = NULL;
+	if (unlikely(mem == NULL))
 		return;
 
 	mutex_lock(&mm->mutex);
-	while (!list_empty(&vram->regions)) {
-		this = list_first_entry(&vram->regions, struct nouveau_mm_node, rl_entry);
+	while (!list_empty(&mem->regions)) {
+		this = list_first_entry(&mem->regions, struct nouveau_mm_node, rl_entry);
 
 		list_del(&this->rl_entry);
 		nouveau_mm_put(mm, this);
 	}
+
+	if (mem->tag) {
+		drm_mm_put_block(mem->tag);
+		mem->tag = NULL;
+	}
 	mutex_unlock(&mm->mutex);
 
-	kfree(vram);
+	kfree(mem);
 }
 
 int
 nv50_vram_new(struct drm_device *dev, u64 size, u32 align, u32 size_nc,
-	      u32 type, struct nouveau_vram **pvram)
+	      u32 memtype, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
 	struct nouveau_mm *mm = man->priv;
 	struct nouveau_mm_node *r;
-	struct nouveau_vram *vram;
+	struct nouveau_mem *mem;
+	int comp = (memtype & 0x300) >> 8;
+	int type = (memtype & 0x07f);
 	int ret;
 
 	if (!types[type])
@@ -92,32 +99,46 @@ nv50_vram_new(struct drm_device *dev, u64 size, u32 align, u32 size_nc,
 	align >>= 12;
 	size_nc >>= 12;
 
-	vram = kzalloc(sizeof(*vram), GFP_KERNEL);
-	if (!vram)
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&vram->regions);
-	vram->dev = dev_priv->dev;
-	vram->memtype = type;
-	vram->size = size;
-
 	mutex_lock(&mm->mutex);
+	if (comp) {
+		if (align == 16) {
+			struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+			int n = (size >> 4) * comp;
+
+			mem->tag = drm_mm_search_free(&pfb->tag_heap, n, 0, 0);
+			if (mem->tag)
+				mem->tag = drm_mm_get_block(mem->tag, n, 0);
+		}
+
+		if (unlikely(!mem->tag))
+			comp = 0;
+	}
+
+	INIT_LIST_HEAD(&mem->regions);
+	mem->dev = dev_priv->dev;
+	mem->memtype = (comp << 7) | type;
+	mem->size = size;
+
 	do {
 		ret = nouveau_mm_get(mm, types[type], size, size_nc, align, &r);
 		if (ret) {
 			mutex_unlock(&mm->mutex);
-			nv50_vram_del(dev, &vram);
+			nv50_vram_del(dev, &mem);
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &vram->regions);
+		list_add_tail(&r->rl_entry, &mem->regions);
 		size -= r->length;
 	} while (size);
 	mutex_unlock(&mm->mutex);
 
-	r = list_first_entry(&vram->regions, struct nouveau_mm_node, rl_entry);
-	vram->offset = (u64)r->offset << 12;
-	*pvram = vram;
+	r = list_first_entry(&mem->regions, struct nouveau_mm_node, rl_entry);
+	mem->offset = (u64)r->offset << 12;
+	*pmem = mem;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv84_crypt.c b/drivers/gpu/drm/nouveau/nv84_crypt.c
index ec18ae1..fabc7fd 100644
--- a/drivers/gpu/drm/nouveau/nv84_crypt.c
+++ b/drivers/gpu/drm/nouveau/nv84_crypt.c
@@ -136,5 +136,5 @@ nv84_crypt_isr(struct drm_device *dev)
 	nv_wr32(dev, 0x102130, stat);
 	nv_wr32(dev, 0x10200c, 0x10);
 
-	nv50_fb_vm_trap(dev, show, "PCRYPT");
+	nv50_fb_vm_trap(dev, show);
 }
diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index dbbafed..e4b2b9e 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -27,32 +27,74 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 
-/*XXX: boards using limits 0x40 need fixing, the register layout
- *     is correct here, but, there's some other funny magic
- *     that modifies things, so it's not likely we'll set/read
- *     the correct timings yet..  working on it...
+/* This is actually a lot more complex than it appears here, but hopefully
+ * this should be able to deal with what the VBIOS leaves for us..
+ *
+ * If not, well, I'll jump off that bridge when I come to it.
  */
 
 struct nva3_pm_state {
-	struct pll_lims pll;
-	int N, M, P;
+	enum pll_types type;
+	u32 src0;
+	u32 src1;
+	u32 ctrl;
+	u32 coef;
+	u32 old_pnm;
+	u32 new_pnm;
+	u32 new_div;
 };
 
+static int
+nva3_pm_pll_offset(u32 id)
+{
+	static const u32 pll_map[] = {
+		0x00, PLL_CORE,
+		0x01, PLL_SHADER,
+		0x02, PLL_MEMORY,
+		0x00, 0x00
+	};
+	const u32 *map = pll_map;
+
+	while (map[1]) {
+		if (id == map[1])
+			return map[0];
+		map += 2;
+	}
+
+	return -ENOENT;
+}
+
 int
 nva3_pm_clock_get(struct drm_device *dev, u32 id)
 {
+	u32 src0, src1, ctrl, coef;
 	struct pll_lims pll;
-	int P, N, M, ret;
-	u32 reg;
+	int ret, off;
+	int P, N, M;
 
 	ret = get_pll_limits(dev, id, &pll);
 	if (ret)
 		return ret;
 
-	reg = nv_rd32(dev, pll.reg + 4);
-	P = (reg & 0x003f0000) >> 16;
-	N = (reg & 0x0000ff00) >> 8;
-	M = (reg & 0x000000ff);
+	off = nva3_pm_pll_offset(id);
+	if (off < 0)
+		return off;
+
+	src0 = nv_rd32(dev, 0x4120 + (off * 4));
+	src1 = nv_rd32(dev, 0x4160 + (off * 4));
+	ctrl = nv_rd32(dev, pll.reg + 0);
+	coef = nv_rd32(dev, pll.reg + 4);
+	NV_DEBUG(dev, "PLL %02x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+		      id, src0, src1, ctrl, coef);
+
+	if (ctrl & 0x00000008) {
+		u32 div = ((src1 & 0x003c0000) >> 18) + 1;
+		return (pll.refclk * 2) / div;
+	}
+
+	P = (coef & 0x003f0000) >> 16;
+	N = (coef & 0x0000ff00) >> 8;
+	M = (coef & 0x000000ff);
 	return pll.refclk * N / M / P;
 }
 
@@ -60,36 +102,103 @@ void *
 nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 		  u32 id, int khz)
 {
-	struct nva3_pm_state *state;
-	int dummy, ret;
+	struct nva3_pm_state *pll;
+	struct pll_lims limits;
+	int N, M, P, diff;
+	int ret, off;
+
+	ret = get_pll_limits(dev, id, &limits);
+	if (ret < 0)
+		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
+
+	off = nva3_pm_pll_offset(id);
+	if (id < 0)
+		return ERR_PTR(-EINVAL);
 
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (!state)
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (!pll)
 		return ERR_PTR(-ENOMEM);
+	pll->type = id;
+	pll->src0 = 0x004120 + (off * 4);
+	pll->src1 = 0x004160 + (off * 4);
+	pll->ctrl = limits.reg + 0;
+	pll->coef = limits.reg + 4;
 
-	ret = get_pll_limits(dev, id, &state->pll);
-	if (ret < 0) {
-		kfree(state);
-		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
+	/* If target clock is within [-2, 3) MHz of a divisor, we'll
+	 * use that instead of calculating MNP values
+	 */
+	pll->new_div = min((limits.refclk * 2) / (khz - 2999), 16);
+	if (pll->new_div) {
+		diff = khz - ((limits.refclk * 2) / pll->new_div);
+		if (diff < -2000 || diff >= 3000)
+			pll->new_div = 0;
 	}
 
-	ret = nv50_calc_pll2(dev, &state->pll, khz, &state->N, &dummy,
-			     &state->M, &state->P);
-	if (ret < 0) {
-		kfree(state);
-		return ERR_PTR(ret);
+	if (!pll->new_div) {
+		ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
+		if (ret < 0)
+			return ERR_PTR(ret);
+
+		pll->new_pnm = (P << 16) | (N << 8) | M;
+		pll->new_div = 2 - 1;
+	} else {
+		pll->new_pnm = 0;
+		pll->new_div--;
 	}
 
-	return state;
+	if ((nv_rd32(dev, pll->src1) & 0x00000101) != 0x00000101)
+		pll->old_pnm = nv_rd32(dev, pll->coef);
+	return pll;
 }
 
 void
 nva3_pm_clock_set(struct drm_device *dev, void *pre_state)
 {
-	struct nva3_pm_state *state = pre_state;
-	u32 reg = state->pll.reg;
+	struct nva3_pm_state *pll = pre_state;
+	u32 ctrl = 0;
+
+	/* For the memory clock, NVIDIA will build a "script" describing
+	 * the reclocking process and ask PDAEMON to execute it.
+	 */
+	if (pll->type == PLL_MEMORY) {
+		nv_wr32(dev, 0x100210, 0);
+		nv_wr32(dev, 0x1002dc, 1);
+		nv_wr32(dev, 0x004018, 0x00001000);
+		ctrl = 0x18000100;
+	}
+
+	if (pll->old_pnm || !pll->new_pnm) {
+		nv_mask(dev, pll->src1, 0x003c0101, 0x00000101 |
+						    (pll->new_div << 18));
+		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
+		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
+	}
+
+	if (pll->new_pnm) {
+		nv_mask(dev, pll->src0, 0x00000101, 0x00000101);
+		nv_wr32(dev, pll->coef, pll->new_pnm);
+		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
+		nv_mask(dev, pll->ctrl, 0x00000010, 0x00000000);
+		nv_mask(dev, pll->ctrl, 0x00020010, 0x00020010);
+		nv_wr32(dev, pll->ctrl, 0x00010015 | ctrl);
+		nv_mask(dev, pll->src1, 0x00000100, 0x00000000);
+		nv_mask(dev, pll->src1, 0x00000001, 0x00000000);
+		if (pll->type == PLL_MEMORY)
+			nv_wr32(dev, 0x4018, 0x10005000);
+	} else {
+		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
+		nv_mask(dev, pll->src0, 0x00000100, 0x00000000);
+		nv_mask(dev, pll->src0, 0x00000001, 0x00000000);
+		if (pll->type == PLL_MEMORY)
+			nv_wr32(dev, 0x4018, 0x1000d000);
+	}
+
+	if (pll->type == PLL_MEMORY) {
+		nv_wr32(dev, 0x1002dc, 0);
+		nv_wr32(dev, 0x100210, 0x80000000);
+	}
 
-	nv_wr32(dev, reg + 4, (state->P << 16) | (state->N << 8) | state->M);
-	kfree(state);
+	kfree(pll);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c
index 26a9960..08e6b11 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fb.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010 Red Hat Inc.
+ * Copyright 2011 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -23,16 +23,80 @@
  */
 
 #include "drmP.h"
-
+#include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_drm.h"
+
+struct nvc0_fb_priv {
+	struct page *r100c10_page;
+	dma_addr_t r100c10;
+};
+
+static void
+nvc0_fb_destroy(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	struct nvc0_fb_priv *priv = pfb->priv;
+
+	if (priv->r100c10_page) {
+		pci_unmap_page(dev->pdev, priv->r100c10, PAGE_SIZE,
+			       PCI_DMA_BIDIRECTIONAL);
+		__free_page(priv->r100c10_page);
+	}
+
+	kfree(priv);
+	pfb->priv = NULL;
+}
+
+static int
+nvc0_fb_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	struct nvc0_fb_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	pfb->priv = priv;
+
+	priv->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!priv->r100c10_page) {
+		nvc0_fb_destroy(dev);
+		return -ENOMEM;
+	}
+
+	priv->r100c10 = pci_map_page(dev->pdev, priv->r100c10_page, 0,
+				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(dev->pdev, priv->r100c10)) {
+		nvc0_fb_destroy(dev);
+		return -EFAULT;
+	}
+
+	return 0;
+}
 
 int
 nvc0_fb_init(struct drm_device *dev)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_fb_priv *priv;
+	int ret;
+
+	if (!dev_priv->engine.fb.priv) {
+		ret = nvc0_fb_create(dev);
+		if (ret)
+			return ret;
+	}
+	priv = dev_priv->engine.fb.priv;
+
+	nv_wr32(dev, 0x100c10, priv->r100c10 >> 8);
 	return 0;
 }
 
 void
 nvc0_fb_takedown(struct drm_device *dev)
 {
+	nvc0_fb_destroy(dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index e6f92c5..55a4245 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -116,7 +116,7 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 
 	/* allocate vram for control regs, map into polling area */
 	ret = nouveau_bo_new(dev, NULL, 0x1000, 0, TTM_PL_FLAG_VRAM,
-			     0, 0, true, true, &fifoch->user);
+			     0, 0, &fifoch->user);
 	if (ret)
 		goto error;
 
@@ -355,19 +355,57 @@ nvc0_fifo_init(struct drm_device *dev)
 }
 
 struct nouveau_enum nvc0_fifo_fault_unit[] = {
-	{ 0, "PGRAPH" },
-	{ 3, "PEEPHOLE" },
-	{ 4, "BAR1" },
-	{ 5, "BAR3" },
-	{ 7, "PFIFO" },
+	{ 0x00, "PGRAPH" },
+	{ 0x03, "PEEPHOLE" },
+	{ 0x04, "BAR1" },
+	{ 0x05, "BAR3" },
+	{ 0x07, "PFIFO" },
+	{ 0x10, "PBSP" },
+	{ 0x11, "PPPP" },
+	{ 0x13, "PCOUNTER" },
+	{ 0x14, "PVP" },
+	{ 0x15, "PCOPY0" },
+	{ 0x16, "PCOPY1" },
+	{ 0x17, "PDAEMON" },
 	{}
 };
 
 struct nouveau_enum nvc0_fifo_fault_reason[] = {
-	{ 0, "PT_NOT_PRESENT" },
-	{ 1, "PT_TOO_SHORT" },
-	{ 2, "PAGE_NOT_PRESENT" },
-	{ 3, "VM_LIMIT_EXCEEDED" },
+	{ 0x00, "PT_NOT_PRESENT" },
+	{ 0x01, "PT_TOO_SHORT" },
+	{ 0x02, "PAGE_NOT_PRESENT" },
+	{ 0x03, "VM_LIMIT_EXCEEDED" },
+	{ 0x04, "NO_CHANNEL" },
+	{ 0x05, "PAGE_SYSTEM_ONLY" },
+	{ 0x06, "PAGE_READ_ONLY" },
+	{ 0x0a, "COMPRESSED_SYSRAM" },
+	{ 0x0c, "INVALID_STORAGE_TYPE" },
+	{}
+};
+
+struct nouveau_enum nvc0_fifo_fault_hubclient[] = {
+	{ 0x01, "PCOPY0" },
+	{ 0x02, "PCOPY1" },
+	{ 0x04, "DISPATCH" },
+	{ 0x05, "CTXCTL" },
+	{ 0x06, "PFIFO" },
+	{ 0x07, "BAR_READ" },
+	{ 0x08, "BAR_WRITE" },
+	{ 0x0b, "PVP" },
+	{ 0x0c, "PPPP" },
+	{ 0x0d, "PBSP" },
+	{ 0x11, "PCOUNTER" },
+	{ 0x12, "PDAEMON" },
+	{ 0x14, "CCACHE" },
+	{ 0x15, "CCACHE_POST" },
+	{}
+};
+
+struct nouveau_enum nvc0_fifo_fault_gpcclient[] = {
+	{ 0x01, "TEX" },
+	{ 0x0c, "ESETUP" },
+	{ 0x0e, "CTXCTL" },
+	{ 0x0f, "PROP" },
 	{}
 };
 
@@ -385,12 +423,20 @@ nvc0_fifo_isr_vm_fault(struct drm_device *dev, int unit)
 	u32 valo = nv_rd32(dev, 0x2804 + (unit * 0x10));
 	u32 vahi = nv_rd32(dev, 0x2808 + (unit * 0x10));
 	u32 stat = nv_rd32(dev, 0x280c + (unit * 0x10));
+	u32 client = (stat & 0x00001f00) >> 8;
 
 	NV_INFO(dev, "PFIFO: %s fault at 0x%010llx [",
 		(stat & 0x00000080) ? "write" : "read", (u64)vahi << 32 | valo);
 	nouveau_enum_print(nvc0_fifo_fault_reason, stat & 0x0000000f);
 	printk("] from ");
 	nouveau_enum_print(nvc0_fifo_fault_unit, unit);
+	if (stat & 0x00000040) {
+		printk("/");
+		nouveau_enum_print(nvc0_fifo_fault_hubclient, client);
+	} else {
+		printk("/GPC%d/", (stat & 0x1f000000) >> 24);
+		nouveau_enum_print(nvc0_fifo_fault_gpcclient, client);
+	}
 	printk(" on channel 0x%010llx\n", (u64)inst << 12);
 }
 
@@ -418,6 +464,12 @@ nvc0_fifo_isr(struct drm_device *dev)
 {
 	u32 stat = nv_rd32(dev, 0x002100);
 
+	if (stat & 0x00000100) {
+		NV_INFO(dev, "PFIFO: unknown status 0x00000100\n");
+		nv_wr32(dev, 0x002100, 0x00000100);
+		stat &= ~0x00000100;
+	}
+
 	if (stat & 0x10000000) {
 		u32 units = nv_rd32(dev, 0x00259c);
 		u32 u = units;
@@ -446,10 +498,15 @@ nvc0_fifo_isr(struct drm_device *dev)
 		stat &= ~0x20000000;
 	}
 
+	if (stat & 0x40000000) {
+		NV_INFO(dev, "PFIFO: unknown status 0x40000000\n");
+		nv_mask(dev, 0x002a00, 0x00000000, 0x00000000);
+		stat &= ~0x40000000;
+	}
+
 	if (stat) {
 		NV_INFO(dev, "PFIFO: unhandled status 0x%08x\n", stat);
 		nv_wr32(dev, 0x002100, stat);
+		nv_wr32(dev, 0x002140, 0);
 	}
-
-	nv_wr32(dev, 0x2140, 0);
 }
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index eb18a7e..68f5c3f 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -200,15 +200,15 @@ nvc0_graph_create_context(struct nouveau_channel *chan)
 	for (i = 0; i < priv->grctx_size; i += 4)
 		nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
 
-        nv_wo32(grctx, 0xf4, 0);
-        nv_wo32(grctx, 0xf8, 0);
-        nv_wo32(grctx, 0x10, grch->mmio_nr);
-        nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->vinst));
-        nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->vinst));
-        nv_wo32(grctx, 0x1c, 1);
-        nv_wo32(grctx, 0x20, 0);
-        nv_wo32(grctx, 0x28, 0);
-        nv_wo32(grctx, 0x2c, 0);
+	nv_wo32(grctx, 0xf4, 0);
+	nv_wo32(grctx, 0xf8, 0);
+	nv_wo32(grctx, 0x10, grch->mmio_nr);
+	nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->vinst));
+	nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->vinst));
+	nv_wo32(grctx, 0x1c, 1);
+	nv_wo32(grctx, 0x20, 0);
+	nv_wo32(grctx, 0x28, 0);
+	nv_wo32(grctx, 0x2c, 0);
 	pinstmem->flush(dev);
 	return 0;
 
@@ -299,6 +299,14 @@ nvc0_graph_takedown(struct drm_device *dev)
 }
 
 static int
+nvc0_graph_mthd_page_flip(struct nouveau_channel *chan,
+			  u32 class, u32 mthd, u32 data)
+{
+	nouveau_finish_page_flip(chan, NULL);
+	return 0;
+}
+
+static int
 nvc0_graph_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -395,6 +403,7 @@ nvc0_graph_create(struct drm_device *dev)
 	nouveau_irq_register(dev, 25, nvc0_runk140_isr);
 	NVOBJ_CLASS(dev, 0x902d, GR); /* 2D */
 	NVOBJ_CLASS(dev, 0x9039, GR); /* M2MF */
+	NVOBJ_MTHD (dev, 0x9039, 0x0500, nvc0_graph_mthd_page_flip);
 	NVOBJ_CLASS(dev, 0x9097, GR); /* 3D */
 	NVOBJ_CLASS(dev, 0x90c0, GR); /* COMPUTE */
 	return 0;
@@ -443,28 +452,30 @@ nvc0_graph_init_gpc_0(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
 	int gpc;
-	
-	//      TP      ROP UNKVAL(magic_not_rop_nr)
-	// 450: 4/0/0/0 2        3
-	// 460: 3/4/0/0 4        1
-	// 465: 3/4/4/0 4        7
-	// 470: 3/3/4/4 5        5
-	// 480: 3/4/4/4 6        6
-
-	// magicgpc918
-	// 450: 00200000 00000000001000000000000000000000
-	// 460: 00124925 00000000000100100100100100100101
-	// 465: 000ba2e9 00000000000010111010001011101001
-	// 470: 00092493 00000000000010010010010010010011
-	// 480: 00088889 00000000000010001000100010001001
-
-	/* filled values up to tp_total, remainder 0 */
-	// 450: 00003210 00000000 00000000 00000000
-	// 460: 02321100 00000000 00000000 00000000
-	// 465: 22111000 00000233 00000000 00000000
-	// 470: 11110000 00233222 00000000 00000000
-	// 480: 11110000 03332222 00000000 00000000
-	
+
+	/*
+	 *      TP      ROP UNKVAL(magic_not_rop_nr)
+	 * 450: 4/0/0/0 2        3
+	 * 460: 3/4/0/0 4        1
+	 * 465: 3/4/4/0 4        7
+	 * 470: 3/3/4/4 5        5
+	 * 480: 3/4/4/4 6        6
+
+	 * magicgpc918
+	 * 450: 00200000 00000000001000000000000000000000
+	 * 460: 00124925 00000000000100100100100100100101
+	 * 465: 000ba2e9 00000000000010111010001011101001
+	 * 470: 00092493 00000000000010010010010010010011
+	 * 480: 00088889 00000000000010001000100010001001
+
+	 * filled values up to tp_total, remainder 0
+	 * 450: 00003210 00000000 00000000 00000000
+	 * 460: 02321100 00000000 00000000 00000000
+	 * 465: 22111000 00000233 00000000 00000000
+	 * 470: 11110000 00233222 00000000 00000000
+	 * 480: 11110000 03332222 00000000 00000000
+	 */
+
 	nv_wr32(dev, GPC_BCAST(0x0980), priv->magicgpc980[0]);
 	nv_wr32(dev, GPC_BCAST(0x0984), priv->magicgpc980[1]);
 	nv_wr32(dev, GPC_BCAST(0x0988), priv->magicgpc980[2]);
@@ -640,7 +651,6 @@ nvc0_graph_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nvc0_graph_priv *priv;
 	int ret;
 
 	dev_priv->engine.graph.accel_blocked = true;
@@ -665,13 +675,12 @@ nvc0_graph_init(struct drm_device *dev)
 		if (ret)
 			return ret;
 	}
-	priv = pgraph->priv;
 
 	nvc0_graph_init_obj418880(dev);
 	nvc0_graph_init_regs(dev);
-	//nvc0_graph_init_unitplemented_magics(dev);
+	/*nvc0_graph_init_unitplemented_magics(dev);*/
 	nvc0_graph_init_gpc_0(dev);
-	//nvc0_graph_init_unitplemented_c242(dev);
+	/*nvc0_graph_init_unitplemented_c242(dev);*/
 
 	nv_wr32(dev, 0x400500, 0x00010001);
 	nv_wr32(dev, 0x400100, 0xffffffff);
@@ -730,9 +739,12 @@ nvc0_graph_isr(struct drm_device *dev)
 	u32 class = nv_rd32(dev, 0x404200 + (subc * 4));
 
 	if (stat & 0x00000010) {
-		NV_INFO(dev, "PGRAPH: ILLEGAL_MTHD ch %d [0x%010llx] subc %d "
-			     "class 0x%04x mthd 0x%04x data 0x%08x\n",
-			chid, inst, subc, class, mthd, data);
+		if (nouveau_gpuobj_mthd_call2(dev, chid, class, mthd, data)) {
+			NV_INFO(dev, "PGRAPH: ILLEGAL_MTHD ch %d [0x%010llx] "
+				     "subc %d class 0x%04x mthd 0x%04x "
+				     "data 0x%08x\n",
+				chid, inst, subc, class, mthd, data);
+		}
 		nv_wr32(dev, 0x400100, 0x00000010);
 		stat &= ~0x00000010;
 	}
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.h b/drivers/gpu/drm/nouveau/nvc0_graph.h
index 40e26f9..d32b385 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.h
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.h
@@ -28,11 +28,11 @@
 #define GPC_MAX 4
 #define TP_MAX 32
 
-#define ROP_BCAST(r)   (0x408800 + (r))
-#define ROP_UNIT(u,r)  (0x410000 + (u) * 0x400 + (r))
-#define GPC_BCAST(r)   (0x418000 + (r))
-#define GPC_UNIT(t,r)  (0x500000 + (t) * 0x8000 + (r))
-#define TP_UNIT(t,m,r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
+#define ROP_BCAST(r)     (0x408800 + (r))
+#define ROP_UNIT(u, r)   (0x410000 + (u) * 0x400 + (r))
+#define GPC_BCAST(r)     (0x418000 + (r))
+#define GPC_UNIT(t, r)   (0x500000 + (t) * 0x8000 + (r))
+#define TP_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
 
 struct nvc0_graph_priv {
 	u8 gpc_nr;
@@ -52,9 +52,9 @@ struct nvc0_graph_priv {
 
 struct nvc0_graph_chan {
 	struct nouveau_gpuobj *grctx;
-	struct nouveau_gpuobj *unk408004; // 0x418810 too
-	struct nouveau_gpuobj *unk40800c; // 0x419004 too
-	struct nouveau_gpuobj *unk418810; // 0x419848 too
+	struct nouveau_gpuobj *unk408004; /* 0x418810 too */
+	struct nouveau_gpuobj *unk40800c; /* 0x419004 too */
+	struct nouveau_gpuobj *unk418810; /* 0x419848 too */
 	struct nouveau_gpuobj *mmio;
 	int mmio_nr;
 };
diff --git a/drivers/gpu/drm/nouveau/nvc0_grctx.c b/drivers/gpu/drm/nouveau/nvc0_grctx.c
index f880ff7..6cede9f 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grctx.c
+++ b/drivers/gpu/drm/nouveau/nvc0_grctx.c
@@ -1623,7 +1623,7 @@ nvc0_grctx_generate_rop(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	// ROPC_BROADCAST
+	/* ROPC_BROADCAST */
 	nv_wr32(dev, 0x408800, 0x02802a3c);
 	nv_wr32(dev, 0x408804, 0x00000040);
 	nv_wr32(dev, 0x408808, 0x0003e00d);
@@ -1647,7 +1647,7 @@ nvc0_grctx_generate_gpc(struct drm_device *dev)
 {
 	int i;
 
-	// GPC_BROADCAST
+	/* GPC_BROADCAST */
 	nv_wr32(dev, 0x418380, 0x00000016);
 	nv_wr32(dev, 0x418400, 0x38004e00);
 	nv_wr32(dev, 0x418404, 0x71e0ffff);
@@ -1728,7 +1728,7 @@ nvc0_grctx_generate_tp(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	// GPC_BROADCAST.TP_BROADCAST
+	/* GPC_BROADCAST.TP_BROADCAST */
 	nv_wr32(dev, 0x419848, 0x00000000);
 	nv_wr32(dev, 0x419864, 0x0000012a);
 	nv_wr32(dev, 0x419888, 0x00000000);
@@ -1741,7 +1741,7 @@ nvc0_grctx_generate_tp(struct drm_device *dev)
 	nv_wr32(dev, 0x419a1c, 0x00000000);
 	nv_wr32(dev, 0x419a20, 0x00000800);
 	if (dev_priv->chipset != 0xc0)
-		nv_wr32(dev, 0x00419ac4, 0x0007f440); // 0xc3
+		nv_wr32(dev, 0x00419ac4, 0x0007f440); /* 0xc3 */
 	nv_wr32(dev, 0x419b00, 0x0a418820);
 	nv_wr32(dev, 0x419b04, 0x062080e6);
 	nv_wr32(dev, 0x419b08, 0x020398a4);
@@ -1912,13 +1912,13 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		for (i = 1; i < 7; i++)
 			data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
 
-		// GPC_BROADCAST
+		/* GPC_BROADCAST */
 		nv_wr32(dev, 0x418bb8, (priv->tp_total << 8) |
 					priv->magic_not_rop_nr);
 		for (i = 0; i < 6; i++)
 			nv_wr32(dev, 0x418b08 + (i * 4), data[i]);
 
-		// GPC_BROADCAST.TP_BROADCAST
+		/* GPC_BROADCAST.TP_BROADCAST */
 		nv_wr32(dev, 0x419bd0, (priv->tp_total << 8) |
 				       priv->magic_not_rop_nr |
 				       data2[0]);
@@ -1926,7 +1926,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		for (i = 0; i < 6; i++)
 			nv_wr32(dev, 0x419b00 + (i * 4), data[i]);
 
-		// UNK78xx
+		/* UNK78xx */
 		nv_wr32(dev, 0x4078bc, (priv->tp_total << 8) |
 					priv->magic_not_rop_nr);
 		for (i = 0; i < 6; i++)
@@ -1944,7 +1944,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		gpc = -1;
 		for (i = 0, gpc = -1; i < 32; i++) {
 			int ltp = i * (priv->tp_total - 1) / 32;
-			
+
 			do {
 				gpc = (gpc + 1) % priv->gpc_nr;
 			} while (!tpnr[gpc]);
diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c
index e4e83c2..a179e6c 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vm.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vm.c
@@ -59,7 +59,7 @@ nvc0_vm_addr(struct nouveau_vma *vma, u64 phys, u32 memtype, u32 target)
 
 void
 nvc0_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
-	    struct nouveau_vram *mem, u32 pte, u32 cnt, u64 phys)
+	    struct nouveau_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta)
 {
 	u32 next = 1 << (vma->node->type - 8);
 
@@ -75,11 +75,11 @@ nvc0_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
 
 void
 nvc0_vm_map_sg(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
-	       u32 pte, dma_addr_t *list, u32 cnt)
+	       struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list)
 {
 	pte <<= 3;
 	while (cnt--) {
-		u64 phys = nvc0_vm_addr(vma, *list++, 0, 5);
+		u64 phys = nvc0_vm_addr(vma, *list++, mem->memtype, 5);
 		nv_wo32(pgt, pte + 0, lower_32_bits(phys));
 		nv_wo32(pgt, pte + 4, upper_32_bits(phys));
 		pte += 8;
@@ -104,20 +104,27 @@ nvc0_vm_flush(struct nouveau_vm *vm)
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
 	struct drm_device *dev = vm->dev;
 	struct nouveau_vm_pgd *vpgd;
-	u32 r100c80, engine;
+	unsigned long flags;
+	u32 engine = (dev_priv->chan_vm == vm) ? 1 : 5;
 
 	pinstmem->flush(vm->dev);
 
-	if (vm == dev_priv->chan_vm)
-		engine = 1;
-	else
-		engine = 5;
-
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	list_for_each_entry(vpgd, &vm->pgd_list, head) {
-		r100c80 = nv_rd32(dev, 0x100c80);
+		/* looks like maybe a "free flush slots" counter, the
+		 * faster you write to 0x100cbc to more it decreases
+		 */
+		if (!nv_wait_ne(dev, 0x100c80, 0x00ff0000, 0x00000000)) {
+			NV_ERROR(dev, "vm timeout 0: 0x%08x %d\n",
+				 nv_rd32(dev, 0x100c80), engine);
+		}
 		nv_wr32(dev, 0x100cb8, vpgd->obj->vinst >> 8);
 		nv_wr32(dev, 0x100cbc, 0x80000000 | engine);
-		if (!nv_wait(dev, 0x100c80, 0xffffffff, r100c80))
-			NV_ERROR(dev, "vm flush timeout eng %d\n", engine);
+		/* wait for flush to be queued? */
+		if (!nv_wait(dev, 0x100c80, 0x00008000, 0x00008000)) {
+			NV_ERROR(dev, "vm timeout 1: 0x%08x %d\n",
+				 nv_rd32(dev, 0x100c80), engine);
+		}
 	}
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c
index 858eda5..67c6ec6 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vram.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vram.c
@@ -26,64 +26,78 @@
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
 
+/* 0 = unsupported
+ * 1 = non-compressed
+ * 3 = compressed
+ */
+static const u8 types[256] = {
+	1, 1, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
+	0, 1, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
+	3, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
+	3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3,
+	3, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3,
+	3, 0, 3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 0, 3, 3, 0,
+	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 1, 0
+};
+
 bool
 nvc0_vram_flags_valid(struct drm_device *dev, u32 tile_flags)
 {
-	switch (tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) {
-	case 0x0000:
-	case 0xfe00:
-	case 0xdb00:
-	case 0x1100:
-		return true;
-	default:
-		break;
-	}
-
-	return false;
+	u8 memtype = (tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) >> 8;
+	return likely((types[memtype] == 1));
 }
 
 int
 nvc0_vram_new(struct drm_device *dev, u64 size, u32 align, u32 ncmin,
-	      u32 type, struct nouveau_vram **pvram)
+	      u32 type, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
 	struct nouveau_mm *mm = man->priv;
 	struct nouveau_mm_node *r;
-	struct nouveau_vram *vram;
+	struct nouveau_mem *mem;
 	int ret;
 
 	size  >>= 12;
 	align >>= 12;
 	ncmin >>= 12;
 
-	vram = kzalloc(sizeof(*vram), GFP_KERNEL);
-	if (!vram)
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&vram->regions);
-	vram->dev = dev_priv->dev;
-	vram->memtype = type;
-	vram->size = size;
+	INIT_LIST_HEAD(&mem->regions);
+	mem->dev = dev_priv->dev;
+	mem->memtype = (type & 0xff);
+	mem->size = size;
 
 	mutex_lock(&mm->mutex);
 	do {
 		ret = nouveau_mm_get(mm, 1, size, ncmin, align, &r);
 		if (ret) {
 			mutex_unlock(&mm->mutex);
-			nv50_vram_del(dev, &vram);
+			nv50_vram_del(dev, &mem);
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &vram->regions);
+		list_add_tail(&r->rl_entry, &mem->regions);
 		size -= r->length;
 	} while (size);
 	mutex_unlock(&mm->mutex);
 
-	r = list_first_entry(&vram->regions, struct nouveau_mm_node, rl_entry);
-	vram->offset = (u64)r->offset << 12;
-	*pvram = vram;
+	r = list_first_entry(&mem->regions, struct nouveau_mm_node, rl_entry);
+	mem->offset = (u64)r->offset << 12;
+	*pmem = mem;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvreg.h b/drivers/gpu/drm/nouveau/nvreg.h
index fe0f253..bbfb1a6 100644
--- a/drivers/gpu/drm/nouveau/nvreg.h
+++ b/drivers/gpu/drm/nouveau/nvreg.h
@@ -277,6 +277,8 @@
 #		define NV_CIO_CRE_EBR_VDE_11		2:2
 #		define NV_CIO_CRE_EBR_VRS_11		4:4
 #		define NV_CIO_CRE_EBR_VBS_11		6:6
+#	define NV_CIO_CRE_42			0x42
+#		define NV_CIO_CRE_42_OFFSET_11		6:6
 #	define NV_CIO_CRE_43			0x43
 #	define NV_CIO_CRE_44			0x44	/* head control */
 #	define NV_CIO_CRE_CSB			0x45	/* colour saturation boost */