diff --git a/drm-nouveau-updates.patch b/drm-nouveau-updates.patch index 607602c..d012a1c 100644 --- a/drm-nouveau-updates.patch +++ b/drm-nouveau-updates.patch @@ -1 +1,1333 @@ -nil + drivers/gpu/drm/nouveau/nouveau_bios.c | 25 +-- + drivers/gpu/drm/nouveau/nouveau_bo.c | 6 +- + drivers/gpu/drm/nouveau/nouveau_channel.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_dp.c | 2 - + drivers/gpu/drm/nouveau/nouveau_drv.h | 13 +- + drivers/gpu/drm/nouveau/nouveau_fence.c | 190 ++++++++++------ + drivers/gpu/drm/nouveau/nouveau_mem.c | 16 +- + drivers/gpu/drm/nouveau/nouveau_object.c | 22 ++- + drivers/gpu/drm/nouveau/nouveau_sgdma.c | 342 +++++++++++++++++++++++++---- + drivers/gpu/drm/nouveau/nouveau_state.c | 10 +- + drivers/gpu/drm/nouveau/nouveau_temp.c | 4 +- + drivers/gpu/drm/nouveau/nv04_fifo.c | 17 ++- + drivers/gpu/drm/nouveau/nv40_fb.c | 59 +++++- + drivers/gpu/drm/nouveau/nv50_display.c | 7 +- + drivers/gpu/drm/nouveau/nv50_fifo.c | 3 +- + drivers/gpu/drm/nouveau/nv50_gpio.c | 13 +- + drivers/gpu/drm/nouveau/nv50_graph.c | 8 +- + drivers/gpu/drm/nouveau/nv50_vm.c | 1 - + drivers/gpu/drm/nouveau/nvc0_fifo.c | 15 +- + drivers/gpu/drm/nouveau/nvc0_graph.c | 2 - + 20 files changed, 567 insertions(+), 190 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c +index 6bdab89..b8ff1e7 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_bios.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bios.c +@@ -5950,6 +5950,11 @@ apply_dcb_connector_quirks(struct nvbios *bios, int idx) + } + } + ++static const u8 hpd_gpio[16] = { ++ 0xff, 0x07, 0x08, 0xff, 0xff, 0x51, 0x52, 0xff, ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0x5f, 0x60, ++}; ++ + static void + parse_dcb_connector_table(struct nvbios *bios) + { +@@ -5986,23 +5991,9 @@ parse_dcb_connector_table(struct nvbios *bios) + + cte->type = (cte->entry & 0x000000ff) >> 0; + cte->index2 = (cte->entry & 0x00000f00) >> 8; +- switch (cte->entry & 0x00033000) { +- case 0x00001000: +- cte->gpio_tag = 0x07; +- break; +- case 0x00002000: +- cte->gpio_tag = 0x08; +- break; +- case 0x00010000: +- cte->gpio_tag = 0x51; +- break; +- case 0x00020000: +- cte->gpio_tag = 0x52; +- break; +- default: +- cte->gpio_tag = 0xff; +- break; +- } ++ ++ cte->gpio_tag = ffs((cte->entry & 0x07033000) >> 12); ++ cte->gpio_tag = hpd_gpio[cte->gpio_tag]; + + if (cte->type == 0xff) + continue; +diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c +index d38a4d9..bf260af 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -382,7 +382,8 @@ nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev) + case NOUVEAU_GART_AGP: + return ttm_agp_backend_init(bdev, dev->agp->bridge); + #endif +- case NOUVEAU_GART_SGDMA: ++ case NOUVEAU_GART_PDMA: ++ case NOUVEAU_GART_HW: + return nouveau_sgdma_init_ttm(dev); + default: + NV_ERROR(dev, "Unknown GART type %d\n", +@@ -436,7 +437,8 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, + TTM_PL_FLAG_WC; + man->default_caching = TTM_PL_FLAG_WC; + break; +- case NOUVEAU_GART_SGDMA: ++ case NOUVEAU_GART_PDMA: ++ case NOUVEAU_GART_HW: + man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | + TTM_MEMTYPE_FLAG_CMA; + man->available_caching = TTM_PL_MASK_CACHING; +diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c +index 3960d66..3d7b316 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_channel.c ++++ b/drivers/gpu/drm/nouveau/nouveau_channel.c +@@ -35,7 +35,7 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan) + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_bo *pb = chan->pushbuf_bo; + struct nouveau_gpuobj *pushbuf = NULL; +- int ret; ++ int ret = 0; + + if (dev_priv->card_type >= NV_50) { + if (dev_priv->card_type < NV_C0) { +diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c +index 38d5995..7beb82a 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_dp.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dp.c +@@ -175,7 +175,6 @@ nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config) + { + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct drm_device *dev = encoder->dev; +- struct bit_displayport_encoder_table_entry *dpse; + struct bit_displayport_encoder_table *dpe; + int ret, i, dpe_headerlen, vs = 0, pre = 0; + uint8_t request[2]; +@@ -183,7 +182,6 @@ nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config) + dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen); + if (!dpe) + return false; +- dpse = (void *)((char *)dpe + dpe_headerlen); + + ret = auxch_rd(encoder, DP_ADJUST_REQUEST_LANE0_1, request, 2); + if (ret) +diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h +index 9821fca..2e3d7fb 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_drv.h ++++ b/drivers/gpu/drm/nouveau/nouveau_drv.h +@@ -652,7 +652,6 @@ struct drm_nouveau_private { + /* interrupt handling */ + void (*irq_handler[32])(struct drm_device *); + bool msi_enabled; +- struct workqueue_struct *wq; + struct work_struct irq_work; + + struct list_head vbl_waiting; +@@ -691,13 +690,21 @@ struct drm_nouveau_private { + struct { + enum { + NOUVEAU_GART_NONE = 0, +- NOUVEAU_GART_AGP, +- NOUVEAU_GART_SGDMA ++ NOUVEAU_GART_AGP, /* AGP */ ++ NOUVEAU_GART_PDMA, /* paged dma object */ ++ NOUVEAU_GART_HW /* on-chip gart/vm */ + } type; + uint64_t aper_base; + uint64_t aper_size; + uint64_t aper_free; + ++ struct ttm_backend_func *func; ++ ++ struct { ++ struct page *page; ++ dma_addr_t addr; ++ } dummy; ++ + struct nouveau_gpuobj *sg_ctxdma; + struct nouveau_vma vma; + } gart_info; +diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c +index 221b846..8b46392 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_fence.c ++++ b/drivers/gpu/drm/nouveau/nouveau_fence.c +@@ -32,8 +32,7 @@ + #include "nouveau_dma.h" + + #define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10) +-#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \ +- nouveau_private(dev)->card_type < NV_C0) ++#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17) + + struct nouveau_fence { + struct nouveau_channel *channel; +@@ -259,11 +258,12 @@ __nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr) + } + + static struct nouveau_semaphore * +-alloc_semaphore(struct drm_device *dev) ++semaphore_alloc(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_semaphore *sema; +- int ret; ++ int size = (dev_priv->chipset < 0x84) ? 4 : 16; ++ int ret, i; + + if (!USE_SEMA(dev)) + return NULL; +@@ -277,9 +277,9 @@ alloc_semaphore(struct drm_device *dev) + goto fail; + + spin_lock(&dev_priv->fence.lock); +- sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0); ++ sema->mem = drm_mm_search_free(&dev_priv->fence.heap, size, 0, 0); + if (sema->mem) +- sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0); ++ sema->mem = drm_mm_get_block_atomic(sema->mem, size, 0); + spin_unlock(&dev_priv->fence.lock); + + if (!sema->mem) +@@ -287,7 +287,8 @@ alloc_semaphore(struct drm_device *dev) + + kref_init(&sema->ref); + sema->dev = dev; +- nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0); ++ for (i = sema->mem->start; i < sema->mem->start + size; i += 4) ++ nouveau_bo_wr32(dev_priv->fence.bo, i / 4, 0); + + return sema; + fail: +@@ -296,7 +297,7 @@ fail: + } + + static void +-free_semaphore(struct kref *ref) ++semaphore_free(struct kref *ref) + { + struct nouveau_semaphore *sema = + container_of(ref, struct nouveau_semaphore, ref); +@@ -318,61 +319,107 @@ semaphore_work(void *priv, bool signalled) + if (unlikely(!signalled)) + nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1); + +- kref_put(&sema->ref, free_semaphore); ++ kref_put(&sema->ref, semaphore_free); + } + + static int +-emit_semaphore(struct nouveau_channel *chan, int method, +- struct nouveau_semaphore *sema) ++semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) + { +- struct drm_nouveau_private *dev_priv = sema->dev->dev_private; +- struct nouveau_fence *fence; +- bool smart = (dev_priv->card_type >= NV_50); ++ struct drm_nouveau_private *dev_priv = chan->dev->dev_private; ++ struct nouveau_fence *fence = NULL; + int ret; + +- ret = RING_SPACE(chan, smart ? 8 : 4); ++ if (dev_priv->chipset < 0x84) { ++ ret = RING_SPACE(chan, 3); ++ if (ret) ++ return ret; ++ ++ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2); ++ OUT_RING (chan, sema->mem->start); ++ OUT_RING (chan, 1); ++ } else ++ if (dev_priv->chipset < 0xc0) { ++ struct nouveau_vma *vma = &dev_priv->fence.bo->vma; ++ u64 offset = vma->offset + sema->mem->start; ++ ++ ret = RING_SPACE(chan, 5); ++ if (ret) ++ return ret; ++ ++ BEGIN_RING(chan, NvSubSw, 0x0010, 4); ++ OUT_RING (chan, upper_32_bits(offset)); ++ OUT_RING (chan, lower_32_bits(offset)); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 1); /* ACQUIRE_EQ */ ++ } else { ++ struct nouveau_vma *vma = &dev_priv->fence.bo->vma; ++ u64 offset = vma->offset + sema->mem->start; ++ ++ ret = RING_SPACE(chan, 5); ++ if (ret) ++ return ret; ++ ++ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4); ++ OUT_RING (chan, upper_32_bits(offset)); ++ OUT_RING (chan, lower_32_bits(offset)); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 0x1001); /* ACQUIRE_EQ */ ++ } ++ ++ /* Delay semaphore destruction until its work is done */ ++ ret = nouveau_fence_new(chan, &fence, true); + if (ret) + return ret; + +- if (smart) { +- BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); +- OUT_RING(chan, NvSema); +- } +- BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); +- OUT_RING(chan, sema->mem->start); +- +- if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) { +- /* +- * NV50 tries to be too smart and context-switch +- * between semaphores instead of doing a "first come, +- * first served" strategy like previous cards +- * do. +- * +- * That's bad because the ACQUIRE latency can get as +- * large as the PFIFO context time slice in the +- * typical DRI2 case where you have several +- * outstanding semaphores at the same moment. +- * +- * If we're going to ACQUIRE, force the card to +- * context switch before, just in case the matching +- * RELEASE is already scheduled to be executed in +- * another channel. +- */ +- BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); +- OUT_RING(chan, 0); +- } ++ kref_get(&sema->ref); ++ nouveau_fence_work(fence, semaphore_work, sema); ++ nouveau_fence_unref(&fence); ++ return 0; ++} ++ ++static int ++semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ++{ ++ struct drm_nouveau_private *dev_priv = chan->dev->dev_private; ++ struct nouveau_fence *fence = NULL; ++ int ret; ++ ++ if (dev_priv->chipset < 0x84) { ++ ret = RING_SPACE(chan, 4); ++ if (ret) ++ return ret; ++ ++ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); ++ OUT_RING (chan, sema->mem->start); ++ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1); ++ OUT_RING (chan, 1); ++ } else ++ if (dev_priv->chipset < 0xc0) { ++ struct nouveau_vma *vma = &dev_priv->fence.bo->vma; ++ u64 offset = vma->offset + sema->mem->start; + +- BEGIN_RING(chan, NvSubSw, method, 1); +- OUT_RING(chan, 1); +- +- if (smart && method == NV_SW_SEMAPHORE_RELEASE) { +- /* +- * Force the card to context switch, there may be +- * another channel waiting for the semaphore we just +- * released. +- */ +- BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); +- OUT_RING(chan, 0); ++ ret = RING_SPACE(chan, 5); ++ if (ret) ++ return ret; ++ ++ BEGIN_RING(chan, NvSubSw, 0x0010, 4); ++ OUT_RING (chan, upper_32_bits(offset)); ++ OUT_RING (chan, lower_32_bits(offset)); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 2); /* RELEASE */ ++ } else { ++ struct nouveau_vma *vma = &dev_priv->fence.bo->vma; ++ u64 offset = vma->offset + sema->mem->start; ++ ++ ret = RING_SPACE(chan, 5); ++ if (ret) ++ return ret; ++ ++ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4); ++ OUT_RING (chan, upper_32_bits(offset)); ++ OUT_RING (chan, lower_32_bits(offset)); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 0x1002); /* RELEASE */ + } + + /* Delay semaphore destruction until its work is done */ +@@ -383,7 +430,6 @@ emit_semaphore(struct nouveau_channel *chan, int method, + kref_get(&sema->ref); + nouveau_fence_work(fence, semaphore_work, sema); + nouveau_fence_unref(&fence); +- + return 0; + } + +@@ -400,7 +446,7 @@ nouveau_fence_sync(struct nouveau_fence *fence, + nouveau_fence_signalled(fence))) + goto out; + +- sema = alloc_semaphore(dev); ++ sema = semaphore_alloc(dev); + if (!sema) { + /* Early card or broken userspace, fall back to + * software sync. */ +@@ -418,17 +464,17 @@ nouveau_fence_sync(struct nouveau_fence *fence, + } + + /* Make wchan wait until it gets signalled */ +- ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); ++ ret = semaphore_acquire(wchan, sema); + if (ret) + goto out_unlock; + + /* Signal the semaphore from chan */ +- ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); ++ ret = semaphore_release(chan, sema); + + out_unlock: + mutex_unlock(&chan->mutex); + out_unref: +- kref_put(&sema->ref, free_semaphore); ++ kref_put(&sema->ref, semaphore_free); + out: + if (chan) + nouveau_channel_put_unlocked(&chan); +@@ -449,22 +495,23 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) + struct nouveau_gpuobj *obj = NULL; + int ret; + ++ if (dev_priv->card_type >= NV_C0) ++ goto out_initialised; ++ + /* Create an NV_SW object for various sync purposes */ + ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW); + if (ret) + return ret; + + /* we leave subchannel empty for nvc0 */ +- if (dev_priv->card_type < NV_C0) { +- ret = RING_SPACE(chan, 2); +- if (ret) +- return ret; +- BEGIN_RING(chan, NvSubSw, 0, 1); +- OUT_RING(chan, NvSw); +- } ++ ret = RING_SPACE(chan, 2); ++ if (ret) ++ return ret; ++ BEGIN_RING(chan, NvSubSw, 0, 1); ++ OUT_RING(chan, NvSw); + + /* Create a DMA object for the shared cross-channel sync area. */ +- if (USE_SEMA(dev)) { ++ if (USE_SEMA(dev) && dev_priv->chipset < 0x84) { + struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem; + + ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, +@@ -484,14 +531,20 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) + return ret; + BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); + OUT_RING(chan, NvSema); ++ } else { ++ ret = RING_SPACE(chan, 2); ++ if (ret) ++ return ret; ++ BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); ++ OUT_RING (chan, chan->vram_handle); /* whole VM */ + } + + FIRE_RING(chan); + ++out_initialised: + INIT_LIST_HEAD(&chan->fence.pending); + spin_lock_init(&chan->fence.lock); + atomic_set(&chan->fence.last_sequence_irq, 0); +- + return 0; + } + +@@ -519,11 +572,12 @@ int + nouveau_fence_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int size = (dev_priv->chipset < 0x84) ? 4096 : 16384; + int ret; + + /* Create a shared VRAM heap for cross-channel sync. */ + if (USE_SEMA(dev)) { +- ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM, ++ ret = nouveau_bo_new(dev, NULL, size, 0, TTM_PL_FLAG_VRAM, + 0, 0, false, true, &dev_priv->fence.bo); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c +index 26347b7..30bd230 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_mem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_mem.c +@@ -393,11 +393,17 @@ nouveau_mem_vram_init(struct drm_device *dev) + struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; + int ret, dma_bits; + +- if (dev_priv->card_type >= NV_50 && +- pci_dma_supported(dev->pdev, DMA_BIT_MASK(40))) +- dma_bits = 40; +- else +- dma_bits = 32; ++ dma_bits = 32; ++ if (dev_priv->card_type >= NV_50) { ++ if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(40))) ++ dma_bits = 40; ++ } else ++ if (drm_device_is_pcie(dev) && ++ dev_priv->chipset != 0x40 && ++ dev_priv->chipset != 0x45) { ++ if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(39))) ++ dma_bits = 39; ++ } + + ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits)); + if (ret) +diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c +index 30b6544..3c12461 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_object.c ++++ b/drivers/gpu/drm/nouveau/nouveau_object.c +@@ -490,16 +490,22 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class, u64 base, + } + + if (target == NV_MEM_TARGET_GART) { +- if (dev_priv->gart_info.type == NOUVEAU_GART_AGP) { +- target = NV_MEM_TARGET_PCI_NOSNOOP; +- base += dev_priv->gart_info.aper_base; +- } else +- if (base != 0) { +- base = nouveau_sgdma_get_physical(dev, base); ++ struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma; ++ ++ if (dev_priv->gart_info.type == NOUVEAU_GART_PDMA) { ++ if (base == 0) { ++ nouveau_gpuobj_ref(gart, pobj); ++ return 0; ++ } ++ ++ base = nouveau_sgdma_get_physical(dev, base); + target = NV_MEM_TARGET_PCI; + } else { +- nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, pobj); +- return 0; ++ base += dev_priv->gart_info.aper_base; ++ if (dev_priv->gart_info.type == NOUVEAU_GART_AGP) ++ target = NV_MEM_TARGET_PCI_NOSNOOP; ++ else ++ target = NV_MEM_TARGET_PCI; + } + } + +diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +index 9a250eb..bdbaa54 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +@@ -74,8 +74,24 @@ nouveau_sgdma_clear(struct ttm_backend *be) + } + } + ++static void ++nouveau_sgdma_destroy(struct ttm_backend *be) ++{ ++ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; ++ ++ if (be) { ++ NV_DEBUG(nvbe->dev, "\n"); ++ ++ if (nvbe) { ++ if (nvbe->pages) ++ be->func->clear(be); ++ kfree(nvbe); ++ } ++ } ++} ++ + static int +-nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) ++nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) + { + struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; + struct drm_device *dev = nvbe->dev; +@@ -102,7 +118,7 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) + } + + static int +-nouveau_sgdma_unbind(struct ttm_backend *be) ++nv04_sgdma_unbind(struct ttm_backend *be) + { + struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; + struct drm_device *dev = nvbe->dev; +@@ -125,23 +141,222 @@ nouveau_sgdma_unbind(struct ttm_backend *be) + return 0; + } + ++static struct ttm_backend_func nv04_sgdma_backend = { ++ .populate = nouveau_sgdma_populate, ++ .clear = nouveau_sgdma_clear, ++ .bind = nv04_sgdma_bind, ++ .unbind = nv04_sgdma_unbind, ++ .destroy = nouveau_sgdma_destroy ++}; ++ + static void +-nouveau_sgdma_destroy(struct ttm_backend *be) ++nv41_sgdma_flush(struct nouveau_sgdma_be *nvbe) ++{ ++ struct drm_device *dev = nvbe->dev; ++ ++ nv_wr32(dev, 0x100810, 0x00000022); ++ if (!nv_wait(dev, 0x100810, 0x00000100, 0x00000100)) ++ NV_ERROR(dev, "vm flush timeout: 0x%08x\n", ++ nv_rd32(dev, 0x100810)); ++ nv_wr32(dev, 0x100810, 0x00000000); ++} ++ ++static int ++nv41_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) + { + struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; ++ struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private; ++ struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma; ++ dma_addr_t *list = nvbe->pages; ++ u32 pte = mem->start << 2; ++ u32 cnt = nvbe->nr_pages; + +- if (be) { +- NV_DEBUG(nvbe->dev, "\n"); ++ nvbe->offset = mem->start << PAGE_SHIFT; + +- if (nvbe) { +- if (nvbe->pages) +- be->func->clear(be); +- kfree(nvbe); ++ while (cnt--) { ++ nv_wo32(pgt, pte, (*list++ >> 7) | 1); ++ pte += 4; ++ } ++ ++ nv41_sgdma_flush(nvbe); ++ nvbe->bound = true; ++ return 0; ++} ++ ++static int ++nv41_sgdma_unbind(struct ttm_backend *be) ++{ ++ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; ++ struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private; ++ struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma; ++ u32 pte = (nvbe->offset >> 12) << 2; ++ u32 cnt = nvbe->nr_pages; ++ ++ while (cnt--) { ++ nv_wo32(pgt, pte, 0x00000000); ++ pte += 4; ++ } ++ ++ nv41_sgdma_flush(nvbe); ++ nvbe->bound = false; ++ return 0; ++} ++ ++static struct ttm_backend_func nv41_sgdma_backend = { ++ .populate = nouveau_sgdma_populate, ++ .clear = nouveau_sgdma_clear, ++ .bind = nv41_sgdma_bind, ++ .unbind = nv41_sgdma_unbind, ++ .destroy = nouveau_sgdma_destroy ++}; ++ ++static void ++nv44_sgdma_flush(struct nouveau_sgdma_be *nvbe) ++{ ++ struct drm_device *dev = nvbe->dev; ++ ++ nv_wr32(dev, 0x100814, (nvbe->nr_pages - 1) << 12); ++ nv_wr32(dev, 0x100808, nvbe->offset | 0x20); ++ if (!nv_wait(dev, 0x100808, 0x00000001, 0x00000001)) ++ NV_ERROR(dev, "gart flush timeout: 0x%08x\n", ++ nv_rd32(dev, 0x100808)); ++ nv_wr32(dev, 0x100808, 0x00000000); ++} ++ ++static void ++nv44_sgdma_fill(struct nouveau_gpuobj *pgt, dma_addr_t *list, u32 base, u32 cnt) ++{ ++ struct drm_nouveau_private *dev_priv = pgt->dev->dev_private; ++ dma_addr_t dummy = dev_priv->gart_info.dummy.addr; ++ u32 pte, tmp[4]; ++ ++ pte = base >> 2; ++ base &= ~0x0000000f; ++ ++ tmp[0] = nv_ro32(pgt, base + 0x0); ++ tmp[1] = nv_ro32(pgt, base + 0x4); ++ tmp[2] = nv_ro32(pgt, base + 0x8); ++ tmp[3] = nv_ro32(pgt, base + 0xc); ++ while (cnt--) { ++ u32 addr = list ? (*list++ >> 12) : (dummy >> 12); ++ switch (pte++ & 0x3) { ++ case 0: ++ tmp[0] &= ~0x07ffffff; ++ tmp[0] |= addr; ++ break; ++ case 1: ++ tmp[0] &= ~0xf8000000; ++ tmp[0] |= addr << 27; ++ tmp[1] &= ~0x003fffff; ++ tmp[1] |= addr >> 5; ++ break; ++ case 2: ++ tmp[1] &= ~0xffc00000; ++ tmp[1] |= addr << 22; ++ tmp[2] &= ~0x0001ffff; ++ tmp[2] |= addr >> 10; ++ break; ++ case 3: ++ tmp[2] &= ~0xfffe0000; ++ tmp[2] |= addr << 17; ++ tmp[3] &= ~0x00000fff; ++ tmp[3] |= addr >> 15; ++ break; + } + } ++ ++ tmp[3] |= 0x40000000; ++ ++ nv_wo32(pgt, base + 0x0, tmp[0]); ++ nv_wo32(pgt, base + 0x4, tmp[1]); ++ nv_wo32(pgt, base + 0x8, tmp[2]); ++ nv_wo32(pgt, base + 0xc, tmp[3]); + } + + static int ++nv44_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) ++{ ++ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; ++ struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private; ++ struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma; ++ dma_addr_t *list = nvbe->pages; ++ u32 pte = mem->start << 2, tmp[4]; ++ u32 cnt = nvbe->nr_pages; ++ int i; ++ ++ nvbe->offset = mem->start << PAGE_SHIFT; ++ ++ if (pte & 0x0000000c) { ++ u32 max = 4 - ((pte >> 2) & 0x3); ++ u32 part = (cnt > max) ? max : cnt; ++ nv44_sgdma_fill(pgt, list, pte, part); ++ pte += (part << 2); ++ list += part; ++ cnt -= part; ++ } ++ ++ while (cnt >= 4) { ++ for (i = 0; i < 4; i++) ++ tmp[i] = *list++ >> 12; ++ nv_wo32(pgt, pte + 0x0, tmp[0] >> 0 | tmp[1] << 27); ++ nv_wo32(pgt, pte + 0x4, tmp[1] >> 5 | tmp[2] << 22); ++ nv_wo32(pgt, pte + 0x8, tmp[2] >> 10 | tmp[3] << 17); ++ nv_wo32(pgt, pte + 0xc, tmp[3] >> 15 | 0x40000000); ++ pte += 0x10; ++ cnt -= 4; ++ } ++ ++ if (cnt) ++ nv44_sgdma_fill(pgt, list, pte, cnt); ++ ++ nv44_sgdma_flush(nvbe); ++ nvbe->bound = true; ++ return 0; ++} ++ ++static int ++nv44_sgdma_unbind(struct ttm_backend *be) ++{ ++ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; ++ struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private; ++ struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma; ++ u32 pte = (nvbe->offset >> 12) << 2; ++ u32 cnt = nvbe->nr_pages; ++ ++ if (pte & 0x0000000c) { ++ u32 max = 4 - ((pte >> 2) & 0x3); ++ u32 part = (cnt > max) ? max : cnt; ++ nv44_sgdma_fill(pgt, NULL, pte, part); ++ pte += (part << 2); ++ cnt -= part; ++ } ++ ++ while (cnt >= 4) { ++ nv_wo32(pgt, pte + 0x0, 0x00000000); ++ nv_wo32(pgt, pte + 0x4, 0x00000000); ++ nv_wo32(pgt, pte + 0x8, 0x00000000); ++ nv_wo32(pgt, pte + 0xc, 0x00000000); ++ pte += 0x10; ++ cnt -= 4; ++ } ++ ++ if (cnt) ++ nv44_sgdma_fill(pgt, NULL, pte, cnt); ++ ++ nv44_sgdma_flush(nvbe); ++ nvbe->bound = false; ++ return 0; ++} ++ ++static struct ttm_backend_func nv44_sgdma_backend = { ++ .populate = nouveau_sgdma_populate, ++ .clear = nouveau_sgdma_clear, ++ .bind = nv44_sgdma_bind, ++ .unbind = nv44_sgdma_unbind, ++ .destroy = nouveau_sgdma_destroy ++}; ++ ++static int + nv50_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem) + { + struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; +@@ -170,14 +385,6 @@ nv50_sgdma_unbind(struct ttm_backend *be) + return 0; + } + +-static struct ttm_backend_func nouveau_sgdma_backend = { +- .populate = nouveau_sgdma_populate, +- .clear = nouveau_sgdma_clear, +- .bind = nouveau_sgdma_bind, +- .unbind = nouveau_sgdma_unbind, +- .destroy = nouveau_sgdma_destroy +-}; +- + static struct ttm_backend_func nv50_sgdma_backend = { + .populate = nouveau_sgdma_populate, + .clear = nouveau_sgdma_clear, +@@ -198,10 +405,7 @@ nouveau_sgdma_init_ttm(struct drm_device *dev) + + nvbe->dev = dev; + +- if (dev_priv->card_type < NV_50) +- nvbe->backend.func = &nouveau_sgdma_backend; +- else +- nvbe->backend.func = &nv50_sgdma_backend; ++ nvbe->backend.func = dev_priv->gart_info.func; + return &nvbe->backend; + } + +@@ -210,21 +414,71 @@ nouveau_sgdma_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpuobj *gpuobj = NULL; +- uint32_t aper_size, obj_size; +- int i, ret; ++ u32 aper_size, align; ++ int ret; ++ ++ if (dev_priv->card_type >= NV_50 || ++ dev_priv->ramin_rsvd_vram >= 2 * 1024 * 1024) ++ aper_size = 512 * 1024 * 1024; ++ else ++ aper_size = 64 * 1024 * 1024; ++ ++ /* Dear NVIDIA, NV44+ would like proper present bits in PTEs for ++ * christmas. The cards before it have them, the cards after ++ * it have them, why is NV44 so unloved? ++ */ ++ dev_priv->gart_info.dummy.page = alloc_page(GFP_DMA32 | GFP_KERNEL); ++ if (!dev_priv->gart_info.dummy.page) ++ return -ENOMEM; ++ ++ dev_priv->gart_info.dummy.addr = ++ pci_map_page(dev->pdev, dev_priv->gart_info.dummy.page, ++ 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ++ if (pci_dma_mapping_error(dev->pdev, dev_priv->gart_info.dummy.addr)) { ++ NV_ERROR(dev, "error mapping dummy page\n"); ++ __free_page(dev_priv->gart_info.dummy.page); ++ dev_priv->gart_info.dummy.page = NULL; ++ return -ENOMEM; ++ } + +- if (dev_priv->card_type < NV_50) { +- if(dev_priv->ramin_rsvd_vram < 2 * 1024 * 1024) +- aper_size = 64 * 1024 * 1024; +- else +- aper_size = 512 * 1024 * 1024; ++ if (dev_priv->card_type >= NV_50) { ++ ret = nouveau_vm_get(dev_priv->chan_vm, aper_size, ++ 12, NV_MEM_ACCESS_RW, ++ &dev_priv->gart_info.vma); ++ if (ret) ++ return ret; ++ ++ dev_priv->gart_info.aper_base = dev_priv->gart_info.vma.offset; ++ dev_priv->gart_info.aper_size = aper_size; ++ dev_priv->gart_info.type = NOUVEAU_GART_HW; ++ dev_priv->gart_info.func = &nv50_sgdma_backend; ++ } else ++ if (drm_device_is_pcie(dev) && ++ dev_priv->chipset != 0x40 && dev_priv->chipset != 0x45) { ++ if (nv44_graph_class(dev)) { ++ dev_priv->gart_info.func = &nv44_sgdma_backend; ++ align = 512 * 1024; ++ } else { ++ dev_priv->gart_info.func = &nv41_sgdma_backend; ++ align = 16; ++ } + +- obj_size = (aper_size >> NV_CTXDMA_PAGE_SHIFT) * 4; +- obj_size += 8; /* ctxdma header */ ++ ret = nouveau_gpuobj_new(dev, NULL, aper_size / 1024, align, ++ NVOBJ_FLAG_ZERO_ALLOC | ++ NVOBJ_FLAG_ZERO_FREE, &gpuobj); ++ if (ret) { ++ NV_ERROR(dev, "Error creating sgdma object: %d\n", ret); ++ return ret; ++ } + +- ret = nouveau_gpuobj_new(dev, NULL, obj_size, 16, +- NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, &gpuobj); ++ dev_priv->gart_info.sg_ctxdma = gpuobj; ++ dev_priv->gart_info.aper_base = 0; ++ dev_priv->gart_info.aper_size = aper_size; ++ dev_priv->gart_info.type = NOUVEAU_GART_HW; ++ } else { ++ ret = nouveau_gpuobj_new(dev, NULL, (aper_size / 1024) + 8, 16, ++ NVOBJ_FLAG_ZERO_ALLOC | ++ NVOBJ_FLAG_ZERO_FREE, &gpuobj); + if (ret) { + NV_ERROR(dev, "Error creating sgdma object: %d\n", ret); + return ret; +@@ -236,25 +490,14 @@ nouveau_sgdma_init(struct drm_device *dev) + (0 << 14) /* RW */ | + (2 << 16) /* PCI */); + nv_wo32(gpuobj, 4, aper_size - 1); +- for (i = 2; i < 2 + (aper_size >> 12); i++) +- nv_wo32(gpuobj, i * 4, 0x00000000); + + dev_priv->gart_info.sg_ctxdma = gpuobj; + dev_priv->gart_info.aper_base = 0; + dev_priv->gart_info.aper_size = aper_size; +- } else +- if (dev_priv->chan_vm) { +- ret = nouveau_vm_get(dev_priv->chan_vm, 512 * 1024 * 1024, +- 12, NV_MEM_ACCESS_RW, +- &dev_priv->gart_info.vma); +- if (ret) +- return ret; +- +- dev_priv->gart_info.aper_base = dev_priv->gart_info.vma.offset; +- dev_priv->gart_info.aper_size = 512 * 1024 * 1024; ++ dev_priv->gart_info.type = NOUVEAU_GART_PDMA; ++ dev_priv->gart_info.func = &nv04_sgdma_backend; + } + +- dev_priv->gart_info.type = NOUVEAU_GART_SGDMA; + return 0; + } + +@@ -265,6 +508,13 @@ nouveau_sgdma_takedown(struct drm_device *dev) + + nouveau_gpuobj_ref(NULL, &dev_priv->gart_info.sg_ctxdma); + nouveau_vm_put(&dev_priv->gart_info.vma); ++ ++ if (dev_priv->gart_info.dummy.page) { ++ pci_unmap_page(dev->pdev, dev_priv->gart_info.dummy.addr, ++ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ++ __free_page(dev_priv->gart_info.dummy.page); ++ dev_priv->gart_info.dummy.page = NULL; ++ } + } + + uint32_t +diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c +index a54fc43..916505d 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_state.c ++++ b/drivers/gpu/drm/nouveau/nouveau_state.c +@@ -929,12 +929,6 @@ int nouveau_load(struct drm_device *dev, unsigned long flags) + NV_DEBUG(dev, "vendor: 0x%X device: 0x%X class: 0x%X\n", + dev->pci_vendor, dev->pci_device, dev->pdev->class); + +- dev_priv->wq = create_workqueue("nouveau"); +- if (!dev_priv->wq) { +- ret = -EINVAL; +- goto err_priv; +- } +- + /* resource 0 is mmio regs */ + /* resource 1 is linear FB */ + /* resource 2 is RAMIN (mmio regs + 0x1000000) */ +@@ -947,7 +941,7 @@ int nouveau_load(struct drm_device *dev, unsigned long flags) + NV_ERROR(dev, "Unable to initialize the mmio mapping. " + "Please report your setup to " DRIVER_EMAIL "\n"); + ret = -EINVAL; +- goto err_wq; ++ goto err_priv; + } + NV_DEBUG(dev, "regs mapped ok at 0x%llx\n", + (unsigned long long)mmio_start_offs); +@@ -1054,8 +1048,6 @@ err_ramin: + iounmap(dev_priv->ramin); + err_mmio: + iounmap(dev_priv->mmio); +-err_wq: +- destroy_workqueue(dev_priv->wq); + err_priv: + kfree(dev_priv); + dev->dev_private = NULL; +diff --git a/drivers/gpu/drm/nouveau/nouveau_temp.c b/drivers/gpu/drm/nouveau/nouveau_temp.c +index 8d9968e..649b041 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_temp.c ++++ b/drivers/gpu/drm/nouveau/nouveau_temp.c +@@ -239,11 +239,9 @@ static bool + probe_monitoring_device(struct nouveau_i2c_chan *i2c, + struct i2c_board_info *info) + { +- char modalias[16] = "i2c:"; + struct i2c_client *client; + +- strlcat(modalias, info->type, sizeof(modalias)); +- request_module(modalias); ++ request_module("%s%s", I2C_MODULE_PREFIX, info->type); + + client = i2c_new_device(&i2c->adapter, info); + if (!client) +diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c +index f89d104..dfa600c 100644 +--- a/drivers/gpu/drm/nouveau/nv04_fifo.c ++++ b/drivers/gpu/drm/nouveau/nv04_fifo.c +@@ -379,6 +379,15 @@ out: + return handled; + } + ++static const char *nv_dma_state_err(u32 state) ++{ ++ static const char * const desc[] = { ++ "NONE", "CALL_SUBR_ACTIVE", "INVALID_MTHD", "RET_SUBR_INACTIVE", ++ "INVALID_CMD", "IB_EMPTY"/* NV50+ */, "MEM_FAULT", "UNK" ++ }; ++ return desc[(state >> 29) & 0x7]; ++} ++ + void + nv04_fifo_isr(struct drm_device *dev) + { +@@ -460,9 +469,10 @@ nv04_fifo_isr(struct drm_device *dev) + if (nouveau_ratelimit()) + NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x " + "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x " +- "State 0x%08x Push 0x%08x\n", ++ "State 0x%08x (err: %s) Push 0x%08x\n", + chid, ho_get, dma_get, ho_put, + dma_put, ib_get, ib_put, state, ++ nv_dma_state_err(state), + push); + + /* METHOD_COUNT, in DMA_STATE on earlier chipsets */ +@@ -476,8 +486,9 @@ nv04_fifo_isr(struct drm_device *dev) + } + } else { + NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x " +- "Put 0x%08x State 0x%08x Push 0x%08x\n", +- chid, dma_get, dma_put, state, push); ++ "Put 0x%08x State 0x%08x (err: %s) Push 0x%08x\n", ++ chid, dma_get, dma_put, state, ++ nv_dma_state_err(state), push); + + if (dma_get != dma_put) + nv_wr32(dev, 0x003244, dma_put); +diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c +index f3d9c05..f0ac2a7 100644 +--- a/drivers/gpu/drm/nouveau/nv40_fb.c ++++ b/drivers/gpu/drm/nouveau/nv40_fb.c +@@ -24,6 +24,53 @@ nv40_fb_set_tile_region(struct drm_device *dev, int i) + } + } + ++static void ++nv40_fb_init_gart(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma; ++ ++ if (dev_priv->gart_info.type != NOUVEAU_GART_HW) { ++ nv_wr32(dev, 0x100800, 0x00000001); ++ return; ++ } ++ ++ nv_wr32(dev, 0x100800, gart->pinst | 0x00000002); ++ nv_mask(dev, 0x10008c, 0x00000100, 0x00000100); ++ nv_wr32(dev, 0x100820, 0x00000000); ++} ++ ++static void ++nv44_fb_init_gart(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpuobj *gart = dev_priv->gart_info.sg_ctxdma; ++ u32 vinst; ++ ++ if (dev_priv->gart_info.type != NOUVEAU_GART_HW) { ++ nv_wr32(dev, 0x100850, 0x80000000); ++ nv_wr32(dev, 0x100800, 0x00000001); ++ return; ++ } ++ ++ /* calculate vram address of this PRAMIN block, object ++ * must be allocated on 512KiB alignment, and not exceed ++ * a total size of 512KiB for this to work correctly ++ */ ++ vinst = nv_rd32(dev, 0x10020c); ++ vinst -= ((gart->pinst >> 19) + 1) << 19; ++ ++ nv_wr32(dev, 0x100850, 0x80000000); ++ nv_wr32(dev, 0x100818, dev_priv->gart_info.dummy.addr); ++ ++ nv_wr32(dev, 0x100804, dev_priv->gart_info.aper_size); ++ nv_wr32(dev, 0x100850, 0x00008000); ++ nv_mask(dev, 0x10008c, 0x00000200, 0x00000200); ++ nv_wr32(dev, 0x100820, 0x00000000); ++ nv_wr32(dev, 0x10082c, 0x00000001); ++ nv_wr32(dev, 0x100800, vinst | 0x00000010); ++} ++ + int + nv40_fb_init(struct drm_device *dev) + { +@@ -32,12 +79,12 @@ nv40_fb_init(struct drm_device *dev) + uint32_t tmp; + int i; + +- /* This is strictly a NV4x register (don't know about NV5x). */ +- /* The blob sets these to all kinds of values, and they mess up our setup. */ +- /* I got value 0x52802 instead. For some cards the blob even sets it back to 0x1. */ +- /* Note: the blob doesn't read this value, so i'm pretty sure this is safe for all cards. */ +- /* Any idea what this is? */ +- nv_wr32(dev, NV40_PFB_UNK_800, 0x1); ++ if (dev_priv->chipset != 0x40 && dev_priv->chipset != 0x45) { ++ if (nv44_graph_class(dev)) ++ nv44_fb_init_gart(dev); ++ else ++ nv40_fb_init_gart(dev); ++ } + + switch (dev_priv->chipset) { + case 0x40: +diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c +index 7cc94ed..a804a35 100644 +--- a/drivers/gpu/drm/nouveau/nv50_display.c ++++ b/drivers/gpu/drm/nouveau/nv50_display.c +@@ -345,12 +345,15 @@ int nv50_display_create(struct drm_device *dev) + void + nv50_display_destroy(struct drm_device *dev) + { ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ + NV_DEBUG_KMS(dev, "\n"); + + drm_mode_config_cleanup(dev); + + nv50_display_disable(dev); + nouveau_irq_unregister(dev, 26); ++ flush_work_sync(&dev_priv->irq_work); + } + + static u16 +@@ -587,7 +590,7 @@ static void + nv50_display_unk20_handler(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- u32 unk30 = nv_rd32(dev, 0x610030), tmp, pclk, script, mc; ++ u32 unk30 = nv_rd32(dev, 0x610030), tmp, pclk, script, mc = 0; + struct dcb_entry *dcb; + int i, crtc, or, type = OUTPUT_ANY; + +@@ -836,7 +839,7 @@ nv50_display_isr(struct drm_device *dev) + if (clock) { + nv_wr32(dev, NV03_PMC_INTR_EN_0, 0); + if (!work_pending(&dev_priv->irq_work)) +- queue_work(dev_priv->wq, &dev_priv->irq_work); ++ schedule_work(&dev_priv->irq_work); + delayed |= clock; + intr1 &= ~clock; + } +diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c +index 8dd04c5..c34a074 100644 +--- a/drivers/gpu/drm/nouveau/nv50_fifo.c ++++ b/drivers/gpu/drm/nouveau/nv50_fifo.c +@@ -149,6 +149,7 @@ nv50_fifo_init_regs(struct drm_device *dev) + nv_wr32(dev, 0x3204, 0); + nv_wr32(dev, 0x3210, 0); + nv_wr32(dev, 0x3270, 0); ++ nv_wr32(dev, 0x2044, 0x01003fff); + + /* Enable dummy channels setup by nv50_instmem.c */ + nv50_fifo_channel_enable(dev, 0); +@@ -273,7 +274,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan) + nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | + (4 << 24) /* SEARCH_FULL */ | + (chan->ramht->gpuobj->cinst >> 4)); +- nv_wo32(ramfc, 0x44, 0x2101ffff); ++ nv_wo32(ramfc, 0x44, 0x01003fff); + nv_wo32(ramfc, 0x60, 0x7fffffff); + nv_wo32(ramfc, 0x40, 0x00000000); + nv_wo32(ramfc, 0x7c, 0x30000001); +diff --git a/drivers/gpu/drm/nouveau/nv50_gpio.c b/drivers/gpu/drm/nouveau/nv50_gpio.c +index 6b149c0..d4f4206 100644 +--- a/drivers/gpu/drm/nouveau/nv50_gpio.c ++++ b/drivers/gpu/drm/nouveau/nv50_gpio.c +@@ -137,6 +137,7 @@ nv50_gpio_irq_unregister(struct drm_device *dev, enum dcb_gpio_tag tag, + struct nv50_gpio_priv *priv = pgpio->priv; + struct nv50_gpio_handler *gpioh, *tmp; + struct dcb_gpio_entry *gpio; ++ LIST_HEAD(tofree); + unsigned long flags; + + gpio = nouveau_bios_gpio_entry(dev, tag); +@@ -149,10 +150,14 @@ nv50_gpio_irq_unregister(struct drm_device *dev, enum dcb_gpio_tag tag, + gpioh->handler != handler || + gpioh->data != data) + continue; +- list_del(&gpioh->head); +- kfree(gpioh); ++ list_move(&gpioh->head, &tofree); + } + spin_unlock_irqrestore(&priv->lock, flags); ++ ++ list_for_each_entry_safe(gpioh, tmp, &tofree, head) { ++ flush_work_sync(&gpioh->work); ++ kfree(gpioh); ++ } + } + + bool +@@ -205,7 +210,6 @@ nv50_gpio_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio; +- struct nv50_gpio_priv *priv; + int ret; + + if (!pgpio->priv) { +@@ -213,7 +217,6 @@ nv50_gpio_init(struct drm_device *dev) + if (ret) + return ret; + } +- priv = pgpio->priv; + + /* disable, and ack any pending gpio interrupts */ + nv_wr32(dev, 0xe050, 0x00000000); +@@ -293,7 +296,7 @@ nv50_gpio_isr(struct drm_device *dev) + continue; + gpioh->inhibit = true; + +- queue_work(dev_priv->wq, &gpioh->work); ++ schedule_work(&gpioh->work); + } + spin_unlock(&priv->lock); + } +diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c +index 37e21d2..c75cff1 100644 +--- a/drivers/gpu/drm/nouveau/nv50_graph.c ++++ b/drivers/gpu/drm/nouveau/nv50_graph.c +@@ -912,10 +912,10 @@ nv50_pgraph_trap_handler(struct drm_device *dev, u32 display, u64 inst, u32 chid + printk("\n"); + NV_INFO(dev, "PGRAPH - TRAP_CCACHE %08x %08x %08x %08x" + " %08x %08x %08x\n", +- nv_rd32(dev, 0x405800), nv_rd32(dev, 0x405804), +- nv_rd32(dev, 0x405808), nv_rd32(dev, 0x40580c), +- nv_rd32(dev, 0x405810), nv_rd32(dev, 0x405814), +- nv_rd32(dev, 0x40581c)); ++ nv_rd32(dev, 0x405000), nv_rd32(dev, 0x405004), ++ nv_rd32(dev, 0x405008), nv_rd32(dev, 0x40500c), ++ nv_rd32(dev, 0x405010), nv_rd32(dev, 0x405014), ++ nv_rd32(dev, 0x40501c)); + + } + +diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c +index 459ff08..03c1a63 100644 +--- a/drivers/gpu/drm/nouveau/nv50_vm.c ++++ b/drivers/gpu/drm/nouveau/nv50_vm.c +@@ -31,7 +31,6 @@ void + nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde, + struct nouveau_gpuobj *pgt[2]) + { +- struct drm_nouveau_private *dev_priv = pgd->dev->dev_private; + u64 phys = 0xdeadcafe00000000ULL; + u32 coverage = 0; + +diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c +index e6f92c5..e9f8643 100644 +--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c ++++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c +@@ -418,6 +418,12 @@ nvc0_fifo_isr(struct drm_device *dev) + { + u32 stat = nv_rd32(dev, 0x002100); + ++ if (stat & 0x00000100) { ++ NV_INFO(dev, "PFIFO: unknown status 0x00000100\n"); ++ nv_wr32(dev, 0x002100, 0x00000100); ++ stat &= ~0x00000100; ++ } ++ + if (stat & 0x10000000) { + u32 units = nv_rd32(dev, 0x00259c); + u32 u = units; +@@ -446,10 +452,15 @@ nvc0_fifo_isr(struct drm_device *dev) + stat &= ~0x20000000; + } + ++ if (stat & 0x40000000) { ++ NV_INFO(dev, "PFIFO: unknown status 0x40000000\n"); ++ nv_mask(dev, 0x002a00, 0x00000000, 0x00000000); ++ stat &= ~0x40000000; ++ } ++ + if (stat) { + NV_INFO(dev, "PFIFO: unhandled status 0x%08x\n", stat); + nv_wr32(dev, 0x002100, stat); ++ nv_wr32(dev, 0x002140, 0); + } +- +- nv_wr32(dev, 0x2140, 0); + } +diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c +index eb18a7e..afa7afe 100644 +--- a/drivers/gpu/drm/nouveau/nvc0_graph.c ++++ b/drivers/gpu/drm/nouveau/nvc0_graph.c +@@ -640,7 +640,6 @@ nvc0_graph_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; +- struct nvc0_graph_priv *priv; + int ret; + + dev_priv->engine.graph.accel_blocked = true; +@@ -665,7 +664,6 @@ nvc0_graph_init(struct drm_device *dev) + if (ret) + return ret; + } +- priv = pgraph->priv; + + nvc0_graph_init_obj418880(dev); + nvc0_graph_init_regs(dev); diff --git a/kernel.spec b/kernel.spec index b03adf0..a3fb4be 100644 --- a/kernel.spec +++ b/kernel.spec @@ -51,7 +51,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be prepended with "0.", so # for example a 3 here will become 0.3 # -%global baserelease 1 +%global baserelease 2 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -1957,6 +1957,9 @@ fi # and build. %changelog +* Wed Feb 23 2011 Ben Skeggs 2.6.38-0.rc6.git0.2 +- nouveau: nv4x pciegart fixes, nvc0 accel improvements + * Tue Feb 22 2011 Kyle McMartin 2.6.38-0.rc6.git0.1 - Linux 2.6.38-rc6