From 7cf755a92d9e2b7631f9fcd37e86f3e59281d9b8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sep 05 2008 05:48:02 +0000 Subject: - latest snapshot - r300 bufmgr code - stop building mach64, patch around some intel issues --- diff --git a/.cvsignore b/.cvsignore index ed91d79..3a4010b 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,2 +1,2 @@ gl-manpages-1.0.1.tar.bz2 -mesa-20080814.tar.bz2 +mesa-20080905.tar.bz2 diff --git a/intel-mmio-fix.patch b/intel-mmio-fix.patch new file mode 100644 index 0000000..4d93c7a --- /dev/null +++ b/intel-mmio-fix.patch @@ -0,0 +1,57 @@ +diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c +index c2ad5a0..18e7348 100644 +--- a/src/mesa/drivers/dri/intel/intel_context.c ++++ b/src/mesa/drivers/dri/intel/intel_context.c +@@ -409,10 +409,12 @@ static const struct dri_extension brw_extensions[] = { + { NULL, NULL } + }; + ++#ifdef I915_MMIO_READ + static const struct dri_extension arb_oc_extensions[] = { + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, + {NULL, NULL} + }; ++#endif + + static const struct dri_extension ttm_extensions[] = { + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, +@@ -437,10 +439,12 @@ void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) + if (intel == NULL || intel->ttm) + driInitExtensions(ctx, ttm_extensions, GL_FALSE); + ++#ifdef I915_MMIO_READ + if (intel == NULL || + (IS_965(intel->intelScreen->deviceID) && + intel->intelScreen->drmMinor >= 8)) + driInitExtensions(ctx, arb_oc_extensions, GL_FALSE); ++#endif + + if (intel == NULL || IS_965(intel->intelScreen->deviceID)) + driInitExtensions(ctx, brw_extensions, GL_FALSE); +@@ -538,6 +542,7 @@ intelFinish(GLcontext * ctx) + } + } + ++#ifdef I915_MMIO_READ + static void + intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) + { +@@ -568,6 +573,7 @@ intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) + q->Ready = GL_TRUE; + 
intel->stats_wm--; + } ++#endif + + /** Driver-specific fence emit implementation for the fake memory manager. */ + static unsigned int +@@ -684,8 +690,10 @@ intelInitDriverFunctions(struct dd_function_table *functions) + functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; + functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; + ++#ifdef I915_MMIO_READ + functions->BeginQuery = intelBeginQuery; + functions->EndQuery = intelEndQuery; ++#endif + + intelInitTextureFuncs(functions); + intelInitStateFuncs(functions); diff --git a/mesa-7.1-nukeglthread-debug.patch b/mesa-7.1-nukeglthread-debug.patch new file mode 100644 index 0000000..dc8ad6f --- /dev/null +++ b/mesa-7.1-nukeglthread-debug.patch @@ -0,0 +1,24 @@ +diff -up Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c.intel-glthread Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c +--- Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c.intel-glthread 2008-08-25 10:49:40.000000000 -0400 ++++ Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c 2008-08-28 14:26:17.000000000 -0400 +@@ -633,11 +633,6 @@ intel_render_texture(GLcontext * ctx, + return; + } + +- DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", +- _glthread_GetID(), +- att->Texture->Name, newImage->Width, newImage->Height, +- irb->Base.RefCount); +- + /* point the renderbufer's region to the texture image region */ + intel_image = intel_texture_image(newImage); + if (irb->region != intel_image->mt->region) { +@@ -674,8 +669,6 @@ intel_finish_render_texture(GLcontext * + { + struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); + +- DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name); +- + if (irb) { + /* just release the region */ + intel_region_release(&irb->region); diff --git a/mesa-7.1-osmesa-version.patch b/mesa-7.1-osmesa-version.patch new file mode 100644 index 0000000..cd41ad2 --- /dev/null +++ b/mesa-7.1-osmesa-version.patch @@ -0,0 +1,21 @@ +diff -up 
Mesa-7.1/src/mesa/drivers/osmesa/Makefile.jx Mesa-7.1/src/mesa/drivers/osmesa/Makefile +--- Mesa-7.1/src/mesa/drivers/osmesa/Makefile.jx 2008-08-28 14:05:47.000000000 -0400 ++++ Mesa-7.1/src/mesa/drivers/osmesa/Makefile 2008-08-28 14:07:13.000000000 -0400 +@@ -46,7 +46,7 @@ osmesa8: $(TOP)/lib/$(OSMESA_LIB_NAME) + + $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS) + $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ +- -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ ++ -major 6 -minor 5 -patch 3 \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ + $(OSMESA_LIB_DEPS) $(OBJECTS) +@@ -58,7 +58,7 @@ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS + # with all the other Mesa sources (compiled with -DCHAN_BITS=16/32 + osmesa16: $(OBJECTS) $(CORE_MESA) + $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ +- -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ ++ -major 6 -minor 5 -patch 3 \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ + $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) diff --git a/mesa-7.1pre-nukeglthread-debug.patch b/mesa-7.1pre-nukeglthread-debug.patch deleted file mode 100644 index 0ce6298..0000000 --- a/mesa-7.1pre-nukeglthread-debug.patch +++ /dev/null @@ -1,25 +0,0 @@ -diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c -index 94d499f..2ba596e 100644 ---- a/src/mesa/drivers/dri/intel/intel_fbo.c -+++ b/src/mesa/drivers/dri/intel/intel_fbo.c -@@ -615,11 +615,6 @@ intel_render_texture(GLcontext * ctx, - } - } - -- DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", -- _glthread_GetID(), -- att->Texture->Name, newImage->Width, newImage->Height, -- irb->Base.RefCount); -- - /* point the renderbufer's region to the texture image region */ - intel_image = intel_texture_image(newImage); - if (irb->region != intel_image->mt->region) { -@@ 
-656,8 +651,6 @@ intel_finish_render_texture(GLcontext * ctx, - { - struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); - -- DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name); -- - if (irb) { - /* just release the region */ - intel_region_release(&irb->region); diff --git a/mesa-7.1pre-osmesa-version.patch b/mesa-7.1pre-osmesa-version.patch deleted file mode 100644 index b958f89..0000000 --- a/mesa-7.1pre-osmesa-version.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff -up mesa-20080814/src/mesa/drivers/osmesa/Makefile.osmesa mesa-20080814/src/mesa/drivers/osmesa/Makefile ---- mesa-20080814/src/mesa/drivers/osmesa/Makefile.osmesa 2008-08-28 22:33:46.000000000 +1000 -+++ mesa-20080814/src/mesa/drivers/osmesa/Makefile 2008-08-28 22:34:06.000000000 +1000 -@@ -46,7 +46,7 @@ osmesa8: $(TOP)/lib/$(OSMESA_LIB_NAME) - - $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS) - $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -- -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -+ -major 6 -minor 5 -patch 3 \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ - $(OSMESA_LIB_DEPS) $(OBJECTS) -@@ -58,7 +58,7 @@ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS - # with all the other Mesa sources (compiled with -DCHAN_BITS=16/32 - osmesa16: $(OBJECTS) $(CORE_MESA) - $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -- -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -+ -major 6 -minor 5 -patch 3 \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ - $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) diff --git a/mesa-fixes.patch b/mesa-fixes.patch deleted file mode 100644 index 2abf8e5..0000000 --- a/mesa-fixes.patch +++ /dev/null @@ -1,48 +0,0 @@ -diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c -index 91b835d..ddfdce3 100644 
---- a/src/mesa/drivers/dri/intel/intel_regions.c -+++ b/src/mesa/drivers/dri/intel/intel_regions.c -@@ -455,8 +455,7 @@ static struct intel_region * - intel_recreate_static(struct intel_context *intel, - const char *name, - struct intel_region *region, -- intelRegion *region_desc, -- GLuint mem_type) -+ intelRegion *region_desc) - { - intelScreenPrivate *intelScreen = intel->intelScreen; - int ret; -@@ -537,22 +536,19 @@ intel_recreate_static_regions(struct intel_context *intel) - intel->front_region = - intel_recreate_static(intel, "front", - intel->front_region, -- &intelScreen->front, -- DRM_BO_FLAG_MEM_TT); -+ &intelScreen->front); - - intel->back_region = - intel_recreate_static(intel, "back", - intel->back_region, -- &intelScreen->back, -- DRM_BO_FLAG_MEM_TT); -+ &intelScreen->back); - - #ifdef I915 - if (intelScreen->third.handle) { - intel->third_region = - intel_recreate_static(intel, "third", - intel->third_region, -- &intelScreen->third, -- DRM_BO_FLAG_MEM_TT); -+ &intelScreen->third); - } - #endif /* I915 */ - -@@ -562,6 +558,5 @@ intel_recreate_static_regions(struct intel_context *intel) - intel->depth_region = - intel_recreate_static(intel, "depth", - intel->depth_region, -- &intelScreen->depth, -- DRM_BO_FLAG_MEM_TT); -+ &intelScreen->depth); - } diff --git a/mesa-no-mach64.patch b/mesa-no-mach64.patch new file mode 100644 index 0000000..397531e --- /dev/null +++ b/mesa-no-mach64.patch @@ -0,0 +1,45 @@ +--- configure.ac.mach64 2008-09-05 13:53:24.000000000 +1000 ++++ configure.ac 2008-09-05 13:53:39.000000000 +1000 +@@ -656,7 +656,7 @@ + # because there is no x86-64 system where they could *ever* + # be used. + if test "x$DRI_DIRS" = "xyes"; then +- DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 radeon \ ++ DRI_DIRS="i915 i965 mga r128 r200 r300 radeon \ + savage tdfx unichrome swrast" + fi + ;; +@@ -664,13 +664,13 @@ + # Build only the drivers for cards that exist on PowerPC. + # At some point MGA will be added, but not yet. 
+ if test "x$DRI_DIRS" = "xyes"; then +- DRI_DIRS="mach64 r128 r200 r300 radeon tdfx swrast" ++ DRI_DIRS="r128 r200 r300 radeon tdfx swrast" + fi + ;; + sparc*) + # Build only the drivers for cards that exist on sparc` + if test "x$DRI_DIRS" = "xyes"; then +- DRI_DIRS="mach64 r128 r200 r300 radeon ffb swrast" ++ DRI_DIRS="r128 r200 r300 radeon ffb swrast" + fi + ;; + esac +@@ -689,7 +689,7 @@ + # ffb and gamma are missing because they have not been converted + # to use the new interface. + if test "x$DRI_DIRS" = "xyes"; then +- DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \ ++ DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon tdfx \ + unichrome savage sis swrast" + fi + ;; +@@ -704,7 +704,7 @@ + + # default drivers + if test "x$DRI_DIRS" = "xyes"; then +- DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon s3v \ ++ DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon s3v \ + savage sis tdfx trident unichrome ffb swrast" + fi + diff --git a/mesa.spec b/mesa.spec index b8a850e..ccd5cd5 100644 --- a/mesa.spec +++ b/mesa.spec @@ -12,12 +12,12 @@ %define _default_patch_fuzz 2 %define manpages gl-manpages-1.0.1 -%define gitdate 20080814 +%define gitdate 20080905 Summary: Mesa graphics libraries Name: mesa -Version: 7.1 -Release: 0.38%{?dist} +Version: 7.2 +Release: 0.1%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -29,13 +29,14 @@ Source0: %{name}-%{gitdate}.tar.bz2 Source2: %{manpages}.tar.bz2 Source3: make-git-snapshot.sh -Patch0: mesa-7.1pre-osmesa-version.patch -Patch1: mesa-fixes.patch -Patch2: mesa-7.1pre-nukeglthread-debug.patch +Patch0: mesa-7.1-osmesa-version.patch +Patch2: mesa-7.1-nukeglthread-debug.patch +Patch3: mesa-no-mach64.patch -Patch5: r300-cmdbuf.patch +Patch5: r300-bufmgr.patch Patch7: mesa-7.1-link-shared.patch +Patch8: intel-mmio-fix.patch Patch12: mesa-7.1-disable-intel-classic-warn.patch @@ -164,10 +165,11 @@ This package provides some demo applications for testing Mesa. 
#%setup -q -n Mesa-%{version}pre -b1 -b2 %setup -q -n mesa-%{gitdate} -b2 %patch0 -p1 -b .osmesa -%patch1 -p1 -b .fixes %patch2 -p1 -b .intel-glthread -%patch5 -p1 -b .r300cmdbuf +%patch3 -p0 -b .no-mach64 +%patch5 -p1 -b .r300-bufmgr %patch7 -p1 -b .dricore +%patch8 -p1 -b .intel-mmio %patch12 -p1 -b .intel-nowarn # WARNING: The following files are copyright "Mark J. Kilgard" under the GLUT @@ -240,7 +242,7 @@ make install DESTDIR=$RPM_BUILD_ROOT DRI_DIRS= %if %{with_dri} install -d $RPM_BUILD_ROOT%{_libdir}/dri install -m 0755 -t $RPM_BUILD_ROOT%{_libdir}/dri %{_lib}/libdricore.so >& /dev/null -for f in i810 i915 i965 mach64 mga r128 r200 r300 radeon savage sis swrast tdfx unichrome; do +for f in i810 i915 i965 mga r128 r200 r300 radeon savage sis swrast tdfx unichrome; do so=%{_lib}/${f}_dri.so test -e $so && echo $so done | xargs install -m 0755 -t $RPM_BUILD_ROOT%{_libdir}/dri >& /dev/null || : @@ -308,8 +310,8 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/GL/xmesa_xf86.h %dir %{_includedir}/GL/internal %{_includedir}/GL/internal/dri_interface.h -%{_includedir}/GL/internal/dri_sarea.h %{_libdir}/libGL.so +%{_libdir}/pkgconfig/dri.pc %{_libdir}/pkgconfig/gl.pc %{_datadir}/man/man3/gl[^uX]*.3gl* %{_datadir}/man/man3/glX*.3gl* @@ -356,6 +358,7 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/cubemap %{_bindir}/drawpix %{_bindir}/engine +%{_bindir}/fbo_firecube %{_bindir}/fire %{_bindir}/fogcoord %{_bindir}/fplight @@ -402,6 +405,10 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/mesa-demos-data %changelog +* Fri Sep 05 2008 Dave Airlie 7.2-0.1 +- latest snapshot - r300 bufmgr code +- stop building mach64, patch around some intel issues + * Thu Aug 28 2008 Dave Airlie 7.1-0.38 - latest Mesa snapshot - re-enable tex offset - add r300 command buffer support on top of snapshot diff --git a/r300-bufmgr.patch b/r300-bufmgr.patch new file mode 100644 index 0000000..09c2e95 --- /dev/null +++ b/r300-bufmgr.patch @@ -0,0 +1,7844 @@ +diff --git a/src/mesa/drivers/dri/r200/Makefile 
b/src/mesa/drivers/dri/r200/Makefile +index e9144ac..b6ed58b 100644 +--- a/src/mesa/drivers/dri/r200/Makefile ++++ b/src/mesa/drivers/dri/r200/Makefile +@@ -48,7 +48,8 @@ SYMLINKS = \ + COMMON_SYMLINKS = \ + radeon_chipset.h \ + radeon_screen.c \ +- radeon_screen.h ++ radeon_screen.h \ ++ radeon_buffer.h + + ##### TARGETS ##### + +diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile +index 6ca9342..3bb1ff4 100644 +--- a/src/mesa/drivers/dri/r300/Makefile ++++ b/src/mesa/drivers/dri/r300/Makefile +@@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1) + EGL_SOURCES = server/radeon_egl.c + endif + +-COMMON_SOURCES = \ +- ../../common/driverfuncs.c \ +- ../common/mm.c \ +- ../common/utils.c \ +- ../common/texmem.c \ +- ../common/vblank.c \ +- ../common/xmlconfig.c \ +- ../common/dri_util.c +- + DRIVER_SOURCES = \ + radeon_screen.c \ + radeon_context.c \ +@@ -36,6 +27,7 @@ DRIVER_SOURCES = \ + r300_texmem.c \ + r300_tex.c \ + r300_texstate.c \ ++ r300_mipmap_tree.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ +@@ -51,7 +43,7 @@ DRIVER_SOURCES = \ + r300_swtcl.c \ + $(EGL_SOURCES) + +-C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) ++C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES) + + DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ + -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 +@@ -68,7 +60,8 @@ COMMON_SYMLINKS = \ + radeon_chipset.h \ + radeon_screen.c \ + radeon_screen.h \ +- radeon_span.h ++ radeon_span.h \ ++ radeon_buffer.h + + ##### TARGETS ##### + +diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c +index c069660..dd42bf8 100644 +--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c ++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c +@@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "r300_reg.h" + #include "r300_cmdbuf.h" + #include "r300_emit.h" ++#include "r300_mem.h" ++#include "r300_mipmap_tree.h" + #include "r300_state.h" + + // Set this to 1 for extremely verbose debugging of command buffers + #define DEBUG_CMDBUF 0 + ++/** # of dwords reserved for additional instructions that may need to be written ++ * during flushing. ++ */ ++#define SPACE_FOR_FLUSHING 4 ++ + /** + * Send the current command buffer via ioctl to the hardware. + */ +@@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) + drm_radeon_cmd_buffer_t cmd; + int start; + ++ if (r300->cmdbuf.flushing) { ++ fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n"); ++ exit(-1); ++ } ++ r300->cmdbuf.flushing = 1; ++ + if (r300->radeon.lost_context) { + start = 0; + r300->radeon.lost_context = GL_FALSE; + } else +- start = r300->cmdbuf.count_reemit; ++ start = r300->cmdbuf.reemit; + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s from %s - %i cliprects\n", + __FUNCTION__, caller, r300->radeon.numClipRects); + +- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) +- for (i = start; i < r300->cmdbuf.count_used; ++i) ++ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) { ++ fprintf(stderr, "written: %d committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed); ++ for (i = start; i < r300->cmdbuf.written; ++i) + fprintf(stderr, "%d: %08x\n", i, +- r300->cmdbuf.cmd_buf[i]); ++ ((uint32_t*)r300->cmdbuf.buf->virtual)[i]); ++ } + } + +- cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start); +- cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; ++ if (r300->cmdbuf.written != r300->cmdbuf.committed) { ++ _mesa_problem(r300->radeon.glCtx, ++ "Command buffer contains %d uncommitted dwords\n" ++ "in r300FlushCmdBufLocked called from %s.\n", ++ r300->cmdbuf.written - r300->cmdbuf.committed, caller); ++ } ++ ++ dri_bo_unmap(r300->cmdbuf.buf); ++ dri_process_relocs(r300->cmdbuf.buf); ++ ++ cmd.buf = (char *)r300->cmdbuf.buf->virtual + 
4*start; ++ cmd.bufsz = (r300->cmdbuf.committed - start) * 4; + + if (r300->radeon.state.scissor.enabled) { + cmd.nbox = r300->radeon.state.scissor.numClipRects; +@@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) + radeonWaitForIdleLocked(&r300->radeon); + } + ++ dri_post_submit(r300->cmdbuf.buf); ++ dri_bo_unreference(r300->cmdbuf.buf); ++ + r300->dma.nr_released_bufs = 0; +- r300->cmdbuf.count_used = 0; +- r300->cmdbuf.count_reemit = 0; ++ r300->cmdbuf.buf = radeon_bufmgr_classic_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", ++ r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF); ++ r300->cmdbuf.written = 0; ++ r300->cmdbuf.reserved = 0; ++ r300->cmdbuf.committed = 0; ++ r300->cmdbuf.reemit = 0; ++ dri_bo_map(r300->cmdbuf.buf, GL_TRUE); ++ ++ r300->cmdbuf.flushing = 0; + + return ret; + } +@@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) + int ret; + + LOCK_HARDWARE(&r300->radeon); +- + ret = r300FlushCmdBufLocked(r300, caller); +- + UNLOCK_HARDWARE(&r300->radeon); + + if (ret) { +@@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) + return ret; + } + ++/** ++ * Make sure that enough space is available in the command buffer ++ * by flushing if necessary. 
++ * ++ * \param dwords The number of dwords we need to be free on the command buffer ++ */ ++void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller) ++{ ++ assert(dwords < r300->cmdbuf.size); ++ ++ if (!r300->cmdbuf.flushing) ++ dwords += SPACE_FOR_FLUSHING; ++ ++ if (r300->cmdbuf.written + dwords > r300->cmdbuf.size) ++ r300FlushCmdBuf(r300, caller); ++} ++ ++void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line) ++{ ++ assert(r300->cmdbuf.written == r300->cmdbuf.reserved); ++ ++ r300EnsureCmdBufSpace(r300, n, function); ++ ++ if (autostate && !r300->cmdbuf.written) { ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, ++ "Reemit state after flush (from %s)\n", function); ++ r300EmitState(r300); ++ } ++ ++ r300->cmdbuf.reserved += n; ++ assert(r300->cmdbuf.reserved < r300->cmdbuf.size); ++ ++ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n", ++ n, r300->cmdbuf.written, function, line); ++} ++ + static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) + { + int i; +@@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat + */ + static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) + { ++ BATCH_LOCALS(r300); + struct r300_state_atom *atom; +- uint32_t *dest; + int dwords; + +- dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; +- +- /* Emit WAIT */ +- *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); +- dest++; +- r300->cmdbuf.count_used++; +- +- /* Emit cache flush */ +- *dest = cmdpacket0(R300_TX_INVALTAGS, 1); +- dest++; +- r300->cmdbuf.count_used++; +- +- *dest = R300_TX_FLUSH; +- dest++; +- r300->cmdbuf.count_used++; +- +- /* Emit END3D */ +- *dest = cmdpacify(); +- dest++; +- r300->cmdbuf.count_used++; ++ BEGIN_BATCH_NO_AUTOSTATE(4); ++ OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN)); ++ OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1)); ++ 
OUT_BATCH(R300_TX_FLUSH); ++ OUT_BATCH(cmdpacify()); ++ END_BATCH(); + + /* Emit actual atoms */ +- + foreach(atom, &r300->hw.atomlist) { + if ((atom->dirty || r300->hw.all_dirty) == dirty) { + dwords = (*atom->check) (r300, atom); +@@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + r300PrintStateAtom(r300, atom); + } +- memcpy(dest, atom->cmd, dwords * 4); +- dest += dwords; +- r300->cmdbuf.count_used += dwords; ++ if (atom->emit) { ++ (*atom->emit)(r300); ++ } else { ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_BATCH_TABLE(atom->cmd, dwords); ++ END_BATCH(); ++ } + atom->dirty = GL_FALSE; + } else { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { +@@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) + } + } + } ++ ++ COMMIT_BATCH(); + } + + /** +@@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300) + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + +- if (r300->cmdbuf.count_used && !r300->hw.is_dirty ++ if (r300->cmdbuf.written && !r300->hw.is_dirty + && !r300->hw.all_dirty) + return; + + /* To avoid going across the entire set of states multiple times, just check +- * for enough space for the case of emitting all state, and inline the +- * r300AllocCmdBuf code here without all the checks. ++ * for enough space for the case of emitting all state. 
+ */ + r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); + +- if (!r300->cmdbuf.count_used) { ++ if (!r300->cmdbuf.written) { + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Begin reemit state\n"); + + r300EmitAtoms(r300, GL_FALSE); +- r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; ++ r300->cmdbuf.reemit = r300->cmdbuf.committed; + } + + if (RADEON_DEBUG & DEBUG_STATE) +@@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300) + + r300EmitAtoms(r300, GL_TRUE); + +- assert(r300->cmdbuf.count_used < r300->cmdbuf.size); ++ assert(r300->cmdbuf.written < r300->cmdbuf.size); + + r300->hw.is_dirty = GL_FALSE; + r300->hw.all_dirty = GL_FALSE; +@@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300) + #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) + #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) + ++static void emit_tex_offsets(r300ContextPtr r300) ++{ ++ BATCH_LOCALS(r300); ++ int numtmus = packet0_count(r300->hw.tex.offset.cmd); ++ ++ if (numtmus) { ++ int i; ++ ++ BEGIN_BATCH(numtmus + 1); ++ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus); ++ for(i = 0; i < numtmus; ++i) { ++ r300TexObj *t = r300->hw.textures[i]; ++ if (t && !t->image_override) { ++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET); ++ } else if (!t) { ++ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); ++ } else { ++ OUT_BATCH(t->override_offset); ++ } ++ } ++ END_BATCH(); ++ } ++} ++ ++static void emit_cb_offset(r300ContextPtr r300) ++{ ++ BATCH_LOCALS(r300); ++ struct radeon_renderbuffer *rrb; ++ uint32_t cbpitch; ++ ++ rrb = r300->radeon.state.color.rrb; ++ if (!rrb) { ++ fprintf(stderr, "no rrb\n"); ++ return; ++ } ++ ++ cbpitch = rrb->pitch; ++ if (rrb->cpp == 4) ++ cbpitch |= R300_COLOR_FORMAT_ARGB8888; ++ else ++ cbpitch |= R300_COLOR_FORMAT_RGB565; ++ ++ if (r300->radeon.sarea->tiling_enabled) ++ cbpitch |= R300_COLOR_TILE_ENABLE; ++ ++ BEGIN_BATCH(4); ++ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); 
++ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); ++ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); ++ OUT_BATCH(cbpitch); ++ END_BATCH(); ++} ++ ++static void emit_zb_offset(r300ContextPtr r300) ++{ ++ BATCH_LOCALS(r300); ++ struct radeon_renderbuffer *rrb; ++ uint32_t zbpitch; ++ ++ rrb = r300->radeon.state.depth_buffer; ++ if (!rrb) ++ return; ++ ++ zbpitch = rrb->pitch; ++ ++ BEGIN_BATCH(3); ++ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); ++ OUT_BATCH(zbpitch); ++ END_BATCH(); ++ ++} ++ + static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) + { + return atom->cmd_size; +@@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300) + ALLOC_STATE(rop, always, 2, 0); + r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); + ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); +- r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); +- r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); ++ r300->hw.cb.emit = &emit_cb_offset; + ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); + r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); + ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); +@@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300) + r300->hw.zstencil_format.cmd[0] = + cmdpacket0(R300_ZB_FORMAT, 4); + ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); +- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); ++ r300->hw.zb.emit = emit_zb_offset; + ALLOC_STATE(zb_depthclearvalue, always, 2, 0); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); + ALLOC_STATE(unk4F30, always, 3, 0); +@@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300) + ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); + +- ALLOC_STATE(tex.offset, variable, mtu + 1, 0); ++ ALLOC_STATE(tex.offset, variable, 1, 0); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + 
cmdpacket0(R300_TX_OFFSET_0, 0); ++ r300->hw.tex.offset.emit = &emit_tex_offsets; + + ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = +@@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300) + size * 4, r300->hw.max_state_size * 4); + } + ++ r300->cmdbuf.buf = radeon_bufmgr_classic_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", ++ size*4, 16, DRM_BO_MEM_CMDBUF); + r300->cmdbuf.size = size; +- r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); +- r300->cmdbuf.count_used = 0; +- r300->cmdbuf.count_reemit = 0; ++ r300->cmdbuf.written = 0; ++ r300->cmdbuf.reserved = 0; ++ r300->cmdbuf.committed = 0; ++ r300->cmdbuf.reemit = 0; ++ dri_bo_map(r300->cmdbuf.buf, GL_TRUE); + } + + /** +@@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300) + { + struct r300_state_atom *atom; + +- FREE(r300->cmdbuf.cmd_buf); ++ dri_bo_unmap(r300->cmdbuf.buf); ++ dri_bo_unreference(r300->cmdbuf.buf); + + foreach(atom, &r300->hw.atomlist) { + FREE(atom->cmd); + } + } +- +-void r300EmitBlit(r300ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, GLuint w, GLuint h) +-{ +- drm_r300_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", +- __FUNCTION__, src_pitch, src_offset, srcx, srcy, +- dst_pitch, dst_offset, dstx, dsty, w, h); +- +- assert((src_pitch & 63) == 0); +- assert((dst_pitch & 63) == 0); +- assert((src_offset & 1023) == 0); +- assert((dst_offset & 1023) == 0); +- assert(w < (1 << 16)); +- assert(h < (1 << 16)); +- +- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); +- +- cmd[0].header.cmd_type = R300_CMD_PACKET3; +- cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; +- cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); +- cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | +- 
RADEON_GMC_DST_PITCH_OFFSET_CNTL | +- RADEON_GMC_BRUSH_NONE | +- (color_fmt << 8) | +- RADEON_GMC_SRC_DATATYPE_COLOR | +- RADEON_ROP3_S | +- RADEON_DP_SRC_SOURCE_MEMORY | +- RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); +- +- cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); +- cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); +- cmd[5].u = (srcx << 16) | srcy; +- cmd[6].u = (dstx << 16) | dsty; /* dst */ +- cmd[7].u = (w << 16) | h; +-} +- +-void r300EmitWait(r300ContextPtr rmesa, GLuint flags) +-{ +- drm_r300_cmd_header_t *cmd; +- +- assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); +- +- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].u = 0; +- cmd[0].wait.cmd_type = R300_CMD_WAIT; +- cmd[0].wait.flags = flags; +-} +diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h +index a8eaa58..5c84b67 100644 +--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h ++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h +@@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300); + + extern void r300InitCmdBuf(r300ContextPtr r300); + extern void r300DestroyCmdBuf(r300ContextPtr r300); ++extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller); ++ ++extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line); + + /** +- * Make sure that enough space is available in the command buffer +- * by flushing if necessary. +- * +- * \param dwords The number of dwords we need to be free on the command buffer ++ * Every function writing to the command buffer needs to declare this ++ * to get the necessary local variables. 
+ */ +-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, +- int dwords, const char *caller) +-{ +- assert(dwords < r300->cmdbuf.size); ++#define BATCH_LOCALS(r300) \ ++ const r300ContextPtr b_l_r300 = r300 + +- if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) +- r300FlushCmdBuf(r300, caller); +-} ++/** ++ * Prepare writing n dwords to the command buffer, ++ * including producing any necessary state emits on buffer wraparound. ++ */ ++#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__) ++ ++/** ++ * Same as BEGIN_BATCH, but do not cause automatic state emits. ++ */ ++#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__) ++ ++/** ++ * Write one dword to the command buffer. ++ */ ++#define OUT_BATCH(data) \ ++ do { \ ++ if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ ++ ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ ++ } else { \ ++ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ ++ } \ ++ } while(0) + + /** +- * Allocate the given number of dwords in the command buffer and return +- * a pointer to the allocated area. +- * When necessary, these functions cause a flush. r300AllocCmdBuf() also +- * causes state reemission after a flush. This is necessary to ensure +- * correct hardware state after an unlock. ++ * Write a relocated dword to the command buffer. + */ ++#define OUT_BATCH_RELOC(data, bo, offset, flags) \ ++ do { \ ++ if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ ++ radeon_bufmgr_classic_emit_reloc(b_l_r300->cmdbuf.buf, flags, offset, 4*b_l_r300->cmdbuf.written, bo); \ ++ ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ ++ } else { \ ++ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ ++ } \ ++ } while(0) ++ ++/** ++ * Write n dwords from ptr to the command buffer. 
++ */ ++#define OUT_BATCH_TABLE(ptr,n) \ ++ do { \ ++ int _n = n; \ ++ if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \ ++ memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \ ++ b_l_r300->cmdbuf.written += _n; \ ++ } else { \ ++ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \ ++ } \ ++ } while(0) ++ ++/** ++ * Finish writing dwords to the command buffer. ++ * The number of (direct or indirect) OUT_BATCH calls between the previous ++ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time. ++ */ ++#define END_BATCH() \ ++ do { \ ++ if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \ ++ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \ ++ } while(0) ++ ++/** ++ * After the last END_BATCH() of rendering, this indicates that flushing ++ * the command buffer now is okay. ++ */ ++#define COMMIT_BATCH() \ ++ do { \ ++ assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \ ++ b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \ ++ } while(0) ++ + static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, + int dwords, const char *caller) + { +@@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, + + r300EnsureCmdBufSpace(r300, dwords, caller); + +- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; +- r300->cmdbuf.count_used += dwords; ++ ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; ++ r300->cmdbuf.written += dwords; ++ r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; + return ptr; + } + +@@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, + + r300EnsureCmdBufSpace(r300, dwords, caller); + +- if (!r300->cmdbuf.count_used) { ++ if (!r300->cmdbuf.written) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "Reemit state after flush (from %s)\n", caller); + 
r300EmitState(r300); + } + +- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; +- r300->cmdbuf.count_used += dwords; ++ ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; ++ r300->cmdbuf.written += dwords; ++ r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; + return ptr; + } + +-extern void r300EmitBlit(r300ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, GLuint w, GLuint h); +- +-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); +-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); +-extern void r300EmitVertexShader(r300ContextPtr rmesa); +-extern void r300EmitPixelShader(r300ContextPtr rmesa); +- + #endif /* __R300_CMDBUF_H__ */ +diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c +index fcf571d..6c6b5ba 100644 +--- a/src/mesa/drivers/dri/r300/r300_context.c ++++ b/src/mesa/drivers/dri/r300/r300_context.c +@@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "radeon_span.h" + #include "r300_context.h" + #include "r300_cmdbuf.h" ++#include "r300_mipmap_tree.h" + #include "r300_state.h" + #include "r300_ioctl.h" + #include "r300_tex.h" + #include "r300_emit.h" + #include "r300_swtcl.h" + +-#ifdef USER_BUFFERS + #include "r300_mem.h" +-#endif + + #include "vblank.h" + #include "utils.h" +@@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + struct dd_function_table functions; + r300ContextPtr r300; + GLcontext *ctx; +- int tcl_mode, i; ++ int tcl_mode; + + assert(glVisual); + assert(driContextPriv); +@@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + r300InitTextureFuncs(&functions); + r300InitShaderFuncs(&functions); + +-#ifdef USER_BUFFERS +- r300_mem_init(r300); +-#endif +- + if (!radeonInitContext(&r300->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { +@@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + return GL_FALSE; + } + ++ r300->radeon.bufmgr = radeonBufmgrClassicInit(r300); ++ + /* Init r300 context data */ +- r300->dma.buf0_address = +- r300->radeon.radeonScreen->buffers->list[0].address; +- +- (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); +- make_empty_list(&r300->swapped); +- +- r300->nr_heaps = 1 /* screen->numTexHeaps */ ; +- assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); +- for (i = 0; i < r300->nr_heaps; i++) { +- /* *INDENT-OFF* */ +- r300->texture_heaps[i] = driCreateTextureHeap(i, r300, +- screen-> +- texSize[i], 12, +- RADEON_NR_TEX_REGIONS, +- (drmTextureRegionPtr) +- r300->radeon.sarea-> +- tex_list[i], +- &r300->radeon.sarea-> +- tex_age[i], +- &r300->swapped, +- sizeof +- (r300TexObj), +- (destroy_texture_object_t +- *) +- r300DestroyTexObj); +- /* *INDENT-ON* */ +- } + r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, + "texture_depth"); + if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) +@@ -299,13 +269,11 @@ GLboolean 
r300CreateContext(const __GLcontextModes * glVisual, + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + +-#ifdef USER_BUFFERS + /* Needs further modifications */ + #if 0 + ctx->Const.MaxArrayLockSize = + ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); + #endif +-#endif + + /* Initialize the software rasterizer and helper modules. + */ +@@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + return GL_TRUE; + } + +-static void r300FreeGartAllocations(r300ContextPtr r300) +-{ +- int i, ret, tries = 0, done_age, in_use = 0; +- drm_radeon_mem_free_t memfree; +- +- memfree.region = RADEON_MEM_REGION_GART; +- +-#ifdef USER_BUFFERS +- for (i = r300->rmm->u_last; i > 0; i--) { +- if (r300->rmm->u_list[i].ptr == NULL) { +- continue; +- } +- +- /* check whether this buffer is still in use */ +- if (r300->rmm->u_list[i].pending) { +- in_use++; +- } +- } +- /* Cannot flush/lock if no context exists. */ +- if (in_use) +- r300FlushCmdBuf(r300, __FUNCTION__); +- +- done_age = radeonGetAge((radeonContextPtr) r300); +- +- for (i = r300->rmm->u_last; i > 0; i--) { +- if (r300->rmm->u_list[i].ptr == NULL) { +- continue; +- } +- +- /* check whether this buffer is still in use */ +- if (!r300->rmm->u_list[i].pending) { +- continue; +- } +- +- assert(r300->rmm->u_list[i].h_pending == 0); +- +- tries = 0; +- while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { +- usleep(10); +- done_age = radeonGetAge((radeonContextPtr) r300); +- } +- if (tries >= 1000) { +- WARN_ONCE("Failed to idle region!"); +- } +- +- memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - +- (char *)r300->radeon.radeonScreen->gartTextures.map; +- +- ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, +- DRM_RADEON_FREE, &memfree, +- sizeof(memfree)); +- if (ret) { +- fprintf(stderr, "Failed to free at %p\nret = %s\n", +- r300->rmm->u_list[i].ptr, strerror(-ret)); +- } else { +- if (i == r300->rmm->u_last) +- 
r300->rmm->u_last--; +- +- r300->rmm->u_list[i].pending = 0; +- r300->rmm->u_list[i].ptr = NULL; +- } +- } +- r300->rmm->u_head = i; +-#endif /* USER_BUFFERS */ +-} +- + /* Destroy the device specific context. + */ + void r300DestroyContext(__DRIcontextPrivate * driContextPriv) +@@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) + assert(r300); /* should never be null */ + + if (r300) { +- GLboolean release_texture_heaps; +- +- release_texture_heaps = +- (r300->radeon.glCtx->Shared->RefCount == 1); + _swsetup_DestroyContext(r300->radeon.glCtx); + _tnl_ProgramCacheDestroy(r300->radeon.glCtx); + _tnl_DestroyContext(r300->radeon.glCtx); + _vbo_DestroyContext(r300->radeon.glCtx); + _swrast_DestroyContext(r300->radeon.glCtx); + +- if (r300->dma.current.buf) { +- r300ReleaseDmaRegion(r300, &r300->dma.current, +- __FUNCTION__); +-#ifndef USER_BUFFERS +- r300FlushCmdBuf(r300, __FUNCTION__); +-#endif ++ if (r300->dma.current) { ++ dri_bo_unreference(r300->dma.current); ++ r300->dma.current = 0; + } +- r300FreeGartAllocations(r300); ++ r300FlushCmdBuf(r300, __FUNCTION__); + r300DestroyCmdBuf(r300); + + if (radeon->state.scissor.pClipRects) { +@@ -521,29 +416,14 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) + radeon->state.scissor.pClipRects = NULL; + } + +- if (release_texture_heaps) { +- /* This share group is about to go away, free our private +- * texture object data. 
+- */ +- int i; +- +- for (i = 0; i < r300->nr_heaps; i++) { +- driDestroyTextureHeap(r300->texture_heaps[i]); +- r300->texture_heaps[i] = NULL; +- } +- +- assert(is_empty_list(&r300->swapped)); +- } +- + radeonCleanupContext(&r300->radeon); + +-#ifdef USER_BUFFERS +- /* the memory manager might be accessed when Mesa frees the shared +- * state, so don't destroy it earlier +- */ +- r300_mem_destroy(r300); +-#endif + ++ /* the memory manager might be accessed when Mesa frees the shared ++ * state, so don't destroy it earlier ++ */ ++ dri_bufmgr_destroy(&r300->radeon.bufmgr->base); ++ r300->radeon.bufmgr = 0; + /* free the option cache */ + driDestroyOptionCache(&r300->radeon.optionCache); + +diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h +index d2017f8..047caae 100644 +--- a/src/mesa/drivers/dri/r300/r300_context.h ++++ b/src/mesa/drivers/dri/r300/r300_context.h +@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "tnl/t_vertex.h" + #include "drm.h" + #include "radeon_drm.h" ++#include "dri_bufmgr.h" + #include "dri_util.h" + #include "texmem.h" + +@@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "mtypes.h" + #include "colormac.h" + +-#define USER_BUFFERS +- + struct r300_context; + typedef struct r300_context r300ContextRec; + typedef struct r300_context *r300ContextPtr; ++typedef struct radeon_bufmgr radeon_bufmgr; + + #include "radeon_lock.h" + #include "mm.h" +@@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f) + + /************ DMA BUFFERS **************/ + +-/* Need refcounting on dma buffers: +- */ +-struct r300_dma_buffer { +- int refcount; /**< the number of retained regions in buf */ +- drmBufPtr buf; +- int id; +-}; +-#undef GET_START +-#ifdef USER_BUFFERS +-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) +-#else +-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ +- (rvb)->address - rmesa->dma.buf0_address + \ +- (rvb)->start) +-#endif +-/* A retained region, eg vertices for indexed vertices. +- */ +-struct r300_dma_region { +- struct r300_dma_buffer *buf; +- char *address; /* == buf->address */ +- int start, end, ptr; /* offsets from start of buf */ +- +- int aos_offset; /* address in GART memory */ +- int aos_stride; /* distance between elements, in dwords */ +- int aos_size; /* number of components (1-4) */ +-}; +- + struct r300_dma { + /* Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ +- struct r300_dma_region current; ++ dri_bo *current; /** Buffer that DMA memory is allocated from */ ++ int current_used; /** Number of bytes allocated and forgotten about */ ++ int current_vertexptr; /** End of active vertex region */ + ++ /** ++ * If current_vertexptr != current_used then flush must be non-zero. ++ * flush must be called before non-active vertex allocations can be ++ * performed. 
++ */ + void (*flush) (r300ContextPtr); + +- char *buf0_address; /* start of buf[0], for index calcs */ +- + /* Number of "in-flight" DMA buffers, i.e. the number of buffers + * for which a DISCARD command is currently queued in the command buffer. + */ +@@ -173,17 +151,13 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; + /* Texture object in locally shared texture space. + */ + struct r300_tex_obj { +- driTextureObject base; +- +- GLuint bufAddr; /* Offset to start of locally +- shared texture block */ +- +- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; +- /* Six, for the cube faces */ ++ struct gl_texture_object base; ++ struct _r300_mipmap_tree *mt; ++ GLuint dirty_images[6]; + + GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ ++ GLuint override_offset; + +- GLuint pitch; /* this isn't sent to hardware just used in calculations */ + /* hardware register values */ + /* Note that R200 has 8 registers per texture and R300 only 7 */ + GLuint filter; +@@ -191,30 +165,16 @@ struct r300_tex_obj { + GLuint pitch_reg; + GLuint size; /* npot only */ + GLuint format; +- GLuint offset; /* Image location in the card's address space. +- All cube faces follow. */ +- GLuint unknown4; +- GLuint unknown5; +- /* end hardware registers */ +- +- /* registers computed by r200 code - keep them here to +- compare against what is actually written. +- +- to be removed later.. 
*/ + GLuint pp_border_color; +- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ +- GLuint format_x; +- +- GLboolean border_fallback; ++ /* end hardware registers */ + + GLuint tile_bits; /* hw texture tile bits used on this texture */ + }; + +-struct r300_texture_env_state { +- r300TexObjPtr texobj; +- GLenum format; +- GLenum envMode; +-}; ++static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj) ++{ ++ return (r300TexObj*)texObj; ++} + + /* The blit width for texture uploads + */ +@@ -222,7 +182,6 @@ struct r300_texture_env_state { + #define R300_MAX_TEXTURE_UNITS 8 + + struct r300_texture_state { +- struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; + int tc_count; /* number of incoming texture coordinates from VAP */ + }; + +@@ -242,6 +201,7 @@ struct r300_state_atom { + GLboolean dirty; + + int (*check) (r300ContextPtr, struct r300_state_atom * atom); ++ void (*emit) (r300ContextPtr); + }; + + #define R300_VPT_CMD_0 0 +@@ -549,6 +509,8 @@ struct r300_hw_state { + struct r300_state_atom border_color; + } tex; + struct r300_state_atom txe; /* tex enable (4104) */ ++ ++ r300TexObj *textures[R300_MAX_TEXTURE_UNITS]; + }; + + /** +@@ -559,10 +521,14 @@ struct r300_hw_state { + * otherwise. 
+ */ + struct r300_cmdbuf { +- int size; /* DWORDs allocated for buffer */ +- uint32_t *cmd_buf; +- int count_used; /* DWORDs filled so far */ +- int count_reemit; /* size of re-emission batch */ ++ dri_bo *buf; ++ int reemit; /** # of dwords in reemit sequence (is always <= committed) */ ++ int size; /** # of dwords total */ ++ ++ int committed; /** # of dwords that we have committed to */ ++ int written; /** # of dwords written (is always >= committed) */ ++ int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */ ++ unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */ + }; + + /** +@@ -811,18 +777,25 @@ struct r500_fragment_program { + #define REG_COLOR0 1 + #define REG_TEX0 2 + ++struct r300_aos { ++ dri_bo *bo; /** Buffer object where vertex data is stored */ ++ int offset; /** Offset into buffer object, in bytes */ ++ int components; /** Number of components per vertex */ ++ int stride; /** Stride in dwords (may be 0 for repeating) */ ++ int count; /** Number of vertices */ ++}; ++ + struct r300_state { + struct r300_depthbuffer_state depth; + struct r300_texture_state texture; + int sw_tcl_inputs[VERT_ATTRIB_MAX]; + struct r300_vertex_shader_state vertex_shader; +- struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; ++ struct r300_aos aos[R300_MAX_AOS_ARRAYS]; + int aos_count; + +- GLuint *Elts; +- struct r300_dma_region elt_dma; ++ dri_bo *elt_dma_bo; /** Buffer object that contains element indices */ ++ int elt_dma_offset; /** Offset into this buffer object, in bytes */ + +- struct r300_dma_region swtcl_dma; + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + +@@ -880,13 +853,6 @@ struct r300_swtcl_info { + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; +- +- /** +- * Should Mesa project vertex data or will the hardware do it? 
+- */ +- GLboolean needproj; +- +- struct r300_dma_region indexed_verts; + }; + + +@@ -905,25 +871,11 @@ struct r300_context { + /* Vertex buffers + */ + struct r300_dma dma; +- GLboolean save_on_next_unlock; + GLuint NewGLState; + +- /* Texture object bookkeeping +- */ +- unsigned nr_heaps; +- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; +- driTextureObject swapped; + int texture_depth; + float initialMaxAnisotropy; + +- /* Clientdata textures; +- */ +- GLuint prefer_gart_client_texturing; +- +-#ifdef USER_BUFFERS +- struct r300_memory_manager *rmm; +-#endif +- + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + +diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c +index 2ea17ad..5e2afd5 100644 +--- a/src/mesa/drivers/dri/r300/r300_emit.c ++++ b/src/mesa/drivers/dri/r300/r300_emit.c +@@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_emit.h" + #include "r300_ioctl.h" + +-#ifdef USER_BUFFERS + #include "r300_mem.h" +-#endif + + #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ +@@ -86,11 +84,9 @@ do { \ + } while (0) + #endif + +-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) ++static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", +@@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, + } + } + +-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) ++static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d 
stride %d out %p data %p\n", +@@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, + } + } + +-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) ++static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", +@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, + } + } + +-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) ++static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", +@@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, + } + } + +-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, ++ ++static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos, + GLvoid * data, int size, int stride, int count) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ uint32_t *out; + + if (stride == 0) { +- r300AllocDmaRegion(rmesa, rvb, size * 4, 4); ++ r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); + count = 1; +- rvb->aos_offset = GET_START(rvb); +- rvb->aos_stride = 0; ++ aos->stride = 0; + } else { +- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); +- rvb->aos_offset = GET_START(rvb); +- rvb->aos_stride = size; ++ r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); ++ aos->stride = size; + } + ++ aos->components = size; ++ aos->count = count; ++ ++ out = (uint32_t*)((char*)aos->bo->virtual + aos->offset); + switch (size) { +- case 1: +- r300EmitVec4(ctx, rvb, data, stride, count); +- break; +- case 2: +- 
r300EmitVec8(ctx, rvb, data, stride, count); +- break; +- case 3: +- r300EmitVec12(ctx, rvb, data, stride, count); +- break; +- case 4: +- r300EmitVec16(ctx, rvb, data, stride, count); +- break; ++ case 1: r300EmitVec4(out, data, stride, count); break; ++ case 2: r300EmitVec8(out, data, stride, count); break; ++ case 3: r300EmitVec12(out, data, stride, count); break; ++ case 4: r300EmitVec16(out, data, stride, count); break; + default: + assert(0); + break; +@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx) + } + + for (i = 0; i < nr; i++) { +- int ci, fix, found = 0; ++ int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; +@@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx) + swizzle[i][ci] = ci; + } + +- if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { +- if (vb->AttribPtr[tab[i]]->stride % 4) { +- return R300_FALLBACK_TCL; +- } +- rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); +- rmesa->state.aos[i].start = 0; +- rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); +- rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; +- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; +- } else { +- r300EmitVec(ctx, &rmesa->state.aos[i], +- vb->AttribPtr[tab[i]]->data, +- vb->AttribPtr[tab[i]]->size, +- vb->AttribPtr[tab[i]]->stride, count); +- } +- +- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; +- +- for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { +- if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { +- continue; +- } +- found = 1; +- break; +- } +- +- if (found) { +- if (fix > 0) { +- WARN_ONCE("Feeling lucky?\n"); +- } +- rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; +- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { +- swizzle[i][ci] += fix; +- } +- } else { +- WARN_ONCE +- ("Cannot handle offset %x with stride %d, comp %d\n", +- 
rmesa->state.aos[i].aos_offset, +- rmesa->state.aos[i].aos_stride, +- vb->AttribPtr[tab[i]]->size); +- return R300_FALLBACK_TCL; +- } ++ r300EmitVec(ctx, &rmesa->state.aos[i], ++ vb->AttribPtr[tab[i]]->data, ++ vb->AttribPtr[tab[i]]->size, ++ vb->AttribPtr[tab[i]]->stride, count); + } + + /* Setup INPUT_ROUTE. */ +@@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx) + return R300_FALLBACK_NONE; + } + +-#ifdef USER_BUFFERS +-void r300UseArrays(GLcontext * ctx) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- int i; +- +- if (rmesa->state.elt_dma.buf) +- r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); +- +- for (i = 0; i < rmesa->state.aos_count; i++) { +- if (rmesa->state.aos[i].buf) +- r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); +- } +-} +-#endif +- + void r300ReleaseArrays(GLcontext * ctx) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + +- r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); ++ if (rmesa->state.elt_dma_bo) { ++ dri_bo_unreference(rmesa->state.elt_dma_bo); ++ rmesa->state.elt_dma_bo = 0; ++ } + for (i = 0; i < rmesa->state.aos_count; i++) { +- r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); ++ if (rmesa->state.aos[i].bo) { ++ dri_bo_unreference(rmesa->state.aos[i].bo); ++ rmesa->state.aos[i].bo = 0; ++ } + } + } + + void r300EmitCacheFlush(r300ContextPtr rmesa) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- +- drm_radeon_cmd_header_t *cmd = NULL; +- +- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); +- e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | +- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); ++ BATCH_LOCALS(rmesa); ++ ++ BEGIN_BATCH(4); ++ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, ++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | ++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); ++ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, ++ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | ++ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); ++ END_BATCH(); ++ COMMIT_BATCH(); ++} + +- 
reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); +- e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | +- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); ++void r300EmitBlit(r300ContextPtr rmesa, ++ GLuint color_fmt, ++ GLuint src_pitch, ++ dri_bo *src_bo, int src_offset, ++ GLuint dst_pitch, ++ GLuint dst_offset, ++ GLint srcx, GLint srcy, ++ GLint dstx, GLint dsty, GLuint w, GLuint h) ++{ ++ BATCH_LOCALS(rmesa); ++ ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, ++ "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", ++ __FUNCTION__, src_pitch, src_offset, srcx, srcy, ++ dst_pitch, dst_offset, dstx, dsty, w, h); ++ ++ assert((src_pitch & 63) == 0); ++ assert((dst_pitch & 63) == 0); ++ assert((src_offset & 1023) == 0); ++ assert((dst_offset & 1023) == 0); ++ assert(w < (1 << 16)); ++ assert(h < (1 << 16)); ++ ++ BEGIN_BATCH(8); ++ OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5); ++ OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | ++ RADEON_GMC_DST_PITCH_OFFSET_CNTL | ++ RADEON_GMC_BRUSH_NONE | ++ (color_fmt << 8) | ++ RADEON_GMC_SRC_DATATYPE_COLOR | ++ RADEON_ROP3_S | ++ RADEON_DP_SRC_SOURCE_MEMORY | ++ RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); ++ OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER); ++ OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10)); ++ OUT_BATCH((srcx << 16) | srcy); ++ OUT_BATCH((dstx << 16) | dsty); ++ OUT_BATCH((w << 16) | h); ++ END_BATCH(); + } +diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h +index 5950539..179983d 100644 +--- a/src/mesa/drivers/dri/r300/r300_emit.h ++++ b/src/mesa/drivers/dri/r300/r300_emit.h +@@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void) + return cmd.u; + } + +-/** +- * Prepare to write a register value to register at address reg. +- * If num_extra > 0 then the following extra values are written +- * to registers with address +4, +8 and so on.. 
+- */ +-#define reg_start(reg, num_extra) \ +- do { \ +- int _n; \ +- _n=(num_extra); \ +- cmd = (drm_radeon_cmd_header_t*) \ +- r300AllocCmdBuf(rmesa, \ +- (_n+2), \ +- __FUNCTION__); \ +- cmd_reserved=_n+2; \ +- cmd_written=1; \ +- cmd[0].i=cmdpacket0((reg), _n+1); \ +- } while (0); ++ ++/** Single register write to command buffer; requires 2 dwords. */ ++#define OUT_BATCH_REGVAL(reg, val) \ ++ OUT_BATCH(cmdpacket0((reg), 1)); \ ++ OUT_BATCH((val)) ++ ++/** Continuous register range write to command buffer; requires 1 dword, ++ * expects count dwords afterwards for register contents. */ ++#define OUT_BATCH_REGSEQ(reg, count) \ ++ OUT_BATCH(cmdpacket0((reg), (count))); ++ ++/** Write a 32 bit float to the ring; requires 1 dword. */ ++#define OUT_BATCH_FLOAT32(f) \ ++ OUT_BATCH(r300PackFloat32((f))); + + /** +- * Emit GLuint freestyle ++ * Write the header of a packet3 to the command buffer. ++ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. + */ +-#define e32(dword) \ +- do { \ +- if(cmd_written 0x3fff) { \ +- fprintf(stderr,"Too big packet3 %08x: cannot " \ +- "store %d dwords\n", \ +- _p, _n); \ +- _mesa_exit(-1); \ +- } \ +- cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ +- cmd[1].i = _p | ((_n & 0x3fff)<<16); \ +- } +- + /** + * Must be sent to switch to 2d commands + */ + void static INLINE end_3d(r300ContextPtr rmesa) + { +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(rmesa); + +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].header.cmd_type = R300_CMD_END3D; ++ BEGIN_BATCH(1); ++ OUT_BATCH(cmdpacify()); ++ END_BATCH(); + } + + void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) + { +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(rmesa); + +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].i = cmdcpdelay(count); ++ BEGIN_BATCH(1); ++ OUT_BATCH(cmdcpdelay(count)); ++ END_BATCH(); + } + + void static INLINE 
cp_wait(r300ContextPtr rmesa, unsigned char flags) + { +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(rmesa); + +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].i = cmdwait(flags); ++ BEGIN_BATCH(1); ++ OUT_BATCH(cmdwait(flags)); ++ END_BATCH(); + } + + extern int r300EmitArrays(GLcontext * ctx); + +-#ifdef USER_BUFFERS +-void r300UseArrays(GLcontext * ctx); +-#endif +- + extern void r300ReleaseArrays(GLcontext * ctx); + extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); + extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); +@@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); + extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); + extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); + ++extern void r300EmitBlit(r300ContextPtr rmesa, ++ GLuint color_fmt, ++ GLuint src_pitch, ++ dri_bo *src_bo, int src_offset, ++ GLuint dst_pitch, ++ GLuint dst_offset, ++ GLint srcx, GLint srcy, ++ GLint dstx, GLint dsty, GLuint w, GLuint h); ++ + #endif +diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c +index bd7f060..2b8b266 100644 +--- a/src/mesa/drivers/dri/r300/r300_ioctl.c ++++ b/src/mesa/drivers/dri/r300/r300_ioctl.c +@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "radeon_reg.h" + #include "r300_emit.h" + #include "r300_fragprog.h" ++#include "r300_mem.h" + + #include "vblank.h" + +@@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define CLEARBUFFER_DEPTH 0x2 + #define CLEARBUFFER_STENCIL 0x4 + +-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) ++static void r300ClearBuffer(r300ContextPtr r300, int flags, ++ struct radeon_renderbuffer *rrb) + { ++ BATCH_LOCALS(r300); + GLcontext *ctx = r300->radeon.glCtx; + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; +- GLuint cboffset, cbpitch; +- drm_r300_cmd_header_t *cmd2; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; ++ GLuint cbpitch = 0; + r300ContextPtr rmesa = r300; + + if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", +- __FUNCTION__, buffer ? "back" : "front", +- dPriv->x, dPriv->y, dPriv->w, dPriv->h); +- +- if (buffer) { +- cboffset = r300->radeon.radeonScreen->backOffset; +- cbpitch = r300->radeon.radeonScreen->backPitch; +- } else { +- cboffset = r300->radeon.radeonScreen->frontOffset; +- cbpitch = r300->radeon.radeonScreen->frontPitch; ++ fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", ++ __FUNCTION__, rrb, dPriv->x, dPriv->y, ++ dPriv->w, dPriv->h); ++ ++ if (rrb) { ++ cbpitch = rrb->pitch; ++ if (rrb->cpp == 4) ++ cbpitch |= R300_COLOR_FORMAT_ARGB8888; ++ else ++ cbpitch |= R300_COLOR_FORMAT_RGB565; ++ ++ if (r300->radeon.sarea->tiling_enabled) ++ cbpitch |= R300_COLOR_TILE_ENABLE; + } + +- cboffset += r300->radeon.radeonScreen->fbLocation; +- ++ /* TODO in bufmgr */ + cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + end_3d(rmesa); + +- R300_STATECHANGE(r300, cb); +- reg_start(R300_RB3D_COLOROFFSET0, 0); +- e32(cboffset); +- +- if (r300->radeon.radeonScreen->cpp == 4) +- cbpitch |= R300_COLOR_FORMAT_ARGB8888; +- else +- cbpitch |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- cbpitch |= R300_COLOR_TILE_ENABLE; +- +- reg_start(R300_RB3D_COLORPITCH0, 0); +- e32(cbpitch); +- +- R300_STATECHANGE(r300, cmk); +- reg_start(RB3D_COLOR_CHANNEL_MASK, 0); ++ BEGIN_BATCH(19); ++ 
OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); ++ OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); + ++ OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); + if (flags & CLEARBUFFER_COLOR) { +- e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | +- (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | +- (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | +- (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); ++ OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | ++ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | ++ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | ++ (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); + } else { +- e32(0x0); ++ OUT_BATCH(0); + } + +- R300_STATECHANGE(r300, zs); +- reg_start(R300_ZB_CNTL, 2); ++ OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); + + { + uint32_t t1, t2; +@@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) + R300_S_FRONT_ZFAIL_OP_SHIFT); + } + +- e32(t1); +- e32(t2); +- e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | +- (ctx->Stencil.Clear & R300_STENCILREF_MASK)); ++ OUT_BATCH(t1); ++ OUT_BATCH(t2); ++ OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | ++ (ctx->Stencil.Clear & R300_STENCILREF_MASK)); + } + +- cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); +- cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; +- cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; +- cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); +- cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); +- cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); +- cmd2[4].u = r300PackFloat32(1.0); +- cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); +- cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); 
+- cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); +- cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); ++ OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR)); ++ OUT_BATCH_FLOAT32(dPriv->w / 2.0); ++ OUT_BATCH_FLOAT32(dPriv->h / 2.0); ++ OUT_BATCH_FLOAT32(ctx->Depth.Clear); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); ++ END_BATCH(); + + r300EmitCacheFlush(rmesa); + cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); ++ ++ R300_STATECHANGE(r300, cb); ++ R300_STATECHANGE(r300, cmk); ++ R300_STATECHANGE(r300, zs); + } + + static void r300EmitClearState(GLcontext * ctx) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); +- r300ContextPtr rmesa = r300; ++ BATCH_LOCALS(r300); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + int i; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; + int has_tcl = 1; + int is_r500 = 0; + GLuint vap_cntl; +@@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx) + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + +- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- is_r500 = 1; +- ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ++ is_r500 = 1; + +- /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and +- * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are +- * quite complex; see the functions in r300_emit.c. ++ /* State atom dirty tracking is a little subtle here. ++ * ++ * On the one hand, we need to make sure base state is emitted ++ * here if we start with an empty batch buffer, otherwise clear ++ * works incorrectly with multiple processes. Therefore, the first ++ * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. 
+ * +- * I believe it would be a good idea to extend the functions in +- * r300_emit.c so that they can be used to setup the default values for +- * these registers, as well as the actual values used for rendering. ++ * On the other hand, implicit state emission clears the state atom ++ * dirty bits, so we have to call R300_STATECHANGE later than the ++ * first BEGIN_BATCH. ++ * ++ * The final trickiness is that, because we change state, we need ++ * to ensure that any stored swtcl primitives are flushed properly ++ * before we start changing state. See the R300_NEWPRIM in r300Clear ++ * for this. + */ +- R300_STATECHANGE(r300, vir[0]); +- reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); ++ BEGIN_BATCH(31); ++ OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); + if (!has_tcl) +- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + else +- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + +- /* disable fog */ +- R300_STATECHANGE(r300, fogs); +- reg_start(R300_FG_FOG_BLEND, 0); +- e32(0x0); +- +- R300_STATECHANGE(r300, vir[1]); +- reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); +- e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | ++ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); ++ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, ++ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | +@@ -226,238 
+216,246 @@ static void r300EmitClearState(GLcontext * ctx) + << R300_SWIZZLE1_SHIFT))); + + /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ +- R300_STATECHANGE(r300, vic); +- reg_start(R300_VAP_VTX_STATE_CNTL, 1); +- e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); +- e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); ++ OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); ++ OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); ++ OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + +- R300_STATECHANGE(r300, vte); + /* comes from fglrx startup of clear */ +- reg_start(R300_SE_VTE_CNTL, 1); +- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | +- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | +- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | +- R300_VPORT_Z_OFFSET_ENA); +- e32(0x8); ++ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); ++ OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | ++ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | ++ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | ++ R300_VPORT_Z_OFFSET_ENA); ++ OUT_BATCH(0x8); + +- reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); +- e32(0xaaaaaaaa); ++ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + +- R300_STATECHANGE(r300, vof); +- reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); +- e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | +- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); +- e32(0x0); /* no textures */ ++ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); ++ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | ++ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); ++ OUT_BATCH(0); /* no textures */ + +- R300_STATECHANGE(r300, txe); +- reg_start(R300_TX_ENABLE, 0); +- e32(0x0); ++ OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); + +- R300_STATECHANGE(r300, vpt); +- reg_start(R300_SE_VPORT_XSCALE, 5); +- efloat(1.0); +- efloat(dPriv->x); +- efloat(1.0); +- efloat(dPriv->y); +- efloat(1.0); +- efloat(0.0); ++ OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); ++ 
OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(dPriv->x); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(dPriv->y); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(0.0); + +- R300_STATECHANGE(r300, at); +- reg_start(R300_FG_ALPHA_FUNC, 0); +- e32(0x0); ++ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); ++ ++ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ END_BATCH(); + ++ R300_STATECHANGE(r300, vir[0]); ++ R300_STATECHANGE(r300, fogs); ++ R300_STATECHANGE(r300, vir[1]); ++ R300_STATECHANGE(r300, vic); ++ R300_STATECHANGE(r300, vte); ++ R300_STATECHANGE(r300, vof); ++ R300_STATECHANGE(r300, txe); ++ R300_STATECHANGE(r300, vpt); ++ R300_STATECHANGE(r300, at); + R300_STATECHANGE(r300, bld); +- reg_start(R300_RB3D_CBLEND, 1); +- e32(0x0); +- e32(0x0); ++ R300_STATECHANGE(r300, ps); + + if (has_tcl) { +- R300_STATECHANGE(r300, vap_clip_cntl); +- reg_start(R300_VAP_CLIP_CNTL, 0); +- e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); ++ R300_STATECHANGE(r300, vap_clip_cntl); ++ ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); ++ END_BATCH(); + } + +- R300_STATECHANGE(r300, ps); +- reg_start(R300_GA_POINT_SIZE, 0); +- e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | +- ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, ++ ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ++ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); ++ END_BATCH(); + + if (!is_r500) { + R300_STATECHANGE(r300, ri); +- reg_start(R300_RS_IP_0, 7); +- for (i = 0; i < 8; ++i) { +- e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); +- } +- + R300_STATECHANGE(r300, rc); +- /* The second constant is needed to get glxgears display anything .. 
*/ +- reg_start(R300_RS_COUNT, 1); +- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +- e32(0x0); +- + R300_STATECHANGE(r300, rr); +- reg_start(R300_RS_INST_0, 0); +- e32(R300_RS_INST_COL_CN_WRITE); ++ ++ BEGIN_BATCH(14); ++ OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); ++ for (i = 0; i < 8; ++i) ++ OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); ++ ++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); ++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); ++ OUT_BATCH(0x0); ++ ++ OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); ++ END_BATCH(); + } else { + R300_STATECHANGE(r300, ri); +- reg_start(R500_RS_IP_0, 7); ++ R300_STATECHANGE(r300, rc); ++ R300_STATECHANGE(r300, rr); ++ ++ BEGIN_BATCH(14); ++ OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { +- e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | +- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | +- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | +- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); ++ OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + } + +- R300_STATECHANGE(r300, rc); +- /* The second constant is needed to get glxgears display anything .. 
*/ +- reg_start(R300_RS_COUNT, 1); +- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +- e32(0x0); +- +- R300_STATECHANGE(r300, rr); +- reg_start(R500_RS_INST_0, 0); +- e32(R500_RS_INST_COL_CN_WRITE); ++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); ++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); ++ OUT_BATCH(0x0); + ++ OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); ++ END_BATCH(); + } + + if (!is_r500) { + R300_STATECHANGE(r300, fp); +- reg_start(R300_US_CONFIG, 2); +- e32(0x0); +- e32(0x0); +- e32(0x0); +- reg_start(R300_US_CODE_ADDR_0, 3); +- e32(0x0); +- e32(0x0); +- e32(0x0); +- e32(R300_RGBA_OUT); +- + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); + +- reg_start(R300_US_ALU_RGB_INST_0, 0); +- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); +- +- reg_start(R300_US_ALU_RGB_ADDR_0, 0); +- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); +- +- reg_start(R300_US_ALU_ALPHA_INST_0, 0); +- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); +- +- reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); +- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); ++ BEGIN_BATCH(17); ++ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(R300_RGBA_OUT); ++ ++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, ++ FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); ++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, ++ FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); ++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, ++ FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); ++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, ++ FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); ++ END_BATCH(); + } else { +- R300_STATECHANGE(r300, fp); +- reg_start(R500_US_CONFIG, 1); +- e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); +- e32(0x0); +- 
reg_start(R500_US_CODE_ADDR, 2); +- e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); +- e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); +- e32(R500_US_CODE_OFFSET_ADDR(0)); +- ++ R300_STATECHANGE(r300, fp); + R300_STATECHANGE(r300, r500fp); +- r500fp_start_fragment(0, 6); +- +- e32(R500_INST_TYPE_OUT | +- R500_INST_TEX_SEM_WAIT | +- R500_INST_LAST | +- R500_INST_RGB_OMASK_R | +- R500_INST_RGB_OMASK_G | +- R500_INST_RGB_OMASK_B | +- R500_INST_ALPHA_OMASK | +- R500_INST_RGB_CLAMP | +- R500_INST_ALPHA_CLAMP); +- +- e32(R500_RGB_ADDR0(0) | +- R500_RGB_ADDR1(0) | +- R500_RGB_ADDR1_CONST | +- R500_RGB_ADDR2(0) | +- R500_RGB_ADDR2_CONST); +- +- e32(R500_ALPHA_ADDR0(0) | +- R500_ALPHA_ADDR1(0) | +- R500_ALPHA_ADDR1_CONST | +- R500_ALPHA_ADDR2(0) | +- R500_ALPHA_ADDR2_CONST); +- +- e32(R500_ALU_RGB_SEL_A_SRC0 | +- R500_ALU_RGB_R_SWIZ_A_R | +- R500_ALU_RGB_G_SWIZ_A_G | +- R500_ALU_RGB_B_SWIZ_A_B | +- R500_ALU_RGB_SEL_B_SRC0 | +- R500_ALU_RGB_R_SWIZ_B_R | +- R500_ALU_RGB_B_SWIZ_B_G | +- R500_ALU_RGB_G_SWIZ_B_B); +- +- e32(R500_ALPHA_OP_CMP | +- R500_ALPHA_SWIZ_A_A | +- R500_ALPHA_SWIZ_B_A); +- +- e32(R500_ALU_RGBA_OP_CMP | +- R500_ALU_RGBA_R_SWIZ_0 | +- R500_ALU_RGBA_G_SWIZ_0 | +- R500_ALU_RGBA_B_SWIZ_0 | +- R500_ALU_RGBA_A_SWIZ_0); ++ ++ BEGIN_BATCH(14); ++ OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); ++ OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); ++ OUT_BATCH(0x0); ++ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); ++ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); ++ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); ++ OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); ++ ++ OUT_BATCH(cmdr500fp(0, 1, 0, 0)); ++ OUT_BATCH(R500_INST_TYPE_OUT | ++ R500_INST_TEX_SEM_WAIT | ++ R500_INST_LAST | ++ R500_INST_RGB_OMASK_R | ++ R500_INST_RGB_OMASK_G | ++ R500_INST_RGB_OMASK_B | ++ R500_INST_ALPHA_OMASK | ++ R500_INST_RGB_CLAMP | ++ R500_INST_ALPHA_CLAMP); ++ OUT_BATCH(R500_RGB_ADDR0(0) | ++ R500_RGB_ADDR1(0) | ++ R500_RGB_ADDR1_CONST | ++ 
R500_RGB_ADDR2(0) | ++ R500_RGB_ADDR2_CONST); ++ OUT_BATCH(R500_ALPHA_ADDR0(0) | ++ R500_ALPHA_ADDR1(0) | ++ R500_ALPHA_ADDR1_CONST | ++ R500_ALPHA_ADDR2(0) | ++ R500_ALPHA_ADDR2_CONST); ++ OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 | ++ R500_ALU_RGB_R_SWIZ_A_R | ++ R500_ALU_RGB_G_SWIZ_A_G | ++ R500_ALU_RGB_B_SWIZ_A_B | ++ R500_ALU_RGB_SEL_B_SRC0 | ++ R500_ALU_RGB_R_SWIZ_B_R | ++ R500_ALU_RGB_B_SWIZ_B_G | ++ R500_ALU_RGB_G_SWIZ_B_B); ++ OUT_BATCH(R500_ALPHA_OP_CMP | ++ R500_ALPHA_SWIZ_A_A | ++ R500_ALPHA_SWIZ_B_A); ++ OUT_BATCH(R500_ALU_RGBA_OP_CMP | ++ R500_ALU_RGBA_R_SWIZ_0 | ++ R500_ALU_RGBA_G_SWIZ_0 | ++ R500_ALU_RGBA_B_SWIZ_0 | ++ R500_ALU_RGBA_A_SWIZ_0); ++ END_BATCH(); + } + +- reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); +- e32(0x00000000); ++ BEGIN_BATCH(2); ++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ END_BATCH(); ++ + if (has_tcl) { +- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | ++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); +- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- vap_cntl |= R500_TCL_STATE_OPTIMIZATION; +- } else +- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ++ vap_cntl |= R500_TCL_STATE_OPTIMIZATION; ++ } else { ++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); ++ } + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) +- vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) +- vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || 
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) +- vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) +- vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else +- vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); ++ ++ R300_STATECHANGE(r300, vap_cntl); + +- R300_STATECHANGE(rmesa, vap_cntl); +- reg_start(R300_VAP_CNTL, 0); +- e32(vap_cntl); ++ BEGIN_BATCH(2); ++ OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); ++ END_BATCH(); + + if (has_tcl) { + R300_STATECHANGE(r300, pvs); +- reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); +- +- e32((0 << R300_PVS_FIRST_INST_SHIFT) | +- (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | +- (1 << R300_PVS_LAST_INST_SHIFT)); +- e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | +- (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); +- e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +- + R300_STATECHANGE(r300, vpi); +- vsf_start_fragment(0x0, 8); +- +- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); +- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(0x0); + +- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); +- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(0x0); ++ BEGIN_BATCH(13); ++ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); ++ OUT_BATCH((0 << 
R300_PVS_FIRST_INST_SHIFT) | ++ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | ++ (1 << R300_PVS_LAST_INST_SHIFT)); ++ OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ++ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); ++ OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); ++ ++ OUT_BATCH(cmdvpu(0, 2)); ++ OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); ++ OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); ++ OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); ++ OUT_BATCH(0x0); ++ ++ OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); ++ OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); ++ OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); ++ OUT_BATCH(0x0); ++ END_BATCH(); + } + } + +@@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx) + static void r300Clear(GLcontext * ctx, GLbitfield mask) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); ++ BATCH_LOCALS(r300); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; ++ GLframebuffer *fb = dPriv->driverPrivate; ++ struct radeon_renderbuffer *rrb; + int flags = 0; + int bits = 0; + int swapped; +@@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + return; + } + ++ /* Flush swtcl vertices if necessary, because we will change hardware ++ * state during clear. See also the state-related comment in ++ * r300EmitClearState. 
++ */ ++ R300_NEWPRIM(r300); ++ + if (mask & BUFFER_BIT_FRONT_LEFT) { + flags |= BUFFER_BIT_FRONT_LEFT; + mask &= ~BUFFER_BIT_FRONT_LEFT; +@@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + _swrast_Clear(ctx, mask); + } + +- swapped = r300->radeon.sarea->pfCurrentPage == 1; +- + /* Make sure it fits there. */ + r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); + if (flags || bits) + r300EmitClearState(ctx); + + if (flags & BUFFER_BIT_FRONT_LEFT) { +- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); ++ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); + bits = 0; + } + + if (flags & BUFFER_BIT_BACK_LEFT) { +- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); ++ rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); + bits = 0; + } + + if (bits) +- r300ClearBuffer(r300, bits, 0); ++ r300ClearBuffer(r300, bits, NULL); + ++ COMMIT_BATCH(); + } + + void r300Flush(GLcontext * ctx) +@@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx) + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + +- if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) ++ if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit) + r300FlushCmdBuf(rmesa, __FUNCTION__); + } + +-#ifdef USER_BUFFERS +-#include "r300_mem.h" +- + void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) + { +- struct r300_dma_buffer *dmabuf; + size = MAX2(size, RADEON_BUFFER_SIZE * 16); + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +@@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) + rmesa->dma.flush(rmesa); + } + +- if (rmesa->dma.current.buf) { +-#ifdef USER_BUFFERS +- r300_mem_use(rmesa, rmesa->dma.current.buf->id); +-#endif +- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); ++ if (rmesa->dma.current) { ++ dri_bo_unreference(rmesa->dma.current); ++ rmesa->dma.current = 0; + 
} + if (rmesa->dma.nr_released_bufs > 4) + r300FlushCmdBuf(rmesa, __FUNCTION__); + +- dmabuf = CALLOC_STRUCT(r300_dma_buffer); +- dmabuf->buf = (void *)1; /* hack */ +- dmabuf->refcount = 1; +- +- dmabuf->id = r300_mem_alloc(rmesa, 4, size); +- if (dmabuf->id == 0) { +- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ +- +- r300FlushCmdBufLocked(rmesa, __FUNCTION__); +- radeonWaitForIdleLocked(&rmesa->radeon); +- +- dmabuf->id = r300_mem_alloc(rmesa, 4, size); +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (dmabuf->id == 0) { +- fprintf(stderr, +- "Error: Could not get dma buffer... exiting\n"); +- _mesa_exit(-1); +- } +- } +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); +- rmesa->dma.current.end = size; +- rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +-} +- +-void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, const char *caller) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (--region->buf->refcount == 0) { +- r300_mem_free(rmesa, region->buf->id); +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = 0; +- region->start = 0; ++ rmesa->dma.current = radeon_bufmgr_classic_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions", ++ size, 4, DRM_BO_MEM_DMA); ++ rmesa->dma.current_used = 0; ++ rmesa->dma.current_vertexptr = 0; + } + + /* Allocates a region from rmesa->dma.current. 
If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ + void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, ++ dri_bo **pbo, int *poffset, + int bytes, int alignment) + { + if (RADEON_DEBUG & DEBUG_IOCTL) +@@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + +- if (region->buf) +- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); ++ assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); + + alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) +- r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; ++ rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; + +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +- +- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +-} ++ if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) ++ r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); + +-#else +-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +-{ +- struct r300_dma_buffer *dmabuf; +- int fd = rmesa->radeon.dri.fd; +- int index = 0; +- int size = 0; +- drmDMAReq dma; +- int ret; ++ *poffset = rmesa->dma.current_used; ++ *pbo = rmesa->dma.current; ++ dri_bo_reference(*pbo); + +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +- fprintf(stderr, "%s\n", __FUNCTION__); ++ /* Always align to at least 16 bytes */ ++ rmesa->dma.current_used = 
(rmesa->dma.current_used + bytes + 15) & ~15; ++ rmesa->dma.current_vertexptr = rmesa->dma.current_used; + +- if (rmesa->dma.flush) { +- rmesa->dma.flush(rmesa); +- } +- +- if (rmesa->dma.current.buf) +- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); +- +- if (rmesa->dma.nr_released_bufs > 4) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- dma.context = rmesa->radeon.dri.hwContext; +- dma.send_count = 0; +- dma.send_list = NULL; +- dma.send_sizes = NULL; +- dma.flags = 0; +- dma.request_count = 1; +- dma.request_size = RADEON_BUFFER_SIZE; +- dma.request_list = &index; +- dma.request_sizes = &size; +- dma.granted_count = 0; +- +- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ +- +- ret = drmDMA(fd, &dma); +- +- if (ret != 0) { +- /* Try to release some buffers and wait until we can't get any more */ +- if (rmesa->dma.nr_released_bufs) { +- r300FlushCmdBufLocked(rmesa, __FUNCTION__); +- } +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Waiting for buffers\n"); +- +- radeonWaitForIdleLocked(&rmesa->radeon); +- ret = drmDMA(fd, &dma); +- +- if (ret != 0) { +- UNLOCK_HARDWARE(&rmesa->radeon); +- fprintf(stderr, +- "Error: Could not get dma buffer... 
exiting\n"); +- _mesa_exit(-1); +- } +- } +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Allocated buffer %d\n", index); +- +- dmabuf = CALLOC_STRUCT(r300_dma_buffer); +- dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; +- dmabuf->refcount = 1; +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = dmabuf->buf->address; +- rmesa->dma.current.end = dmabuf->buf->total; +- rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +-} +- +-void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, const char *caller) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (--region->buf->refcount == 0) { +- drm_radeon_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +- fprintf(stderr, "%s -- DISCARD BUF %d\n", +- __FUNCTION__, region->buf->buf->idx); +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, +- sizeof +- (*cmd) / 4, +- __FUNCTION__); +- cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; +- cmd->dma.buf_idx = region->buf->buf->idx; +- +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = 0; +- region->start = 0; +-} +- +-/* Allocates a region from rmesa->dma.current. 
If there isn't enough +- * space in current, grab a new buffer (and discard what was left of current) +- */ +-void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, +- int bytes, int alignment) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (region->buf) +- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); +- +- alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) +- r300RefillCurrentDmaRegion(rmesa); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; +- +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +- +- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +-} +- +-#endif +- +-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, +- GLint size) +-{ +- int offset = +- (char *)pointer - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- int valid = (size >= 0 && offset >= 0 +- && offset + size < +- rmesa->radeon.radeonScreen->gartTextures.size); +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, +- valid); +- +- return valid; +-} +- +-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) +-{ +- int offset = +- (char *)pointer - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- +- //fprintf(stderr, "offset=%08x\n", offset); +- +- if (offset < 0 +- || offset > rmesa->radeon.radeonScreen->gartTextures.size) +- return ~0; +- else +- return 
rmesa->radeon.radeonScreen->gart_texture_offset + offset; ++ assert(rmesa->dma.current_used <= rmesa->dma.current->size); + } + + void r300InitIoctlFuncs(struct dd_function_table *functions) +diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h +index e1143fb..c743478 100644 +--- a/src/mesa/drivers/dri/r300/r300_ioctl.h ++++ b/src/mesa/drivers/dri/r300/r300_ioctl.h +@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_context.h" + #include "radeon_drm.h" + +-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, +- const GLvoid * pointer, GLint size); +- +-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, +- const GLvoid * pointer); +- + extern void r300Flush(GLcontext * ctx); + +-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, +- const char *caller); + extern void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, int bytes, +- int alignment); ++ dri_bo **pbo, int *poffset, ++ int bytes, int alignment); + + extern void r300InitIoctlFuncs(struct dd_function_table *functions); + +diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c +index f8f9d4f..1097388 100644 +--- a/src/mesa/drivers/dri/r300/r300_mem.c ++++ b/src/mesa/drivers/dri/r300/r300_mem.c +@@ -27,359 +27,869 @@ + + /** + * \file ++ * Simulate a real memory manager for R300 in the old-style scheme. ++ * ++ * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr. 
+ * + * \author Aapo Tahkola + */ + ++#include "r300_mem.h" ++ ++#include + #include + +-#include "r300_context.h" +-#include "r300_cmdbuf.h" +-#include "r300_ioctl.h" +-#include "r300_mem.h" ++#include "simple_list.h" ++ + #include "radeon_ioctl.h" ++#include "r300_cmdbuf.h" + +-#ifdef USER_BUFFERS ++typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic; ++typedef struct _radeon_bo_classic radeon_bo_classic; ++typedef struct _radeon_bo_functions radeon_bo_functions; ++typedef struct _radeon_reloc radeon_reloc; ++typedef struct _radeon_bo_vram radeon_bo_vram; ++ ++struct _radeon_bufmgr_classic { ++ radeon_bufmgr base; ++ r300ContextPtr rmesa; ++ ++ radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */ ++ ++ radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */ ++ radeon_bo_classic **pending_tail; ++ ++ /* Texture heap bookkeeping */ ++ driTexHeap *texture_heap; ++ GLuint texture_offset; ++ driTextureObject texture_swapped; ++}; ++ ++struct _radeon_reloc { ++ uint64_t flags; ++ GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */ ++ radeon_bo_classic *target; ++ GLuint delta; ++}; ++ ++struct _radeon_bo_functions { ++ /** ++ * Free a buffer object. Caller has verified that the object is not ++ * referenced or pending. ++ */ ++ void (*free)(radeon_bo_classic*); ++ ++ /** ++ * Validate the given buffer. Must set the validated flag to 1. ++ * ++ * May be null for buffer objects that are always valid. ++ * Always called with lock held. ++ */ ++ void (*validate)(radeon_bo_classic*); ++ ++ /** ++ * Map the buffer for CPU access. ++ * Only called when the buffer isn't already mapped. ++ * ++ * May be null. ++ */ ++ void (*map)(radeon_bo_classic*, GLboolean write); ++ ++ /** ++ * Unmap the buffer. ++ * Only called on final unmap. ++ * ++ * May be null. ++ */ ++ void (*unmap)(radeon_bo_classic*); ++ ++ /** ++ * Indicate that the buffer object is now used by the hardware. 
++ * ++ * May be null. ++ */ ++ void (*bind)(radeon_bo_classic*); ++ ++ /** ++ * Indicate that the buffer object is no longer used by the hardware. ++ * ++ * May be null. ++ */ ++ void (*unbind)(radeon_bo_classic*); ++}; + +-static void resize_u_list(r300ContextPtr rmesa) +-{ +- void *temp; +- int nsize; ++/** ++ * A buffer object. There are three types of buffer objects: ++ * 1. cmdbuf: Ordinary malloc()ed memory, used for command buffers ++ * 2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl. ++ * 3. vram: Objects with malloc()ed backing store that will be uploaded ++ * into VRAM on demand; used for textures. ++ * There is a @ref functions table for operations that depend on the ++ * buffer object type. ++ * ++ * Fencing is handled the same way for all buffer objects. During command buffer ++ * submission, the pending flag and corresponding variables are set accordingly. ++ */ ++struct _radeon_bo_classic { ++ dri_bo base; + +- temp = rmesa->rmm->u_list; +- nsize = rmesa->rmm->u_size * 2; ++ const radeon_bo_functions *functions; + +- rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); +- _mesa_memset(rmesa->rmm->u_list, 0, +- nsize * sizeof(*rmesa->rmm->u_list)); ++ radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */ ++ radeon_bo_classic **pprev; + +- if (temp) { +- r300FlushCmdBuf(rmesa, __FUNCTION__); ++ /** ++ * Number of software references to this buffer. ++ * A buffer is freed automatically as soon as its reference count reaches 0 ++ * *and* it is no longer pending. 
++ */ ++ unsigned int refcount; ++ unsigned int mapcount; /** mmap count; mutually exclusive to being pending */ + +- _mesa_memcpy(rmesa->rmm->u_list, temp, +- rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); +- _mesa_free(temp); +- } ++ unsigned int validated:1; /** whether the buffer is validated for hardware use right now */ ++ unsigned int used:1; /* only for communication between process_relocs and post_submit */ + +- rmesa->rmm->u_size = nsize; ++ unsigned int pending:1; ++ radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */ ++ radeon_bo_classic **pending_pprev; ++ ++ /* The following two variables are intricately linked to the DRM interface, ++ * and must be in this physical memory order, or else chaos ensues. ++ * See the DRM's implementation of R300_CMD_SCRATCH for details. ++ */ ++ uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */ ++ uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */ ++ ++ radeon_reloc *relocs; /** Array of relocations in this buffer */ ++ GLuint relocs_used; /** # of relocations in relocation array */ ++ GLuint relocs_size; /** # of reloc records reserved in relocation array */ ++}; ++ ++typedef struct _radeon_vram_wrapper radeon_vram_wrapper; ++ ++/** Wrapper around heap object */ ++struct _radeon_vram_wrapper { ++ driTextureObject base; ++ radeon_bo_vram *bo; ++}; ++ ++struct _radeon_bo_vram { ++ radeon_bo_classic base; ++ ++ unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */ ++ ++ radeon_vram_wrapper *vram; /** Block in VRAM (if any) */ ++}; ++ ++static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx) ++{ ++ return (radeon_bufmgr_classic*)bufmgr_ctx; + } + +-void r300_mem_init(r300ContextPtr rmesa) ++static radeon_bo_classic* get_bo_classic(dri_bo *bo_base) + { +- rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); +- memset(rmesa->rmm, 0, 
sizeof(struct r300_memory_manager)); ++ return (radeon_bo_classic*)bo_base; ++} + +- rmesa->rmm->u_size = 128; +- resize_u_list(rmesa); ++static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base) ++{ ++ return (radeon_bo_vram*)bo_base; + } + +-void r300_mem_destroy(r300ContextPtr rmesa) ++/** ++ * Really free a given buffer object. ++ */ ++static void bo_free(radeon_bo_classic *bo) + { +- _mesa_free(rmesa->rmm->u_list); +- rmesa->rmm->u_list = NULL; ++ assert(!bo->refcount); ++ assert(!bo->pending); ++ assert(!bo->mapcount); ++ ++ if (bo->relocs) { ++ int i; ++ for(i = 0; i < bo->relocs_used; ++i) ++ dri_bo_unreference(&bo->relocs[i].target->base); ++ free(bo->relocs); ++ bo->relocs = 0; ++ } ++ ++ *bo->pprev = bo->next; ++ if (bo->next) ++ bo->next->pprev = bo->pprev; + +- _mesa_free(rmesa->rmm); +- rmesa->rmm = NULL; ++ bo->functions->free(bo); + } + +-void *r300_mem_ptr(r300ContextPtr rmesa, int id) ++ ++/** ++ * Keep track of which buffer objects are still pending, i.e. waiting for ++ * some hardware operation to complete. ++ */ ++static void track_pending_buffers(radeon_bufmgr_classic *bufmgr) + { +- assert(id <= rmesa->rmm->u_last); +- return rmesa->rmm->u_list[id].ptr; ++ uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa); ++ ++ while(bufmgr->pending) { ++ radeon_bo_classic *bo = bufmgr->pending; ++ ++ assert(bo->pending); ++ ++ if (bo->pending_count || ++ bo->pending_age > currentage) // TODO: Age counter wraparound! ++ break; ++ ++ bo->pending = 0; ++ bufmgr->pending = bo->pending_next; ++ if (bufmgr->pending) ++ bufmgr->pending->pending_pprev = &bufmgr->pending; ++ else ++ bufmgr->pending_tail = &bufmgr->pending; ++ ++ if (bo->functions->unbind) ++ (*bo->functions->unbind)(bo); ++ if (!bo->refcount) ++ bo_free(bo); ++ } + } + +-int r300_mem_find(r300ContextPtr rmesa, void *ptr) ++/** ++ * Initialize common buffer object data. 
++ */ ++static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size) + { +- int i; ++ bo->base.bufmgr = &bufmgr->base.base; ++ bo->base.size = size; ++ bo->refcount = 1; ++ ++ bo->pprev = &bufmgr->buffers; ++ bo->next = bufmgr->buffers; ++ if (bo->next) ++ bo->next->pprev = &bo->next; ++ bufmgr->buffers = bo; ++} + +- for (i = 1; i < rmesa->rmm->u_size + 1; i++) +- if (rmesa->rmm->u_list[i].ptr && +- ptr >= rmesa->rmm->u_list[i].ptr && +- ptr < +- rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) +- break; + +- if (i < rmesa->rmm->u_size + 1) +- return i; ++/** ++ * Free a DMA-based buffer. ++ */ ++static void dma_free(radeon_bo_classic *bo) ++{ ++ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr); ++ drm_radeon_mem_free_t memfree; ++ int ret; ++ ++ memfree.region = RADEON_MEM_REGION_GART; ++ memfree.region_offset = bo->base.offset; ++ memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset; + +- fprintf(stderr, "%p failed\n", ptr); +- return 0; ++ ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd, ++ DRM_RADEON_FREE, &memfree, sizeof(memfree)); ++ if (ret) { ++ fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset); ++ fprintf(stderr, "ret = %s\n", strerror(-ret)); ++ exit(1); ++ } ++ ++ free(bo); + } + +-//#define MM_DEBUG +-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) ++static const radeon_bo_functions dma_bo_functions = { ++ .free = &dma_free ++}; ++ ++/** ++ * Call the DRM to allocate GART memory for the given (incomplete) ++ * buffer object. 
++ */ ++static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, ++ unsigned long size, unsigned int alignment) + { + drm_radeon_mem_alloc_t alloc; +- int offset = 0, ret; +- int i, free = -1; +- int done_age; +- drm_radeon_mem_free_t memfree; +- int tries = 0; +- static int bytes_wasted = 0, allocated = 0; ++ int baseoffset; ++ int ret; ++ ++ alloc.region = RADEON_MEM_REGION_GART; ++ alloc.alignment = alignment; ++ alloc.size = size; ++ alloc.region_offset = &baseoffset; ++ ++ ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, ++ DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); ++ if (ret) { ++ if (RADEON_DEBUG & DEBUG_MEMORY) ++ fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret); ++ return 0; ++ } + +- if (size < 4096) +- bytes_wasted += 4096 - size; ++ bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset; ++ bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset; + +- allocated += size; ++ return 1; ++} + +-#if 0 +- static int t = 0; +- if (t != time(NULL)) { +- t = time(NULL); +- fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", +- rmesa->rmm->u_last, bytes_wasted / 1024, +- allocated / 1024); ++/** ++ * Allocate a DMA buffer. 
++ */ ++static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name, ++ unsigned long size, unsigned int alignment) ++{ ++ radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); ++ ++ bo->functions = &dma_bo_functions; ++ ++ track_pending_buffers(bufmgr); ++ if (!try_dma_alloc(bufmgr, bo, size, alignment)) { ++ if (RADEON_DEBUG & DEBUG_MEMORY) ++ fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size); ++ radeonFinish(bufmgr->rmesa->radeon.glCtx); ++ track_pending_buffers(bufmgr); ++ if (!try_dma_alloc(bufmgr, bo, size, alignment)) { ++ WARN_ONCE( ++ "Ran out of GART memory (for %ld)!\n" ++ "Please consider adjusting GARTSize option.\n", ++ size); ++ free(bo); ++ return 0; ++ } + } +-#endif + +- memfree.region = RADEON_MEM_REGION_GART; ++ init_buffer(bufmgr, bo, size); ++ bo->validated = 1; /* DMA buffer offsets are always valid */ + +- again: ++ return &bo->base; ++} ++ ++/** ++ * Free a command buffer ++ */ ++static void cmdbuf_free(radeon_bo_classic *bo) ++{ ++ free(bo->base.virtual); ++ free(bo); ++} + +- done_age = radeonGetAge((radeonContextPtr) rmesa); ++static const radeon_bo_functions cmdbuf_bo_functions = { ++ .free = cmdbuf_free ++}; + +- if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) +- resize_u_list(rmesa); ++/** ++ * Allocate a command buffer. ++ * ++ * Command buffers are really just malloc'ed buffers. They are managed by ++ * the bufmgr to enable relocations. 
++ */ ++static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name, ++ unsigned long size) ++{ ++ radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); + +- for (i = rmesa->rmm->u_last + 1; i > 0; i--) { +- if (rmesa->rmm->u_list[i].ptr == NULL) { +- free = i; +- continue; +- } ++ bo->functions = &cmdbuf_bo_functions; ++ bo->base.virtual = malloc(size); + +- if (rmesa->rmm->u_list[i].h_pending == 0 && +- rmesa->rmm->u_list[i].pending +- && rmesa->rmm->u_list[i].age <= done_age) { +- memfree.region_offset = +- (char *)rmesa->rmm->u_list[i].ptr - +- (char *)rmesa->radeon.radeonScreen->gartTextures. +- map; ++ init_buffer(bufmgr, bo, size); ++ return &bo->base; ++} + +- ret = +- drmCommandWrite(rmesa->radeon.radeonScreen-> +- driScreen->fd, DRM_RADEON_FREE, +- &memfree, sizeof(memfree)); ++/** ++ * Free a VRAM-based buffer object. ++ */ ++static void vram_free(radeon_bo_classic *bo_base) ++{ ++ radeon_bo_vram *bo = get_bo_vram(bo_base); + +- if (ret) { +- fprintf(stderr, "Failed to free at %p\n", +- rmesa->rmm->u_list[i].ptr); +- fprintf(stderr, "ret = %s\n", strerror(-ret)); +- exit(1); +- } else { +-#ifdef MM_DEBUG +- fprintf(stderr, "really freed %d at age %x\n", +- i, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- if (i == rmesa->rmm->u_last) +- rmesa->rmm->u_last--; +- +- if (rmesa->rmm->u_list[i].size < 4096) +- bytes_wasted -= +- 4096 - rmesa->rmm->u_list[i].size; +- +- allocated -= rmesa->rmm->u_list[i].size; +- rmesa->rmm->u_list[i].pending = 0; +- rmesa->rmm->u_list[i].ptr = NULL; +- free = i; +- } +- } ++ if (bo->vram) { ++ driDestroyTextureObject(&bo->vram->base); ++ bo->vram = 0; + } +- rmesa->rmm->u_head = i; +- +- if (free == -1) { +- WARN_ONCE("Ran out of slots!\n"); +- //usleep(100); +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- tries++; +- if (tries > 100) { +- WARN_ONCE("Ran out of slots!\n"); +- exit(1); ++ ++ free(bo->base.base.virtual); ++ free(bo); ++} ++ ++/** ++ * Allocate/update the copy in 
vram. ++ * ++ * Note: Assume we're called with the DRI lock held. ++ */ ++static void vram_validate(radeon_bo_classic *bo_base) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr); ++ radeon_bo_vram *bo = get_bo_vram(bo_base); ++ ++ if (!bo->vram) { ++ bo->backing_store_dirty = 1; ++ ++ bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper)); ++ bo->vram->bo = bo; ++ make_empty_list(&bo->vram->base); ++ bo->vram->base.totalSize = bo->base.base.size; ++ if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) { ++ fprintf(stderr, "Ouch! vram_validate failed\n"); ++ free(bo->vram); ++ bo->base.base.offset = 0; ++ bo->vram = 0; ++ return; + } +- goto again; + } + +- alloc.region = RADEON_MEM_REGION_GART; +- alloc.alignment = alignment; +- alloc.size = size; +- alloc.region_offset = &offset; ++ assert(bo->vram->base.memBlock); + +- ret = +- drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, +- sizeof(alloc)); +- if (ret) { +-#if 0 +- WARN_ONCE("Ran out of mem!\n"); +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- //usleep(100); +- tries2++; +- tries = 0; +- if (tries2 > 100) { +- WARN_ONCE("Ran out of GART memory!\n"); +- exit(1); ++ bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs; ++ ++ if (bo->backing_store_dirty) { ++ /* Copy to VRAM using a blit. ++ * All memory is 4K aligned. We're using 1024 pixels wide blits. 
++ */ ++ drm_radeon_texture_t tex; ++ drm_radeon_tex_image_t tmp; ++ int ret; ++ ++ tex.offset = bo->base.base.offset; ++ tex.image = &tmp; ++ ++ assert(!(tex.offset & 1023)); ++ ++ tmp.x = 0; ++ tmp.y = 0; ++ if (bo->base.base.size < 4096) { ++ tmp.width = (bo->base.base.size + 3) / 4; ++ tmp.height = 1; ++ } else { ++ tmp.width = 1024; ++ tmp.height = (bo->base.base.size + 4095) / 4096; + } +- goto again; +-#else +- WARN_ONCE +- ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", +- size); +- return 0; +-#endif ++ tmp.data = bo->base.base.virtual; ++ ++ tex.format = RADEON_TXFORMAT_ARGB8888; ++ tex.width = tmp.width; ++ tex.height = tmp.height; ++ tex.pitch = MAX2(tmp.width / 16, 1); ++ ++ do { ++ ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, ++ DRM_RADEON_TEXTURE, &tex, ++ sizeof(drm_radeon_texture_t)); ++ if (ret) { ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, ++ "DRM_RADEON_TEXTURE: again!\n"); ++ usleep(1); ++ } ++ } while (ret == -EAGAIN); ++ ++ bo->backing_store_dirty = 0; + } + +- i = free; ++ bo->base.validated = 1; ++} + +- if (i > rmesa->rmm->u_last) +- rmesa->rmm->u_last = i; ++/* No need for actual mmap actions since we have backing store, ++ * but mark buffer dirty when necessary */ ++static void vram_map(radeon_bo_classic *bo_base, GLboolean write) ++{ ++ radeon_bo_vram *bo = get_bo_vram(bo_base); + +- rmesa->rmm->u_list[i].ptr = +- ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; +- rmesa->rmm->u_list[i].size = size; +- rmesa->rmm->u_list[i].age = 0; +- //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); ++ if (write) { ++ bo->base.validated = 0; ++ bo->backing_store_dirty = 1; ++ } ++} + +-#ifdef MM_DEBUG +- fprintf(stderr, "allocated %d at age %x\n", i, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif ++static void vram_bind(radeon_bo_classic *bo_base) ++{ ++ radeon_bo_vram *bo = get_bo_vram(bo_base); + +- return i; ++ if (bo->vram) { ++ 
bo->vram->base.bound = 1; ++ driUpdateTextureLRU(&bo->vram->base); ++ } + } + +-void r300_mem_use(r300ContextPtr rmesa, int id) ++static void vram_unbind(radeon_bo_classic *bo_base) + { +- uint64_t ull; +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- drm_r300_cmd_header_t *cmd; ++ radeon_bo_vram *bo = get_bo_vram(bo_base); + +- assert(id <= rmesa->rmm->u_last); ++ if (bo->vram) ++ bo->vram->base.bound = 0; ++} + +- if (id == 0) +- return; ++/** Callback function called by the texture heap when a texture is evicted */ ++static void destroy_vram_wrapper(void *data, driTextureObject *t) ++{ ++ radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t; ++ ++ if (wrapper->bo && wrapper->bo->vram == wrapper) { ++ wrapper->bo->base.validated = 0; ++ wrapper->bo->vram = 0; ++ } ++} + +- cmd = +- (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, +- 2 + sizeof(ull) / 4, +- __FUNCTION__); +- cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; +- cmd[0].scratch.reg = R300_MEM_SCRATCH; +- cmd[0].scratch.n_bufs = 1; +- cmd[0].scratch.flags = 0; +- cmd++; ++static const radeon_bo_functions vram_bo_functions = { ++ .free = vram_free, ++ .validate = vram_validate, ++ .map = vram_map, ++ .bind = vram_bind, ++ .unbind = vram_unbind ++}; + +- ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; +- _mesa_memcpy(cmd, &ull, sizeof(ull)); +- cmd += sizeof(ull) / 4; ++/** ++ * Free a static buffer object. ++ */ ++static void static_free(radeon_bo_classic *bo_base) ++{ ++ radeon_bo_vram *bo = get_bo_vram(bo_base); + +- cmd[0].u = /*id */ 0; ++ free(bo); ++} + +- LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. 
*/ +- rmesa->rmm->u_list[id].h_pending++; +- UNLOCK_HARDWARE(&rmesa->radeon); ++static void static_map(radeon_bo_classic *bo_base, GLboolean write) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr); ++ ++ bo_base->base.virtual = bufmgr->rmesa->radeon.dri.screen->pFB + ++ (bo_base->base.offset - bufmgr->rmesa->radeon.radeonScreen->fbLocation); ++ ++ /* Read the first pixel in the frame buffer. This should ++ * be a noop, right? In fact without this conform fails as reading ++ * from the framebuffer sometimes produces old results -- the ++ * on-card read cache gets mixed up and doesn't notice that the ++ * framebuffer has been updated. ++ * ++ * Note that we should probably be reading some otherwise unused ++ * region of VRAM, otherwise we might get incorrect results when ++ * reading pixels from the top left of the screen. ++ * ++ * I found this problem on an R420 with glean's texCube test. ++ * Note that the R200 span code also *writes* the first pixel in the ++ * framebuffer, but I've found this to be unnecessary. ++ * -- Nicolai Hähnle, June 2008 ++ */ ++ { ++ int p; ++ volatile int *buf = (int*)bufmgr->rmesa->radeon.dri.screen->pFB; ++ p = *buf; ++ } + } + +-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) ++static void static_unmap(radeon_bo_classic *bo_base) + { +- unsigned long offset; ++ bo_base->base.virtual = 0; ++} ++ ++static const radeon_bo_functions static_bo_functions = { ++ .free = static_free, ++ .map = static_map, ++ .unmap = static_unmap ++}; + +- assert(id <= rmesa->rmm->u_last); ++/** ++ * Allocate a backing store buffer object that is validated into VRAM. 
++ */ ++static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name, ++ unsigned long size, unsigned int alignment) ++{ ++ radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); + +- offset = (char *)rmesa->rmm->u_list[id].ptr - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- offset += rmesa->radeon.radeonScreen->gart_texture_offset; ++ bo->base.functions = &vram_bo_functions; ++ bo->base.base.virtual = malloc(size); ++ init_buffer(bufmgr, &bo->base, size); ++ return &bo->base.base; ++} + +- return offset; ++dri_bo *radeon_bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name, ++ unsigned long size, unsigned int alignment, ++ uint32_t location_mask) ++{ ++ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); ++ ++ if (location_mask & DRM_BO_MEM_CMDBUF) { ++ return cmdbuf_alloc(bufmgr, name, size); ++ } else if (location_mask & DRM_BO_MEM_DMA) { ++ return dma_alloc(bufmgr, name, size, alignment); ++ } else { ++ return vram_alloc(bufmgr, name, size, alignment); ++ } + } + +-void *r300_mem_map(r300ContextPtr rmesa, int id, int access) ++dri_bo *radeon_bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name, ++ unsigned long offset, unsigned long size, ++ void *virtual, uint32_t initial_domain) + { +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- void *ptr; +- int tries = 0; ++ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); ++ radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); ++ ++ bo->base.functions = &static_bo_functions; ++ bo->base.base.virtual = virtual; ++ bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation; ++ bo->base.validated = 1; /* Static buffer offsets are always valid */ ++ ++ init_buffer(bufmgr, &bo->base, size); ++ return &bo->base.base; ++ ++} + +- assert(id <= rmesa->rmm->u_last); ++static void 
bufmgr_classic_bo_reference(dri_bo *bo_base) ++{ ++ radeon_bo_classic *bo = get_bo_classic(bo_base); ++ bo->refcount++; ++ assert(bo->refcount > 0); ++} + +- if (access == R300_MEM_R) { ++static void bufmgr_classic_bo_unreference(dri_bo *bo_base) ++{ ++ radeon_bo_classic *bo = get_bo_classic(bo_base); + +- if (rmesa->rmm->u_list[id].mapped == 1) +- WARN_ONCE("buffer %d already mapped\n", id); ++ if (!bo_base) ++ return; + +- rmesa->rmm->u_list[id].mapped = 1; +- ptr = r300_mem_ptr(rmesa, id); ++ assert(bo->refcount > 0); ++ bo->refcount--; ++ if (!bo->refcount) { ++ // Ugly HACK - figure out whether this is really necessary ++ get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++; + +- return ptr; ++ assert(!bo->mapcount); ++ if (!bo->pending) ++ bo_free(bo); + } ++} + +- if (rmesa->rmm->u_list[id].h_pending) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- if (rmesa->rmm->u_list[id].h_pending) { +- return NULL; ++static int bufmgr_classic_bo_map(dri_bo *bo_base, int write_enable) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr); ++ radeon_bo_classic *bo = get_bo_classic(bo_base); ++ assert(bo->refcount > 0); ++ ++ if (bo->pending) { ++ track_pending_buffers(bufmgr); ++ if (bo->pending) { ++ // TODO: Better fence waiting ++ if (RADEON_DEBUG & DEBUG_MEMORY) ++ fprintf(stderr, "bo_map: buffer is pending. 
Flushing...\n"); ++ radeonFinish(bufmgr->rmesa->radeon.glCtx); ++ track_pending_buffers(bufmgr); ++ if (bo->pending) { ++ fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n"); ++ abort(); ++ } ++ } + } + +- while (rmesa->rmm->u_list[id].age > +- radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) +- usleep(10); ++ if (!bo->mapcount && bo->functions->map) ++ bo->functions->map(bo, write_enable); ++ ++ bo->mapcount++; ++ assert(bo->mapcount > 0); ++ return 0; ++} ++ ++static int bufmgr_classic_bo_unmap(dri_bo *buf) ++{ ++ radeon_bo_classic *bo = get_bo_classic(buf); ++ assert(bo->refcount > 0); ++ assert(bo->mapcount > 0); ++ bo->mapcount--; ++ ++ if (!bo->mapcount && bo->functions->unmap) ++ bo->functions->unmap(bo); ++ ++ return 0; ++} + +- if (tries >= 1000) { +- fprintf(stderr, "Idling failed (%x vs %x)\n", +- rmesa->rmm->u_list[id].age, +- radeonGetAge((radeonContextPtr) rmesa)); +- return NULL; ++/** ++ * Mark the given buffer as pending and move it to the tail ++ * of the pending list. ++ * The caller is responsible for setting up pending_count and pending_age. ++ */ ++static void move_to_pending_tail(radeon_bo_classic *bo) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); ++ ++ if (bo->pending) { ++ *bo->pending_pprev = bo->pending_next; ++ if (bo->pending_next) ++ bo->pending_next->pending_pprev = bo->pending_pprev; ++ else ++ bufmgr->pending_tail = bo->pending_pprev; + } + +- if (rmesa->rmm->u_list[id].mapped == 1) +- WARN_ONCE("buffer %d already mapped\n", id); ++ bo->pending = 1; ++ bo->pending_pprev = bufmgr->pending_tail; ++ bo->pending_next = 0; ++ *bufmgr->pending_tail = bo; ++ bufmgr->pending_tail = &bo->pending_next; ++} + +- rmesa->rmm->u_list[id].mapped = 1; +- ptr = r300_mem_ptr(rmesa, id); ++/** ++ * Emit commands to the batch buffer that cause the given buffer's ++ * pending_count and pending_age to be updated. 
++ */ ++static void emit_age_for_buffer(radeon_bo_classic* bo) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); ++ BATCH_LOCALS(bufmgr->rmesa); ++ drm_r300_cmd_header_t cmd; ++ uint64_t ull; + +- return ptr; ++ cmd.scratch.cmd_type = R300_CMD_SCRATCH; ++ cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */ ++ cmd.scratch.n_bufs = 1; ++ cmd.scratch.flags = 0; ++ ull = (uint64_t) (intptr_t) &bo->pending_age; ++ ++ BEGIN_BATCH(4); ++ OUT_BATCH(cmd.u); ++ OUT_BATCH(ull & 0xffffffff); ++ OUT_BATCH(ull >> 32); ++ OUT_BATCH(0); ++ END_BATCH(); ++ COMMIT_BATCH(); ++ ++ bo->pending_count++; + } + +-void r300_mem_unmap(r300ContextPtr rmesa, int id) ++int radeon_bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta, ++ GLuint offset, dri_bo *target) + { +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif ++ radeon_bo_classic *bo = get_bo_classic(batch_buf); ++ radeon_reloc *reloc; ++ ++ if (bo->relocs_used >= bo->relocs_size) { ++ bo->relocs_size *= 2; ++ if (bo->relocs_size < 32) ++ bo->relocs_size = 32; ++ ++ bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc)); ++ } + +- assert(id <= rmesa->rmm->u_last); ++ reloc = &bo->relocs[bo->relocs_used++]; ++ reloc->flags = flags; ++ reloc->offset = offset; ++ reloc->delta = delta; ++ reloc->target = get_bo_classic(target); ++ dri_bo_reference(target); ++ return 0; ++} + +- if (rmesa->rmm->u_list[id].mapped == 0) +- WARN_ONCE("buffer %d not mapped\n", id); ++/* process_relocs is called just before the given command buffer ++ * is executed. It ensures that all referenced buffers are in ++ * the right GPU domain. 
++ */ ++static void *bufmgr_classic_process_relocs(dri_bo *batch_buf) ++{ ++ radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); ++ int i; + +- rmesa->rmm->u_list[id].mapped = 0; ++ // Warning: At this point, we append something to the batch buffer ++ // during flush. ++ emit_age_for_buffer(batch_bo); ++ ++ dri_bo_map(batch_buf, GL_TRUE); ++ for(i = 0; i < batch_bo->relocs_used; ++i) { ++ radeon_reloc *reloc = &batch_bo->relocs[i]; ++ uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset); ++ uint32_t offset; ++ ++ if (!reloc->target->validated) ++ reloc->target->functions->validate(reloc->target); ++ reloc->target->used = 1; ++ offset = reloc->target->base.offset + reloc->delta; ++ ++ if (reloc->flags & DRM_RELOC_BLITTER) ++ *dest = (*dest & 0xffc00000) | (offset >> 10); ++ else if (reloc->flags & DRM_RELOC_TXOFFSET) ++ *dest = (*dest & 31) | (offset & ~31); ++ else ++ *dest = offset; ++ } ++ dri_bo_unmap(batch_buf); ++ return 0; + } + +-void r300_mem_free(r300ContextPtr rmesa, int id) ++/* post_submit is called just after the given command buffer ++ * is executed. It ensures that buffers are properly marked as ++ * pending. 
++ */ ++static void bufmgr_classic_post_submit(dri_bo *batch_buf) + { +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif ++ radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); ++ int i; + +- assert(id <= rmesa->rmm->u_last); ++ assert(!batch_bo->pending_count); + +- if (id == 0) +- return; ++ for(i = 0; i < batch_bo->relocs_used; ++i) { ++ radeon_reloc *reloc = &batch_bo->relocs[i]; + +- if (rmesa->rmm->u_list[id].ptr == NULL) { +- WARN_ONCE("Not allocated!\n"); +- return; ++ if (reloc->target->used) { ++ reloc->target->used = 0; ++ assert(!reloc->target->pending_count); ++ reloc->target->pending_age = batch_bo->pending_age; ++ move_to_pending_tail(reloc->target); ++ if (reloc->target->functions->bind) ++ (*reloc->target->functions->bind)(reloc->target); ++ } + } ++} + +- if (rmesa->rmm->u_list[id].pending) { +- WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); +- return; ++static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx) ++{ ++ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); ++ ++ track_pending_buffers(bufmgr); ++ if (bufmgr->pending) ++ radeonFinish(bufmgr->rmesa->radeon.glCtx); ++ track_pending_buffers(bufmgr); ++ ++ if (bufmgr->buffers) { ++ //fprintf(stderr, "Warning: Buffer objects have leaked\n"); ++ while(bufmgr->buffers) { ++ //fprintf(stderr, " Leak of size %ld\n", bufmgr->buffers->base.size); ++ bufmgr->buffers->refcount = 0; ++ bufmgr->buffers->mapcount = 0; ++ bufmgr->buffers->pending = 0; ++ bo_free(bufmgr->buffers); ++ } + } + +- rmesa->rmm->u_list[id].pending = 1; ++ driDestroyTextureHeap(bufmgr->texture_heap); ++ bufmgr->texture_heap = 0; ++ assert(is_empty_list(&bufmgr->texture_swapped)); ++ ++ free(bufmgr); ++} ++ ++radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa) ++{ ++ radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic)); ++ ++ bufmgr->rmesa = rmesa; ++ // 
bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc; ++ bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference; ++ bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference; ++ bufmgr->base.base.bo_map = &bufmgr_classic_bo_map; ++ bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap; ++ bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs; ++ bufmgr->base.base.post_submit = &bufmgr_classic_post_submit; ++ bufmgr->base.base.destroy = &bufmgr_classic_destroy; ++ ++ bufmgr->pending_tail = &bufmgr->pending; ++ ++ /* Init texture heap */ ++ make_empty_list(&bufmgr->texture_swapped); ++ bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr, ++ rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS, ++ (drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0], ++ &rmesa->radeon.sarea->tex_age[0], ++ &bufmgr->texture_swapped, sizeof(radeon_vram_wrapper), ++ &destroy_vram_wrapper); ++ bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0]; ++ ++ return &bufmgr->base; ++} ++ ++void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx) ++{ ++ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base); ++ ++ DRI_AGE_TEXTURES(bufmgr->texture_heap); + } +-#endif +diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h +index 625a7f6..4e9be65 100644 +--- a/src/mesa/drivers/dri/r300/r300_mem.h ++++ b/src/mesa/drivers/dri/r300/r300_mem.h +@@ -1,37 +1,22 @@ + #ifndef __R300_MEM_H__ + #define __R300_MEM_H__ + +-//#define R300_MEM_PDL 0 +-#define R300_MEM_UL 1 ++#include "glheader.h" ++#include "dri_bufmgr.h" + +-#define R300_MEM_R 1 +-#define R300_MEM_W 2 +-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) ++#include "r300_context.h" + +-#define R300_MEM_SCRATCH 2 + +-struct r300_memory_manager { +- struct { +- void *ptr; +- uint32_t size; +- uint32_t age; +- uint32_t h_pending; +- int pending; +- int mapped; +- } *u_list; +- int u_head, u_size, u_last; ++/* Note: The following flags should 
probably be ultimately eliminated, ++ * or replaced by something else. ++ */ ++#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) */ ++#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */ + +-}; ++#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */ ++#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */ + +-extern void r300_mem_init(r300ContextPtr rmesa); +-extern void r300_mem_destroy(r300ContextPtr rmesa); +-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); +-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); +-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); +-extern void r300_mem_use(r300ContextPtr rmesa, int id); +-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); +-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); +-extern void r300_mem_unmap(r300ContextPtr rmesa, int id); +-extern void r300_mem_free(r300ContextPtr rmesa, int id); ++radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa); ++void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx); + + #endif +diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c +new file mode 100644 +index 0000000..75b7d32 +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c +@@ -0,0 +1,248 @@ ++/* ++ * Copyright (C) 2008 Nicolai Haehnle. ++ * ++ * All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ */ ++ ++#include "r300_mipmap_tree.h" ++ ++#include ++#include ++ ++#include "simple_list.h" ++#include "texcompress.h" ++#include "texformat.h" ++ ++#include "r300_mem.h" ++ ++static GLuint r300_compressed_texture_size(GLcontext *ctx, ++ GLsizei width, GLsizei height, GLsizei depth, ++ GLuint mesaFormat) ++{ ++ GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); ++ ++ if (mesaFormat == MESA_FORMAT_RGB_DXT1 || ++ mesaFormat == MESA_FORMAT_RGBA_DXT1) { ++ if (width + 3 < 8) /* width one block */ ++ size = size * 4; ++ else if (width + 3 < 16) ++ size = size * 2; ++ } else { ++ /* DXT3/5, 16 bytes per block */ ++ WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); ++ if (width + 3 < 8) ++ size = size * 2; ++ } ++ ++ return size; ++} ++ ++/** ++ * Compute sizes and fill in offset and blit information for the given ++ * image (determined by \p face and \p level). ++ * ++ * \param curOffset points to the offset at which the image is to be stored ++ * and is updated by this function according to the size of the image. ++ */ ++static void compute_tex_image_offset(r300_mipmap_tree *mt, ++ GLuint face, GLuint level, GLuint* curOffset) ++{ ++ r300_mipmap_level *lvl = &mt->levels[level]; ++ ++ /* Find image size in bytes */ ++ if (mt->compressed) { ++ lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx, ++ lvl->width, lvl->height, lvl->depth, mt->compressed); ++ } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { ++ lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height; ++ } else if (mt->tilebits & R300_TXO_MICRO_TILE) { ++ /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, ++ * though the actual offset may be different (if texture is less than ++ * 32 bytes width) to the untiled case */ ++ int w = (lvl->width * mt->bpp * 2 + 31) & ~31; ++ lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth; ++ } else { ++ int w = (lvl->width * mt->bpp + 31) & ~31; ++ lvl->size = w * lvl->height * lvl->depth; ++ } ++ assert(lvl->size > 0); ++ ++ /* All images are aligned to a 32-byte offset */ ++ *curOffset = (*curOffset + 0x1f) & ~0x1f; ++ lvl->faces[face].offset = *curOffset; ++ *curOffset += lvl->size; ++} ++ ++static GLuint minify(GLuint size, GLuint levels) ++{ ++ size = size >> levels; ++ if (size < 1) ++ size = 1; ++ return size; ++} ++ ++static void calculate_miptree_layout(r300_mipmap_tree *mt) ++{ ++ GLuint curOffset; ++ GLuint numLevels; ++ GLuint i; ++ ++ numLevels = mt->lastLevel - mt->firstLevel + 1; ++ assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); ++ ++ curOffset = 0; ++ for(i = 0; i < numLevels; i++) { ++ GLuint face; ++ ++ mt->levels[i].width = minify(mt->width0, mt->firstLevel + i); ++ mt->levels[i].height = minify(mt->height0, mt->firstLevel + i); ++ mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i); ++ ++ for(face = 0; face < mt->faces; face++) ++ compute_tex_image_offset(mt, face, i, &curOffset); ++ } ++ ++ /* Note the required size in memory */ ++ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; ++} ++ ++ ++/** ++ * Create a new mipmap tree, calculate its layout and allocate memory. ++ */ ++r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, ++ GLenum target, GLuint firstLevel, GLuint lastLevel, ++ GLuint width0, GLuint height0, GLuint depth0, ++ GLuint bpp, GLuint tilebits, GLuint compressed) ++{ ++ r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree); ++ ++ mt->r300 = rmesa; ++ mt->t = t; ++ mt->target = target; ++ mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 
6 : 1; ++ mt->firstLevel = firstLevel; ++ mt->lastLevel = lastLevel; ++ mt->width0 = width0; ++ mt->height0 = height0; ++ mt->depth0 = depth0; ++ mt->bpp = bpp; ++ mt->tilebits = tilebits; ++ mt->compressed = compressed; ++ ++ calculate_miptree_layout(mt); ++ ++ mt->bo = radeon_bufmgr_classic_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0); ++ ++ return mt; ++} ++ ++/** ++ * Destroy the given mipmap tree. ++ */ ++void r300_miptree_destroy(r300_mipmap_tree *mt) ++{ ++ dri_bo_unreference(mt->bo); ++ free(mt); ++} ++ ++/* ++ * Copy a width x height rectangle of cpp-byte elements from src to dst; pitches, x/y origins and width are given in elements and scaled by cpp below. XXX Move this into core Mesa? ++ */ ++static void ++_mesa_copy_rect(GLubyte * dst, ++ GLuint cpp, ++ GLuint dst_pitch, ++ GLuint dst_x, ++ GLuint dst_y, ++ GLuint width, ++ GLuint height, ++ const GLubyte * src, ++ GLuint src_pitch, GLuint src_x, GLuint src_y) ++{ ++ GLuint i; ++ ++ dst_pitch *= cpp; ++ src_pitch *= cpp; ++ dst += dst_x * cpp; ++ src += src_x * cpp; ++ dst += dst_y * dst_pitch; ++ src += src_y * src_pitch; /* source rows are src_pitch bytes apart, which may differ from dst_pitch */ ++ width *= cpp; ++ ++ if (width == dst_pitch && width == src_pitch) ++ memcpy(dst, src, height * width); /* both buffers are tightly packed: one contiguous copy */ ++ else { ++ for (i = 0; i < height; i++) { ++ memcpy(dst, src, width); ++ dst += dst_pitch; ++ src += src_pitch; ++ } ++ } ++} ++ ++/** ++ * Upload the given texture image to the given face/level of the mipmap tree. ++ * \param level of the texture, i.e.
\c level==mt->firstLevel is the first hw level ++ */ ++void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, ++ struct gl_texture_image *texImage) ++{ ++ GLuint hwlevel = level - mt->firstLevel; ++ r300_mipmap_level *lvl = &mt->levels[hwlevel]; ++ void *dest; ++ ++ assert(face < mt->faces); ++ assert(level >= mt->firstLevel && level <= mt->lastLevel); ++ assert(texImage && texImage->Data); ++ assert(texImage->Width == lvl->width); ++ assert(texImage->Height == lvl->height); ++ assert(texImage->Depth == lvl->depth); ++ ++ dri_bo_map(mt->bo, GL_TRUE); ++ ++ dest = mt->bo->virtual + lvl->faces[face].offset; ++ ++ if (mt->tilebits) ++ WARN_ONCE("%s: tiling not supported yet", __FUNCTION__); ++ ++ if (!mt->compressed) { ++ GLuint dst_align; ++ GLuint dst_pitch = lvl->width; ++ GLuint src_pitch = lvl->width; ++ ++ if (mt->target == GL_TEXTURE_RECTANGLE_NV) ++ dst_align = 64 / mt->bpp; ++ else ++ dst_align = 32 / mt->bpp; ++ dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1); ++ ++ _mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height, ++ texImage->Data, src_pitch, 0, 0); ++ } else { ++ memcpy(dest, texImage->Data, lvl->size); ++ } ++ ++ dri_bo_unmap(mt->bo); ++} +diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h +new file mode 100644 +index 0000000..a888ecf +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (C) 2008 Nicolai Haehnle. ++ * ++ * All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ */ ++ ++#ifndef __R300_MIPMAP_TREE_H_ ++#define __R300_MIPMAP_TREE_H_ ++ ++#include "r300_context.h" ++ ++typedef struct _r300_mipmap_tree r300_mipmap_tree; ++typedef struct _r300_mipmap_level r300_mipmap_level; ++typedef struct _r300_mipmap_image r300_mipmap_image; ++ ++struct _r300_mipmap_image { ++ GLuint offset; /** Offset of this image from the start of mipmap tree, in bytes */ ++}; ++ ++struct _r300_mipmap_level { ++ GLuint width; ++ GLuint height; ++ GLuint depth; ++ GLuint size; /** Size of each image, in bytes */ ++ r300_mipmap_image faces[6]; ++}; ++ ++ ++/** ++ * A mipmap tree contains texture images in the layout that the hardware ++ * expects. ++ * ++ * The meta-data of mipmap trees is immutable, i.e. you cannot change the ++ * layout on-the-fly; however, the texture contents (i.e. 
texels) can be ++ * changed. ++ */ ++struct _r300_mipmap_tree { ++ r300ContextPtr r300; ++ r300TexObj *t; ++ dri_bo *bo; ++ ++ GLuint totalsize; /** total size of the miptree, in bytes */ ++ ++ GLenum target; /** GL_TEXTURE_xxx */ ++ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ ++ GLuint firstLevel; /** First mip level stored in this mipmap tree */ ++ GLuint lastLevel; /** Last mip level stored in this mipmap tree */ ++ ++ GLuint width0; /** Width of level 0 image */ ++ GLuint height0; /** Height of level 0 image */ ++ GLuint depth0; /** Depth of level 0 image */ ++ ++ GLuint bpp; /** Bytes per texel */ ++ GLuint tilebits; /** R300_TXO_xxx_TILE */ ++ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ ++ ++ r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; ++}; ++ ++r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, ++ GLenum target, GLuint firstLevel, GLuint lastLevel, ++ GLuint width0, GLuint height0, GLuint depth0, ++ GLuint bpp, GLuint tilebits, GLuint compressed); ++void r300_miptree_destroy(r300_mipmap_tree *mt); ++ ++void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, ++ struct gl_texture_image *texImage); ++ ++ ++#endif /* __R300_MIPMAP_TREE_H_ */ +diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c +index 0a199e6..209fae9 100644 +--- a/src/mesa/drivers/dri/r300/r300_render.c ++++ b/src/mesa/drivers/dri/r300/r300_render.c +@@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) + static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct r300_dma_region *rvb = &rmesa->state.elt_dma; + void *out; + +- if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { +- rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; +- rvb->start = ((char *)elts) - rvb->address; +- rvb->aos_offset = +- 
rmesa->radeon.radeonScreen->gart_texture_offset + +- rvb->start; +- return; +- } else if (r300IsGartMemory(rmesa, elts, 1)) { +- WARN_ONCE("Pointer not within GART memory!\n"); +- _mesa_exit(-1); +- } +- +- r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); +- rvb->aos_offset = GET_START(rvb); ++ r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset, ++ n_elts * 4, 4); + +- out = rvb->address + rvb->start; ++ out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset; + memcpy(out, elts, n_elts * 4); + } + +-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, +- int vertex_count, int type) ++static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(rmesa); + +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); ++ BEGIN_BATCH(8); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + +- start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); +- e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); +- e32(addr); +- e32(vertex_count); ++ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); ++ OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); ++ OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0); ++ OUT_BATCH(vertex_count); ++ END_BATCH(); + } + + static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) + { ++ BATCH_LOCALS(rmesa); + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + +- 
start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); +- e32(nr); ++ BEGIN_BATCH(sz+2); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); ++ OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { +- e32((rmesa->state.aos[i].aos_size << 0) | +- (rmesa->state.aos[i].aos_stride << 8) | +- (rmesa->state.aos[i + 1].aos_size << 16) | +- (rmesa->state.aos[i + 1].aos_stride << 24)); +- +- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); +- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); ++ OUT_BATCH((rmesa->state.aos[i].components << 0) | ++ (rmesa->state.aos[i].stride << 8) | ++ (rmesa->state.aos[i + 1].components << 16) | ++ (rmesa->state.aos[i + 1].stride << 24)); ++ ++ OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo, ++ rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0); ++ OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo, ++ rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0); + } + + if (nr & 1) { +- e32((rmesa->state.aos[nr - 1].aos_size << 0) | +- (rmesa->state.aos[nr - 1].aos_stride << 8)); +- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); ++ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | ++ (rmesa->state.aos[nr - 1].stride << 8)); ++ OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo, ++ rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0); + } ++ END_BATCH(); + } + + static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(rmesa); + +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | 
type); ++ END_BATCH(); + } + + static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + int start, int end, int prim) + { ++ BATCH_LOCALS(rmesa); + int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; +@@ -268,6 +258,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + if (type < 0 || num_verts <= 0) + return; + ++ /* Make space for at least 64 dwords. ++ * This is supposed to ensure that we can get all rendering ++ * commands into a single command buffer. ++ */ ++ r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__); ++ + if (vb->Elts) { + if (num_verts > 65535) { + /* not implemented yet */ +@@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + */ + r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitAOS(rmesa, rmesa->state.aos_count, start); +- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); ++ r300FireEB(rmesa, num_verts, type); + } else { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); + } ++ COMMIT_BATCH(); + } + + static GLboolean r300RunRender(GLcontext * ctx, +@@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx, + + r300EmitCacheFlush(rmesa); + +-#ifdef USER_BUFFERS +- r300UseArrays(ctx); +-#endif +- + r300ReleaseArrays(ctx); + + return GL_FALSE; +diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c +index 15cd053..589327d 100644 +--- a/src/mesa/drivers/dri/r300/r300_state.c ++++ b/src/mesa/drivers/dri/r300/r300_state.c +@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "radeon_ioctl.h" + #include "radeon_state.h" ++#include "radeon_buffer.h" + #include "r300_context.h" + #include "r300_ioctl.h" + #include "r300_state.h" +@@ -1146,39 +1147,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx) + r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = rmesa; + struct gl_framebuffer *fb = ctx->DrawBuffer; +- driRenderbuffer *drb; ++ struct radeon_renderbuffer *rrb; + + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + /* draw to front */ +- drb = +- (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. +- Renderbuffer; ++ rrb = ++ (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { + /* draw to back */ +- drb = +- (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. +- Renderbuffer; ++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } else { + /* drawing to multiple buffers, or none */ + return; + } + +- assert(drb); +- assert(drb->flippedPitch); ++ assert(rrb); ++ assert(rrb->pitch); + + R300_STATECHANGE(rmesa, cb); + +- r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; + #if 0 + R200_STATECHANGE(rmesa, ctx); + +@@ -1497,14 +1484,9 @@ static void r300SetupTextures(GLcontext * ctx) + /* We cannot let disabled tmu offsets pass DRM */ + for (i = 0; i < mtu; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { +- +-#if 0 /* Enables old behaviour */ +- hw_tmu = i; +-#endif + tmu_mappings[i] = hw_tmu; + +- t = r300->state.texture.unit[i].texobj; +- /* XXX 
questionable fix for bug 9170: */ ++ t = r300_tex_obj(ctx->Texture.Unit[i]._Current); + if (!t) + continue; + +@@ -1530,21 +1512,20 @@ static void r300SetupTextures(GLcontext * ctx) + */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->filter_1 | +- translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); ++ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); + r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->size; + r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + + hw_tmu] = t->format; + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pitch_reg; +- r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + +- hw_tmu] = t->offset; ++ r300->hw.textures[hw_tmu] = t; + +- if (t->offset & R300_TXO_MACRO_TILE) { ++ if (t->tile_bits & R300_TXO_MACRO_TILE) { + WARN_ONCE("macro tiling enabled!\n"); + } + +- if (t->offset & R300_TXO_MICRO_TILE) { ++ if (t->tile_bits & R300_TXO_MICRO_TILE) { + WARN_ONCE("micro tiling enabled!\n"); + } + +@@ -2371,20 +2352,6 @@ static void r300ResetHwState(r300ContextPtr r300) + + r300BlendColor(ctx, ctx->Color.BlendColor); + +- /* Again, r300ClearBuffer uses this */ +- r300->hw.cb.cmd[R300_CB_OFFSET] = +- r300->radeon.state.color.drawOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +- + r300->hw.rb3d_dither_ctl.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[2] = 0; + r300->hw.rb3d_dither_ctl.cmd[3] = 0; +@@ -2400,10 +2367,6 @@ static void r300ResetHwState(r300ContextPtr r300) + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; + +- r300->hw.zb.cmd[R300_ZB_OFFSET] 
= +- r300->radeon.radeonScreen->depthOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; + + if (r300->radeon.sarea->tiling_enabled) { + /* XXX: Turn off when clearing buffers ? */ +diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h +index 0589ab7..96177ba 100644 +--- a/src/mesa/drivers/dri/r300/r300_state.h ++++ b/src/mesa/drivers/dri/r300/r300_state.h +@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define R300_FIREVERTICES( r300 ) \ + do { \ + \ +- if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ ++ if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) { \ + r300Flush( (r300)->radeon.glCtx ); \ + } \ + \ +diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c +index 8aebd9b..f4a0b7f 100644 +--- a/src/mesa/drivers/dri/r300/r300_swtcl.c ++++ b/src/mesa/drivers/dri/r300/r300_swtcl.c +@@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ static void flush_last_swtcl_prim( r300ContextPtr rmesa ); + + +-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); ++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset); + void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); + #define EMIT_ATTR( ATTR, STYLE ) \ + do { \ +@@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) + inputs[i] = -1; + } + } +- ++ + /* Fixed, apply to vir0 only */ + if (InputsRead & (1 << VERT_ATTRIB_POS)) + inputs[VERT_ATTRIB_POS] = 0; +@@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx ) + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); +- ++ + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } +- ++ + for (i = 0; i < nr; i++) { + int ci; +- ++ + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; +@@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx ) + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); +- ++ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); +- ++ + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; +- ++ + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, +- rmesa->swtcl.vertex_attrs, ++ rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); +- ++ + rmesa->swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); +@@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx ) + */ + static void 
flush_last_swtcl_prim( r300ContextPtr rmesa ) + { ++ BATCH_LOCALS(rmesa); ++ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); +- ++ + rmesa->dma.flush = NULL; + +- if (rmesa->dma.current.buf) { +- struct r300_dma_region *current = &rmesa->dma.current; +- GLuint current_offset = GET_START(current); ++ if (rmesa->dma.current) { ++ GLuint current_offset = rmesa->dma.current_used; + +- assert (current->start + ++ assert (rmesa->dma.current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- current->ptr); ++ rmesa->dma.current_vertexptr); + +- if (rmesa->dma.current.start != rmesa->dma.current.ptr) { ++ if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) { ++ rmesa->dma.current_used = rmesa->dma.current_vertexptr; + + r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); +- ++ + r300EmitState(rmesa); +- ++ + r300EmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, +- current_offset); +- ++ rmesa->dma.current, current_offset); ++ + r300EmitVbufPrim( rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); +- ++ + r300EmitCacheFlush(rmesa); ++ COMMIT_BATCH(); + } +- ++ + rmesa->swtcl.numverts = 0; +- current->start = current->ptr; + } + } + +@@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) + { + GLuint bytes = vsize * nverts; + +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) ++ if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) + r300RefillCurrentDmaRegion( rmesa, bytes); + + if (!rmesa->dma.flush) { +@@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); +- ASSERT( rmesa->dma.current.start + ++ ASSERT( rmesa->dma.current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- rmesa->dma.current.ptr ); ++ rmesa->dma.current_vertexptr 
); + + { +- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); +- rmesa->dma.current.ptr += bytes; ++ GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr); ++ rmesa->dma.current_vertexptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } +@@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->swtcl.verts; + #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +-#define VERTEX r300Vertex ++#define VERTEX r300Vertex + #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) + #define PRINT_VERTEX(x) + #undef TAG +@@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx) + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + // fprintf(stderr, "%s\n", __FUNCTION__); + +- r300ChooseRenderState(ctx); ++ r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + + r300UpdateShaders(rmesa); + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); +- +- if (rmesa->dma.flush != 0 && ++ ++ if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); + +@@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx) + static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- ++ + if (rmesa->swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; +@@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + // fprintf(stderr, "%s\n", __FUNCTION__); +- ++ + } + + static void r300ResetLineStipple(GLcontext *ctx) +@@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx) + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; +- ++ + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } +- 
++ + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; +@@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx) + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; +- ++ + /* FIXME: what are these numbers? */ +- _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, ++ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); +- ++ + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; +- rmesa->swtcl.hw_primitive = 0; ++ rmesa->swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); +@@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx) + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); + +- _mesa_validate_all_lighting_tables( ctx ); ++ _mesa_validate_all_lighting_tables( ctx ); + +- tnl->Driver.NotifyMaterialChange = ++ tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; + } + +@@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx) + { + } + +-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) ++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset) + { +- int cmd_reserved = 0; +- int cmd_written = 0; ++ BATCH_LOCALS(rmesa); + +- drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", +- __FUNCTION__, vertex_size, offset); +- +- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); +- e32(1); +- e32(vertex_size | (vertex_size << 8)); +- e32(offset); ++ fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", ++ __FUNCTION__, vertex_size, offset); ++ ++ BEGIN_BATCH(5); ++ 
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); ++ OUT_BATCH(1); ++ OUT_BATCH(vertex_size | (vertex_size << 8)); ++ OUT_BATCH_RELOC(0, bo, offset, 0); ++ END_BATCH(); + } + + void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) + { +- +- int cmd_reserved = 0; +- int cmd_written = 0; ++ BATCH_LOCALS(rmesa); + int type, num_verts; +- drm_radeon_cmd_header_t *cmd = NULL; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); +- +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); ++ ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); ++ END_BATCH(); + } +diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c +index f7f4972..c6ee1b5 100644 +--- a/src/mesa/drivers/dri/r300/r300_tex.c ++++ b/src/mesa/drivers/dri/r300/r300_tex.c +@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_context.h" + #include "r300_state.h" + #include "r300_ioctl.h" ++#include "r300_mipmap_tree.h" + #include "r300_tex.h" + + #include "xmlpool.h" +@@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) + */ + static void r300UpdateTexWrap(r300TexObjPtr t) + { +- struct gl_texture_object *tObj = t->base.tObj; ++ struct gl_texture_object *tObj = &t->base; + + t->filter &= + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); +@@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) + t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); + } + +-/** +- * Allocate space for and load the mesa images into the texture memory block. +- * This will happen before drawing with a new texture, or drawing with a +- * texture after it was swapped out or teximaged again. 
+- */ +- +-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) +-{ +- r300TexObjPtr t; +- +- t = CALLOC_STRUCT(r300_tex_obj); +- texObj->DriverData = t; +- if (t != NULL) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, +- (void *)texObj, (void *)t); +- } +- +- /* Initialize non-image-dependent parts of the state: +- */ +- t->base.tObj = texObj; +- t->border_fallback = GL_FALSE; +- +- make_empty_list(&t->base); +- +- r300UpdateTexWrap(t); +- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); +- r300SetTexBorderColor(t, texObj->_BorderChan); +- } +- +- return t; +-} +- + /* try to find a format which will only need a memcopy */ + static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, + GLenum srcType) +@@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, + return NULL; /* never get here */ + } + +-static GLboolean +-r300ValidateClientStorage(GLcontext * ctx, GLenum target, +- GLint internalFormat, +- GLint srcWidth, GLint srcHeight, +- GLenum format, GLenum type, const void *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) ++/** ++ * Marks the given face/level pair as dirty. ++ * This will cause an appropriate texture reupload the next time this ++ * texture is validated. ++ */ ++static void mark_texture_image_dirty(r300TexObj *t, int face, int level) + { +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "intformat %s format %s type %s\n", +- _mesa_lookup_enum_by_nr(internalFormat), +- _mesa_lookup_enum_by_nr(format), +- _mesa_lookup_enum_by_nr(type)); +- +- if (!ctx->Unpack.ClientStorage) +- return 0; +- +- if (ctx->_ImageTransferState || +- texImage->IsCompressed || texObj->GenerateMipmap) +- return 0; +- +- /* This list is incomplete, may be different on ppc??? 
+- */ +- switch (internalFormat) { +- case GL_RGBA: +- if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { +- texImage->TexFormat = _dri_texformat_argb8888; +- } else +- return 0; +- break; +- +- case GL_RGB: +- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { +- texImage->TexFormat = _dri_texformat_rgb565; +- } else +- return 0; +- break; +- +- case GL_YCBCR_MESA: +- if (format == GL_YCBCR_MESA && +- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { +- texImage->TexFormat = &_mesa_texformat_ycbcr_rev; +- } else if (format == GL_YCBCR_MESA && +- (type == GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE)) { +- texImage->TexFormat = &_mesa_texformat_ycbcr; +- } else +- return 0; +- break; +- +- default: +- return 0; +- } +- +- /* Could deal with these packing issues, but currently don't: +- */ +- if (packing->SkipPixels || +- packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { +- return 0; +- } +- +- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, +- format, type); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: srcRowStride %d/%x\n", +- __FUNCTION__, srcRowStride, srcRowStride); +- +- /* Could check this later in upload, pitch restrictions could be +- * relaxed, but would need to store the image pitch somewhere, +- * as packing details might change before image is uploaded: +- */ +- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) +- || (srcRowStride & 63)) +- return 0; +- +- /* Have validated that _mesa_transfer_teximage would be a straight +- * memcpy at this point. NOTE: future calls to TexSubImage will +- * overwrite the client data. This is explicitly mentioned in the +- * extension spec. 
+- */ +- texImage->Data = (void *)pixels; +- texImage->IsClientData = GL_TRUE; +- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; +- +- return 1; ++ t->dirty_images[face] |= 1 << level; + } + + static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, +@@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); +- return; +- } +- } +- +- /* Note, this will call ChooseTextureFormat */ + _mesa_store_teximage1d(ctx, target, level, internalFormat, + width, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + +- t->dirty_images[0] |= (1 << level); ++ mark_texture_image_dirty(t, 0, level); + } + + static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, +@@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); +- return; +- } +- } ++ r300TexObj* t = r300_tex_obj(texObj); + + _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, + format, type, pixels, packing, texObj, + texImage); + +- t->dirty_images[0] |= (1 << level); ++ mark_texture_image_dirty(t, 0, level); + } + + static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, +@@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, 
GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + GLuint face; + + /* which cube face or ordinary 2D image */ +@@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, + face = 0; + } + +- if (t != NULL) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); +- return; +- } +- } +- + texImage->IsClientData = GL_FALSE; + +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r300ChooseTextureFormat. +- */ +- _mesa_store_teximage2d(ctx, target, level, internalFormat, +- width, height, border, format, type, +- pixels, &ctx->Unpack, texObj, texImage); ++ if (RADEON_DEBUG & DEBUG_TEXTURE) ++ fprintf(stderr, "%s: Using normal storage\n", ++ __FUNCTION__); ++ ++ /* Normal path: copy (to cached memory) and eventually upload ++ * via another copy to GART memory and then a blit... Could ++ * eliminate one copy by going straight to (permanent) GART. ++ * ++ * Note, this will call r300ChooseTextureFormat. 
++ */ ++ _mesa_store_teximage2d(ctx, target, level, internalFormat, ++ width, height, border, format, type, ++ pixels, &ctx->Unpack, texObj, texImage); + +- t->dirty_images[face] |= (1 << level); +- } ++ mark_texture_image_dirty(t, face, level); + } + + static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, +@@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + GLuint face; + + /* which cube face or ordinary 2D image */ +@@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + face = 0; + } + +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); +- return; +- } +- } +- + _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, type, pixels, packing, texObj, + texImage); + +- t->dirty_images[face] |= (1 << level); ++ mark_texture_image_dirty(t, face, level); + } + + static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, +@@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + GLuint face; + + /* which cube face or ordinary 2D image */ +@@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, + face = 0; + } + +- if (t != NULL) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, +- "glCompressedTexImage2D"); +- return; +- } +- } +- 
+ texImage->IsClientData = GL_FALSE; + +- /* can't call this, different parameters. Would never evaluate to true anyway currently */ +-#if 0 +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else +-#endif +- { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r300ChooseTextureFormat. +- */ +- _mesa_store_compressed_teximage2d(ctx, target, level, +- internalFormat, width, height, +- border, imageSize, data, +- texObj, texImage); ++ if (RADEON_DEBUG & DEBUG_TEXTURE) ++ fprintf(stderr, "%s: Using normal storage\n", ++ __FUNCTION__); ++ ++ /* Normal path: copy (to cached memory) and eventually upload ++ * via another copy to GART memory and then a blit... Could ++ * eliminate one copy by going straight to (permanent) GART. ++ * ++ * Note, this will call r300ChooseTextureFormat. 
++ */ ++ _mesa_store_compressed_teximage2d(ctx, target, level, ++ internalFormat, width, height, ++ border, imageSize, data, ++ texObj, texImage); + +- t->dirty_images[face] |= (1 << level); +- } ++ mark_texture_image_dirty(t, face, level); + } + + static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, +@@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + GLuint face; + + /* which cube face or ordinary 2D image */ +@@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, + face = 0; + } + +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, +- "glCompressedTexSubImage3D"); +- return; +- } +- } +- + _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, + yoffset, width, height, format, + imageSize, data, texObj, texImage); + +- t->dirty_images[face] |= (1 << level); ++ mark_texture_image_dirty(t, face, level); + } + + static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, +@@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); +- return; +- } +- } ++ r300TexObj* t = r300_tex_obj(texObj); + + texImage->IsClientData = GL_FALSE; + +-#if 0 +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, 
texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else +-#endif +- { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r300ChooseTextureFormat. +- */ +- _mesa_store_teximage3d(ctx, target, level, internalFormat, +- width, height, depth, border, +- format, type, pixels, +- &ctx->Unpack, texObj, texImage); ++ if (RADEON_DEBUG & DEBUG_TEXTURE) ++ fprintf(stderr, "%s: Using normal storage\n", ++ __FUNCTION__); ++ ++ /* Normal path: copy (to cached memory) and eventually upload ++ * via another copy to GART memory and then a blit... Could ++ * eliminate one copy by going straight to (permanent) GART. ++ * ++ * Note, this will call r300ChooseTextureFormat. 
++ */ ++ _mesa_store_teximage3d(ctx, target, level, internalFormat, ++ width, height, depth, border, ++ format, type, pixels, ++ &ctx->Unpack, texObj, texImage); + +- t->dirty_images[0] |= (1 << level); +- } ++ mark_texture_image_dirty(t, 0, level); + } + + static void +@@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + { +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +-/* fprintf(stderr, "%s\n", __FUNCTION__); */ +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); +- return; +- } +- texObj->DriverData = t; +- } ++ r300TexObj* t = r300_tex_obj(texObj); + + _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, + texImage); + +- t->dirty_images[0] |= (1 << level); ++ mark_texture_image_dirty(t, 0, level); + } + + /** +@@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) + { +- r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %s )\n", __FUNCTION__, +@@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. 
+ */ +- driSwapOutTextureObject((driTextureObject *) t); ++ if (t->mt) { ++ r300_miptree_destroy(t->mt); ++ t->mt = 0; ++ } + break; + + case GL_DEPTH_TEXTURE_MODE: +@@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + } + } + +-static void r300BindTexture(GLcontext * ctx, GLenum target, +- struct gl_texture_object *texObj) +-{ +- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { +- fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, +- (void *)texObj, ctx->Texture.CurrentUnit); +- } +- +- if ((target == GL_TEXTURE_1D) +- || (target == GL_TEXTURE_2D) +- || (target == GL_TEXTURE_3D) +- || (target == GL_TEXTURE_CUBE_MAP) +- || (target == GL_TEXTURE_RECTANGLE_NV)) { +- assert(texObj->DriverData != NULL); +- } +-} +- + static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ r300TexObj* t = r300_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, +@@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + _mesa_lookup_enum_by_nr(texObj->Target)); + } + +- if (t != NULL) { +- if (rmesa) { +- R300_FIREVERTICES(rmesa); +- } ++ if (rmesa) { ++ int i; ++ R300_FIREVERTICES(rmesa); ++ ++ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) ++ if (rmesa->hw.textures[i] == t) ++ rmesa->hw.textures[i] = 0; ++ } + +- driDestroyTextureObject(t); ++ if (t->mt) { ++ r300_miptree_destroy(t->mt); ++ t->mt = 0; + } +- /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); + } + +@@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. 
+- * Note: we could use containment here to 'derive' the driver-specific +- * texture object from the core mesa gl_texture_object. Not done at this time. + * Fixup MaxAnisotropy according to user preference. + */ + static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, +@@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, + GLenum target) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_object *obj; +- obj = _mesa_new_texture_object(ctx, name, target); +- if (!obj) +- return NULL; +- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; ++ r300TexObj* t = CALLOC_STRUCT(r300_tex_obj); ++ ++ ++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { ++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, ++ t, _mesa_lookup_enum_by_nr(target)); ++ } ++ ++ _mesa_initialize_texture_object(&t->base, name, target); ++ t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy; ++ ++ /* Initialize hardware state */ ++ r300UpdateTexWrap(t); ++ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); ++ r300SetTexBorderColor(t, t->base._BorderChan); + +- r300AllocTexObj(obj); +- return obj; ++ return &t->base; + } + + void r300InitTextureFuncs(struct dd_function_table *functions) +@@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions) + functions->TexSubImage2D = r300TexSubImage2D; + functions->TexSubImage3D = r300TexSubImage3D; + functions->NewTextureObject = r300NewTextureObject; +- functions->BindTexture = r300BindTexture; + functions->DeleteTexture = r300DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + +diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h +index b86d45b..5d7f21e 100644 +--- a/src/mesa/drivers/dri/r300/r300_tex.h ++++ b/src/mesa/drivers/dri/r300/r300_tex.h +@@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx); + extern int r300UploadTexImages(r300ContextPtr 
rmesa, r300TexObjPtr t, + GLuint face); + +-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); +- + extern void r300InitTextureFuncs(struct dd_function_table *functions); + + #endif /* __r300_TEX_H__ */ +diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c +index 69847a4..b3b501b 100644 +--- a/src/mesa/drivers/dri/r300/r300_texmem.c ++++ b/src/mesa/drivers/dri/r300/r300_texmem.c +@@ -48,439 +48,15 @@ SOFTWARE. + #include "r300_context.h" + #include "r300_state.h" + #include "r300_cmdbuf.h" ++#include "r300_emit.h" ++#include "r300_mipmap_tree.h" + #include "radeon_ioctl.h" + #include "r300_tex.h" + #include "r300_ioctl.h" + #include <unistd.h> /* for usleep() */ + +-#ifdef USER_BUFFERS + #include "r300_mem.h" +-#endif + +-/** +- * Destroy any device-dependent state associated with the texture. This may +- * include NULLing out hardware state that points to the texture. +- */ +-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) +-{ +- int i; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, +- (void *)t, (void *)t->base.tObj); +- } +- +- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { +- if (rmesa->state.texture.unit[i].texobj == t) { +- rmesa->state.texture.unit[i].texobj = NULL; +- } +- } +-} +- +-/* ------------------------------------------------------------ +- * Texture image conversions +- */ +- +-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, +- r300TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint hwlevel, +- GLint x, GLint y, +- GLint width, GLint height) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- GLuint srcPitch, dstPitch; +- int blit_format; +- int srcOffset; +- +- /* +- * XXX it appears that we always upload the full image, not a subimage. +- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever +- * changed, the src pitch will have to change.
+- */ +- switch (texFormat->TexelBytes) { +- case 1: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 2: +- blit_format = R300_CP_COLOR_FORMAT_RGB565; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 4: +- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 8: +- case 16: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- default: +- return; +- } +- +- t->image[0][hwlevel].data = texImage->Data; +- srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); +- +- assert(srcOffset != ~0); +- +- /* Don't currently need to cope with small pitches? 
+- */ +- width = texImage->Width; +- height = texImage->Height; +- +- if (texFormat->TexelBytes > 4) { +- width *= texFormat->TexelBytes; +- } +- +- r300EmitWait(rmesa, R300_WAIT_3D); +- +- r300EmitBlit(rmesa, blit_format, +- srcPitch, +- srcOffset, +- dstPitch, +- t->bufAddr, +- x, +- y, +- t->image[0][hwlevel].x + x, +- t->image[0][hwlevel].y + y, width, height); +- +- r300EmitWait(rmesa, R300_WAIT_2D); +-} +- +-static void r300UploadRectSubImage(r300ContextPtr rmesa, +- r300TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint x, GLint y, GLint width, GLint height) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- int blit_format, dstPitch, done; +- +- switch (texFormat->TexelBytes) { +- case 1: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- break; +- case 2: +- blit_format = R300_CP_COLOR_FORMAT_RGB565; +- break; +- case 4: +- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; +- break; +- case 8: +- case 16: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- break; +- default: +- return; +- } +- +- t->image[0][0].data = texImage->Data; +- +- /* Currently don't need to cope with small pitches. +- */ +- width = texImage->Width; +- height = texImage->Height; +- dstPitch = t->pitch; +- +- if (texFormat->TexelBytes > 4) { +- width *= texFormat->TexelBytes; +- } +- +- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { +- /* In this case, could also use GART texturing. This is +- * currently disabled, but has been tested & works. +- */ +- t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); +- t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "Using GART texturing for rectangular client texture\n"); +- +- /* Release FB memory allocated for this image: +- */ +- /* FIXME This may not be correct as driSwapOutTextureObject sets +- * FIXME dirty_images. It may be fine, though. 
+- */ +- if (t->base.memBlock) { +- driSwapOutTextureObject((driTextureObject *) t); +- } +- } else if (texImage->IsClientData) { +- /* Data already in GART memory, with usable pitch. +- */ +- GLuint srcPitch; +- srcPitch = texImage->RowStride * texFormat->TexelBytes; +- r300EmitBlit(rmesa, +- blit_format, +- srcPitch, +- r300GartOffsetFromVirtual(rmesa, texImage->Data), +- dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); +- } else { +- /* Data not in GART memory, or bad pitch. +- */ +- for (done = 0; done < height;) { +- struct r300_dma_region region; +- int lines = +- MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); +- int src_pitch; +- char *tex; +- +- src_pitch = texImage->RowStride * texFormat->TexelBytes; +- +- tex = (char *)texImage->Data + done * src_pitch; +- +- memset(&region, 0, sizeof(region)); +- r300AllocDmaRegion(rmesa, &region, lines * dstPitch, +- 1024); +- +- /* Copy texdata to dma: +- */ +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "%s: src_pitch %d dst_pitch %d\n", +- __FUNCTION__, src_pitch, dstPitch); +- +- if (src_pitch == dstPitch) { +- memcpy(region.address + region.start, tex, +- lines * src_pitch); +- } else { +- char *buf = region.address + region.start; +- int i; +- for (i = 0; i < lines; i++) { +- memcpy(buf, tex, src_pitch); +- buf += dstPitch; +- tex += src_pitch; +- } +- } +- +- r300EmitWait(rmesa, R300_WAIT_3D); +- +- /* Blit to framebuffer +- */ +- r300EmitBlit(rmesa, +- blit_format, +- dstPitch, GET_START(&region), +- dstPitch | (t->tile_bits >> 16), +- t->bufAddr, 0, 0, 0, done, width, lines); +- +- r300EmitWait(rmesa, R300_WAIT_2D); +-#ifdef USER_BUFFERS +- r300_mem_use(rmesa, region.buf->id); +-#endif +- +- r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__); +- done += lines; +- } +- } +-} +- +-/** +- * Upload the texture image associated with texture \a t at the specified +- * level at the address relative to \a start.
+- */ +-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, +- GLint hwlevel, +- GLint x, GLint y, GLint width, GLint height, +- GLuint face) +-{ +- struct gl_texture_image *texImage = NULL; +- GLuint offset; +- GLint imageWidth, imageHeight; +- GLint ret; +- drm_radeon_texture_t tex; +- drm_radeon_tex_image_t tmp; +- const int level = hwlevel + t->base.firstLevel; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, +- "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", +- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, +- width, height, face); +- } +- +- ASSERT(face < 6); +- +- /* Ensure we have a valid texture to upload */ +- if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { +- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); +- return; +- } +- +- texImage = t->base.tObj->Image[face][level]; +- +- if (!texImage) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: texImage %d is NULL!\n", +- __FUNCTION__, level); +- return; +- } +- if (!texImage->Data) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is NULL!\n", +- __FUNCTION__); +- return; +- } +- +- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- assert(level == 0); +- assert(hwlevel == 0); +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is rectangular\n", +- __FUNCTION__); +- r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); +- return; +- } else if (texImage->IsClientData) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "%s: image data is in GART client storage\n", +- __FUNCTION__); +- r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, +- width, height); +- return; +- } else if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is in normal memory\n", +- __FUNCTION__); +- +- imageWidth = texImage->Width; +- imageHeight = texImage->Height; +- +- offset = t->bufAddr; +- +- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { +- 
GLint imageX = 0; +- GLint imageY = 0; +- GLint blitX = t->image[face][hwlevel].x; +- GLint blitY = t->image[face][hwlevel].y; +- GLint blitWidth = t->image[face][hwlevel].width; +- GLint blitHeight = t->image[face][hwlevel].height; +- fprintf(stderr, " upload image: %d,%d at %d,%d\n", +- imageWidth, imageHeight, imageX, imageY); +- fprintf(stderr, " upload blit: %d,%d at %d,%d\n", +- blitWidth, blitHeight, blitX, blitY); +- fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", +- (GLuint) offset, hwlevel, level); +- } +- +- t->image[face][hwlevel].data = texImage->Data; +- +- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. +- * NOTE: we're always use a 1KB-wide blit and I8 texture format. +- * We used to use 1, 2 and 4-byte texels and used to use the texture +- * width to dictate the blit width - but that won't work for compressed +- * textures. (Brian) +- * NOTE: can't do that with texture tiling. (sroland) +- */ +- tex.offset = offset; +- tex.image = &tmp; +- /* copy (x,y,width,height,data) */ +- memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); +- +- if (texImage->TexFormat->TexelBytes > 4) { +- const int log2TexelBytes = +- (3 + (texImage->TexFormat->TexelBytes >> 4)); +- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = +- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / +- 64, 1); +- tex.height = imageHeight; +- tex.width = imageWidth << log2TexelBytes; +- tex.offset += (tmp.x << log2TexelBytes) & ~1023; +- tmp.x = tmp.x % (1024 >> log2TexelBytes); +- tmp.width = tmp.width << log2TexelBytes; +- } else if (texImage->TexFormat->TexelBytes) { +- /* use multi-byte upload scheme */ +- tex.height = imageHeight; +- tex.width = imageWidth; +- switch (texImage->TexFormat->TexelBytes) { +- case 1: +- tex.format = RADEON_TXFORMAT_I8; +- break; +- case 2: +- tex.format = RADEON_TXFORMAT_AI88; +- break; +- case 4: +- tex.format = RADEON_TXFORMAT_ARGB8888; +- break; +- } +- tex.pitch = +- MAX2((texImage->Width * 
texImage->TexFormat->TexelBytes) / +- 64, 1); +- tex.offset += tmp.x & ~1023; +- tmp.x = tmp.x % 1024; +- +- if (t->tile_bits & R300_TXO_MICRO_TILE) { +- /* need something like "tiled coordinates" ? */ +- tmp.y = tmp.x / (tex.pitch * 128) * 2; +- tmp.x = +- tmp.x % (tex.pitch * 128) / 2 / +- texImage->TexFormat->TexelBytes; +- tex.pitch |= RADEON_DST_TILE_MICRO >> 22; +- } else { +- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); +- } +-#if 1 +- if ((t->tile_bits & R300_TXO_MACRO_TILE) && +- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) +- && ((!(t->tile_bits & R300_TXO_MICRO_TILE) +- && (texImage->Height >= 8)) +- || (texImage->Height >= 16))) { +- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, +- OR if height is smaller than 8 automatically, but if micro tiling is active +- the limit is height 16 instead ? */ +- tex.pitch |= RADEON_DST_TILE_MACRO >> 22; +- } +-#endif +- } else { +- /* In case of for instance 8x8 texture (2x2 dxt blocks), +- padding after the first two blocks is needed (only +- with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ +- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) +- has 4 real pixels. Needed so the kernel module reads +- the right amount of data. 
*/ +- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); +- tex.height = (imageHeight + 3) / 4; +- tex.width = (imageWidth + 3) / 4; +- if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { +- tex.width *= 8; +- } else { +- tex.width *= 16; +- } +- } +- +- LOCK_HARDWARE(&rmesa->radeon); +- do { +- ret = +- drmCommandWriteRead(rmesa->radeon.dri.fd, +- DRM_RADEON_TEXTURE, &tex, +- sizeof(drm_radeon_texture_t)); +- if (ret) { +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "DRM_RADEON_TEXTURE: again!\n"); +- usleep(1); +- } +- } while (ret == -EAGAIN); +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (ret) { +- fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); +- fprintf(stderr, " offset=0x%08x\n", offset); +- fprintf(stderr, " image width=%d height=%d\n", +- imageWidth, imageHeight); +- fprintf(stderr, " blit width=%d height=%d data=%p\n", +- t->image[face][hwlevel].width, +- t->image[face][hwlevel].height, +- t->image[face][hwlevel].data); +- _mesa_exit(-1); +- } +-} + + /** + * Upload the texture images associated with texture \a t. 
This might +@@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, + + int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) + { +- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- + if (t->image_override) + return 0; ++ if (!t->mt) ++ return 0; + + if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { +- fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, +- (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, +- t->base.totalSize, t->base.firstLevel, +- t->base.lastLevel); ++ fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__, ++ (void *)rmesa->radeon.glCtx, t, ++ t->mt->firstLevel, t->mt->lastLevel); + } + +- if (t->base.totalSize == 0) +- return 0; +- + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + radeonFinish(rmesa->radeon.glCtx); + } + +- LOCK_HARDWARE(&rmesa->radeon); +- +- if (t->base.memBlock == NULL) { +- int heap; +- +- heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, +- (driTextureObject *) t); +- if (heap == -1) { +- UNLOCK_HARDWARE(&rmesa->radeon); +- return -1; +- } +- +- /* Set the base offset of the texture image */ +- t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] +- + t->base.memBlock->ofs; +- t->offset = t->bufAddr; +- +- if (!(t->base.tObj->Image[0][0]->IsClientData)) { +- /* hope it's safe to add that here... */ +- t->offset |= t->tile_bits; +- } +- } +- +- /* Let the world know we've used this memory recently. +- */ +- driUpdateTextureLRU((driTextureObject *) t); +- UNLOCK_HARDWARE(&rmesa->radeon); +- + /* Upload any images that are new */ +- if (t->base.dirty_images[face]) { +- int i; ++ if (t->dirty_images[face]) { ++ int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1; + for (i = 0; i < numLevels; i++) { +- if ((t->base. 
+- dirty_images[face] & (1 << +- (i + t->base.firstLevel))) != +- 0) { +- r300UploadSubImage(rmesa, t, i, 0, 0, +- t->image[face][i].width, +- t->image[face][i].height, +- face); ++ if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) { ++ r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i, ++ t->base.Image[face][t->mt->firstLevel + i]); + } + } +- t->base.dirty_images[face] = 0; ++ t->dirty_images[face] = 0; + } + + if (RADEON_DEBUG & DEBUG_SYNC) { +diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c +index d19832f..5cb9010 100644 +--- a/src/mesa/drivers/dri/r300/r300_texstate.c ++++ b/src/mesa/drivers/dri/r300/r300_texstate.c +@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_state.h" + #include "r300_ioctl.h" + #include "radeon_ioctl.h" ++#include "r300_mipmap_tree.h" + #include "r300_tex.h" + #include "r300_reg.h" + +@@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) + if (!tObj) + return; + +- t = (r300TexObjPtr) tObj->DriverData; +- ++ t = r300_tex_obj(tObj); + + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { + case MESA_FORMAT_Z16: +@@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) + } + + +-/** +- * Compute sizes and fill in offset and blit information for the given +- * image (determined by \p face and \p level). +- * +- * \param curOffset points to the offset at which the image is to be stored +- * and is updated by this function according to the size of the image. 
+- */ +-static void compute_tex_image_offset( +- struct gl_texture_object *tObj, +- GLuint face, +- GLint level, +- GLint* curOffset) ++static void calculate_first_last_level(struct gl_texture_object *tObj, ++ GLuint *pfirstLevel, GLuint *plastLevel) + { +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- const struct gl_texture_image* texImage; +- GLuint blitWidth = R300_BLIT_WIDTH_BYTES; +- GLuint texelBytes; +- GLuint size; +- +- texImage = tObj->Image[0][level + t->base.firstLevel]; +- if (!texImage) +- return; +- +- texelBytes = texImage->TexFormat->TexelBytes; +- +- /* find image size in bytes */ +- if (texImage->IsCompressed) { +- if ((t->format & R300_TX_FORMAT_DXT1) == +- R300_TX_FORMAT_DXT1) { +- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); +- if ((texImage->Width + 3) < 8) /* width one block */ +- size = texImage->CompressedSize * 4; +- else if ((texImage->Width + 3) < 16) +- size = texImage->CompressedSize * 2; +- else +- size = texImage->CompressedSize; ++ const struct gl_texture_image * const baseImage = ++ tObj->Image[0][tObj->BaseLevel]; ++ ++ /* These must be signed values. MinLod and MaxLod can be negative numbers, ++ * and having firstLevel and lastLevel as signed prevents the need for ++ * extra sign checks. ++ */ ++ int firstLevel; ++ int lastLevel; ++ ++ /* Yes, this looks overly complicated, but it's all needed. ++ */ ++ switch (tObj->Target) { ++ case GL_TEXTURE_1D: ++ case GL_TEXTURE_2D: ++ case GL_TEXTURE_3D: ++ case GL_TEXTURE_CUBE_MAP: ++ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { ++ /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. 
++ */ ++ firstLevel = lastLevel = tObj->BaseLevel; + } else { +- /* DXT3/5, 16 bytes per block */ +- WARN_ONCE +- ("DXT 3/5 suffers from multitexturing problems!\n"); +- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); +- if ((texImage->Width + 3) < 8) +- size = texImage->CompressedSize * 2; +- else +- size = texImage->CompressedSize; ++ firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); ++ firstLevel = MAX2(firstLevel, tObj->BaseLevel); ++ firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); ++ lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); ++ lastLevel = MAX2(lastLevel, tObj->BaseLevel); ++ lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); ++ lastLevel = MIN2(lastLevel, tObj->MaxLevel); ++ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + } +- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- size = +- ((texImage->Width * texelBytes + +- 63) & ~63) * texImage->Height; +- blitWidth = 64 / texelBytes; +- } else if (t->tile_bits & R300_TXO_MICRO_TILE) { +- /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, +- though the actual offset may be different (if texture is less than +- 32 bytes width) to the untiled case */ +- int w = (texImage->Width * texelBytes * 2 + 31) & ~31; +- size = +- (w * ((texImage->Height + 1) / 2)) * +- texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } else { +- int w = (texImage->Width * texelBytes + 31) & ~31; +- size = w * texImage->Height * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- assert(size > 0); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", +- texImage->Width, texImage->Height, +- texImage->Depth, +- texImage->TexFormat->TexelBytes, +- texImage->InternalFormat); +- +- /* All images are aligned to a 32-byte offset */ +- *curOffset = (*curOffset + 0x1f) & ~0x1f; +- +- if (texelBytes) { +- /* fix x and y coords up later together with offset */ +- t->image[face][level].x = *curOffset; +- t->image[face][level].y = 0; +- t->image[face][level].width = +- MIN2(size / texelBytes, blitWidth); +- t->image[face][level].height = +- (size / texelBytes) / t->image[face][level].width; +- } else { +- t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; +- t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; +- t->image[face][level].width = +- MIN2(size, R300_BLIT_WIDTH_BYTES); +- t->image[face][level].height = size / t->image[face][level].width; ++ break; ++ case GL_TEXTURE_RECTANGLE_NV: ++ case GL_TEXTURE_4D_SGIS: ++ firstLevel = lastLevel = 0; ++ break; ++ default: ++ return; + } + +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", +- level, face, texImage->Width, texImage->Height, +- t->image[face][level].x, t->image[face][level].y, +- t->image[face][level].width, t->image[face][level].height, +- size, *curOffset); +- +- *curOffset += size; ++ /* save these values */ ++ *pfirstLevel = firstLevel; ++ *plastLevel = 
lastLevel; + } + + +- + /** +- * This function computes the number of bytes of storage needed for +- * the given texture object (all mipmap levels, all cube faces). +- * The \c image[face][level].x/y/width/height parameters for upload/blitting +- * are computed here. \c filter, \c format, etc. will be set here +- * too. ++ * This function ensures a validated miptree is available. ++ * ++ * Additionally, some texture format bits are configured here. + * + * \param rmesa Context pointer + * \param tObj GL texture object whose images are to be posted to +@@ -309,13 +250,13 @@ static void compute_tex_image_offset( + static void r300SetTexImages(r300ContextPtr rmesa, + struct gl_texture_object *tObj) + { +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + const struct gl_texture_image *baseImage = + tObj->Image[0][tObj->BaseLevel]; +- GLint curOffset; +- GLint i, texelBytes; +- GLint numLevels; +- GLint log2Width, log2Height, log2Depth; ++ GLint texelBytes; ++ GLuint firstLevel = 0, lastLevel = 0; ++ ++ calculate_first_last_level(tObj, &firstLevel, &lastLevel); + + /* Set the hardware texture format + */ +@@ -335,112 +276,66 @@ static void r300SetTexImages(r300ContextPtr rmesa, + } + + texelBytes = baseImage->TexFormat->TexelBytes; +- +- /* Compute which mipmap levels we really want to send to the hardware. +- */ +- driCalculateTextureFirstLastLevel((driTextureObject *) t); +- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; +- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; +- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; +- +- numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); +- +- /* Calculate mipmap offsets and dimensions for blitting (uploading) +- * The idea is that we lay out the mipmap levels within a block of +- * memory organized as a rectangle of width BLIT_WIDTH_BYTES. 
+- */ + t->tile_bits = 0; + +- /* figure out if this texture is suitable for tiling. */ +-#if 0 /* Disabled for now */ +- if (texelBytes) { +- if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && +- /* texrect might be able to use micro tiling too in theory? */ +- (baseImage->Height > 1)) { +- +- /* allow 32 (bytes) x 1 mip (which will use two times the space +- the non-tiled version would use) max if base texture is large enough */ +- if ((numLevels == 1) || +- (((baseImage->Width * texelBytes / +- baseImage->Height) <= 32) +- && (baseImage->Width * texelBytes > 64)) +- || +- ((baseImage->Width * texelBytes / +- baseImage->Height) <= 16)) { +- t->tile_bits |= R300_TXO_MICRO_TILE; +- } +- } ++ if (tObj->Target == GL_TEXTURE_CUBE_MAP) ++ t->format |= R300_TX_FORMAT_CUBIC_MAP; + +- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { +- /* we can set macro tiling even for small textures, they will be untiled anyway */ +- t->tile_bits |= R300_TXO_MACRO_TILE; ++ if (!t->image_override) { ++ GLuint compressed = baseImage->IsCompressed ? 
baseImage->TexFormat->MesaFormat : 0; ++ ++ if (t->mt) { ++ if (t->mt->firstLevel != firstLevel || ++ t->mt->lastLevel != lastLevel || ++ t->mt->width0 != baseImage->Width || ++ t->mt->height0 != baseImage->Height || ++ t->mt->depth0 != baseImage->Depth || ++ t->mt->bpp != texelBytes || ++ t->mt->tilebits != t->tile_bits || ++ t->mt->compressed != compressed) { ++ r300_miptree_destroy(t->mt); ++ t->mt = 0; ++ } + } +- } +-#endif +- +- curOffset = 0; + +- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- ASSERT(log2Width == log2Height); +- t->format |= R300_TX_FORMAT_CUBIC_MAP; +- +- for(i = 0; i < numLevels; i++) { +- GLuint face; +- for(face = 0; face < 6; face++) +- compute_tex_image_offset(tObj, face, i, &curOffset); ++ if (!t->mt) { ++ t->mt = r300_miptree_create(rmesa, t, tObj->Target, ++ firstLevel, lastLevel, ++ baseImage->Width, baseImage->Height, baseImage->Depth, ++ texelBytes, t->tile_bits, compressed); ++ memset(t->dirty_images, 0xff, sizeof(t->dirty_images)); + } + } else { + if (tObj->Target == GL_TEXTURE_3D) + t->format |= R300_TX_FORMAT_3D; +- +- for (i = 0; i < numLevels; i++) +- compute_tex_image_offset(tObj, 0, i, &curOffset); + } + + /* Align the total size of texture memory block. 
+ */ +- t->base.totalSize = +- (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; ++ // dritex->totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + t->size = +- (((tObj->Image[0][t->base.firstLevel]->Width - ++ (((tObj->Image[0][firstLevel]->Width - + 1) << R300_TX_WIDTHMASK_SHIFT) +- | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << ++ | ((tObj->Image[0][firstLevel]->Height - 1) << + R300_TX_HEIGHTMASK_SHIFT) +- | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) << ++ | ((tObj->Image[0][firstLevel]->DepthLog2) << + R300_TX_DEPTHMASK_SHIFT)) +- | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); ++ | ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT); + +- t->pitch = 0; ++ // t->pitch = 0; + +- /* Only need to round to nearest 32 for textures, but the blitter +- * requires 64-byte aligned pitches, and we may/may not need the +- * blitter. NPOT only! +- */ +- if (baseImage->IsCompressed) { +- t->pitch |= +- (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); +- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { ++ if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + unsigned int align = (64 / texelBytes) - 1; +- t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * +- texelBytes) + 63) & ~(63); + t->size |= R300_TX_SIZE_TXPITCH_EN; + if (!t->image_override) +- t->pitch_reg = +- (((tObj->Image[0][t->base.firstLevel]->Width) + +- align) & ~align) - 1; +- } else { +- t->pitch |= +- ((tObj->Image[0][t->base.firstLevel]->Width * +- texelBytes) + 63) & ~(63); ++ t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +- if (tObj->Image[0][t->base.firstLevel]->Width > 2048) ++ if (tObj->Image[0][firstLevel]->Width > 2048) + t->pitch_reg |= R500_TXWIDTH_BIT11; +- if (tObj->Image[0][t->base.firstLevel]->Height > 2048) ++ if (tObj->Image[0][firstLevel]->Height > 2048) + t->pitch_reg |= R500_TXHEIGHT_BIT11; + } + } +@@ -454,17 +349,15 
@@ static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit) + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + + ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); + +- if (t->base.dirty_images[0]) { ++ if (!t->mt || t->dirty_images[0]) { + R300_FIREVERTICES(rmesa); + + r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock && !t->image_override) +- return GL_FALSE; ++ r300UploadTexImages(rmesa, t, 0); + } + + return GL_TRUE; +@@ -475,7 +368,7 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + + ASSERT(tObj->Target == GL_TEXTURE_3D); + +@@ -484,12 +377,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) + return GL_FALSE; + } + +- if (t->base.dirty_images[0]) { ++ if (!t->mt || t->dirty_images[0]) { + R300_FIREVERTICES(rmesa); + r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock) +- return GL_FALSE; ++ r300UploadTexImages(rmesa, t, 0); + } + + return GL_TRUE; +@@ -500,14 +391,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + GLuint face; + + ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + +- if (t->base.dirty_images[0] || 
t->base.dirty_images[1] || +- t->base.dirty_images[2] || t->base.dirty_images[3] || +- t->base.dirty_images[4] || t->base.dirty_images[5]) { ++ if (!t->mt || ++ t->dirty_images[0] || t->dirty_images[1] || ++ t->dirty_images[2] || t->dirty_images[3] || ++ t->dirty_images[4] || t->dirty_images[5]) { + /* flush */ + R300_FIREVERTICES(rmesa); + /* layout memory space, once for all faces */ +@@ -516,18 +408,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) + + /* upload (per face) */ + for (face = 0; face < 6; face++) { +- if (t->base.dirty_images[face]) { +- r300UploadTexImages(rmesa, +- (r300TexObjPtr) tObj->DriverData, +- face); ++ if (t->dirty_images[face]) { ++ r300UploadTexImages(rmesa, t, face); + } + } + +- if (!t->base.memBlock) { +- /* texmem alloc failed, use s/w fallback */ +- return GL_FALSE; +- } +- + return GL_TRUE; + } + +@@ -536,18 +421,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + + ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); + +- if (t->base.dirty_images[0]) { ++ if (!t->mt || t->dirty_images[0]) { + R300_FIREVERTICES(rmesa); + + r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock && !t->image_override && +- !rmesa->prefer_gart_client_texturing) +- return GL_FALSE; ++ r300UploadTexImages(rmesa, t, 0); + } + + return GL_TRUE; +@@ -555,34 +437,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) + + static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) + { +- r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) 
tObj->DriverData; ++ r300TexObjPtr t = r300_tex_obj(tObj); + + /* Fallback if there's a texture border */ + if (tObj->Image[0][tObj->BaseLevel]->Border > 0) + return GL_FALSE; + +- /* Update state if this is a different texture object to last +- * time. +- */ +- if (rmesa->state.texture.unit[unit].texobj != t) { +- if (rmesa->state.texture.unit[unit].texobj != NULL) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= +- ~(1 << unit); +- } +- +- rmesa->state.texture.unit[unit].texobj = t; +- t->base.bound |= (1 << unit); +- driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ +- } ++ /* Fallback if memory upload didn't work */ ++ if (!t->mt) ++ return GL_FALSE; + +- return !t->border_fallback; ++ return GL_TRUE; + } + + void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, +@@ -591,20 +458,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + r300ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); +- r300TexObjPtr t; ++ r300TexObjPtr t = r300_tex_obj(tObj); + uint32_t pitch_val; + + if (!tObj) + return; + +- t = (r300TexObjPtr) tObj->DriverData; +- + t->image_override = GL_TRUE; + + if (!offset) + return; + +- t->offset = offset; ++ t->override_offset = offset; + t->pitch_reg &= (1 << 13) -1; + pitch_val = pitch; + +diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c +index 3fc724a..a9d36a2 100644 +--- a/src/mesa/drivers/dri/r300/radeon_context.c ++++ b/src/mesa/drivers/dri/r300/radeon_context.c +@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "state.h" + #include "matrix.h" + #include "framebuffer.h" ++#include "drirenderbuffer.h" + + #include "drivers/common/driverfuncs.h" + #include "swrast/swrast.h" +@@ -258,6 +259,59 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + } + } + ++static void ++radeon_make_renderbuffer_current(radeonContextPtr radeon, ++ GLframebuffer *draw) ++{ ++ int size = radeon->radeonScreen->driScreen->fbSize; ++ void *map = 0; ++ /* if radeon->fake */ ++ struct radeon_renderbuffer *rb; ++ uint32_t offset; ++ ++ if (!radeon->bufmgr) ++ return; ++ ++ if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { ++ ++ if (radeon->radeonScreen->kernel_mm) ++ map = radeon->radeonScreen->front.map; ++ ++ offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset; ++ if (!rb->bo) ++ rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "front buffer", ++ offset, size, map, ++ 0); ++ rb->cpp = radeon->radeonScreen->cpp; ++ rb->pitch = radeon->radeonScreen->frontPitch; ++ } ++ if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { ++ ++ if (radeon->radeonScreen->kernel_mm) ++ map = radeon->radeonScreen->back.map; ++ ++ offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset; ++ if (!rb->bo) ++ rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "back buffer", ++ offset, size, map, 0); ++ rb->cpp = radeon->radeonScreen->cpp; ++ rb->pitch = radeon->radeonScreen->backPitch; ++ } ++ if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) { ++ offset = radeon->radeonScreen->kernel_mm ? 
radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset; ++ ++ if (radeon->radeonScreen->kernel_mm) ++ map = radeon->radeonScreen->depth.map; ++ ++ if (!rb->bo) ++ rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "depth buffer", ++ offset, size, map, 0); ++ rb->cpp = radeon->radeonScreen->cpp; ++ rb->pitch = radeon->radeonScreen->depthPitch; ++ } ++} ++ ++ + /* Force the context `c' to be the current context and associate with it + * buffer `b'. + */ +@@ -265,51 +319,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv) + { +- if (driContextPriv) { +- radeonContextPtr radeon = +- (radeonContextPtr) driContextPriv->driverPrivate; ++ radeonContextPtr radeon; ++ GLframebuffer *dfb, *rfb; + ++ if (!driContextPriv) { + if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, +- radeon->glCtx); +- +- if (radeon->dri.drawable != driDrawPriv) { +- if (driDrawPriv->swap_interval == (unsigned)-1) { +- driDrawPriv->vblFlags = +- (radeon->radeonScreen->irq != 0) +- ? 
driGetDefaultVBlankFlags(&radeon-> +- optionCache) +- : VBLANK_FLAG_NO_IRQ; ++ fprintf(stderr, "%s ctx is null\n", __FUNCTION__); ++ _mesa_make_current(NULL, NULL, NULL); ++ return GL_TRUE; ++ } + +- driDrawableInitVBlank(driDrawPriv); +- } +- } ++ radeon = (radeonContextPtr) driContextPriv->driverPrivate; ++ dfb = driDrawPriv->driverPrivate; ++ rfb = driReadPriv->driverPrivate; ++ ++ if (RADEON_DEBUG & DEBUG_DRI) ++ fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx); ++ ++ driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); ++ if (driReadPriv != driDrawPriv) ++ driUpdateFramebufferSize(radeon->glCtx, driReadPriv); ++ ++ radeon_make_renderbuffer_current(radeon, dfb); + +- radeon->dri.readable = driReadPriv; ++ _mesa_make_current(radeon->glCtx, dfb, rfb); + +- if (radeon->dri.drawable != driDrawPriv || +- radeon->lastStamp != driDrawPriv->lastStamp) { +- radeon->dri.drawable = driDrawPriv; ++ if (radeon->dri.drawable != driDrawPriv) { ++ if (driDrawPriv->swap_interval == (unsigned)-1) { ++ driDrawPriv->vblFlags = ++ (radeon->radeonScreen->irq != 0) ++ ? 
driGetDefaultVBlankFlags(&radeon-> ++ optionCache) ++ : VBLANK_FLAG_NO_IRQ; + +- radeonSetCliprects(radeon); +- r300UpdateViewportOffset(radeon->glCtx); ++ driDrawableInitVBlank(driDrawPriv); + } ++ } + +- _mesa_make_current(radeon->glCtx, +- (GLframebuffer *) driDrawPriv-> +- driverPrivate, +- (GLframebuffer *) driReadPriv-> +- driverPrivate); ++ radeon->dri.readable = driReadPriv; + +- _mesa_update_state(radeon->glCtx); ++ if (radeon->dri.drawable != driDrawPriv || ++ radeon->lastStamp != driDrawPriv->lastStamp) { ++ radeon->dri.drawable = driDrawPriv; + +- radeonUpdatePageFlipping(radeon); +- } else { +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx is null\n", __FUNCTION__); +- _mesa_make_current(0, 0, 0); ++ radeonSetCliprects(radeon); ++ r300UpdateViewportOffset(radeon->glCtx); + } + ++ _mesa_update_state(radeon->glCtx); ++ ++ radeonUpdatePageFlipping(radeon); ++ + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "End %s\n", __FUNCTION__); + return GL_TRUE; +diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h +index 7458d63..828853b 100644 +--- a/src/mesa/drivers/dri/r300/radeon_context.h ++++ b/src/mesa/drivers/dri/r300/radeon_context.h +@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "drm.h" + #include "dri_util.h" + #include "colormac.h" ++#include "radeon_buffer.h" + + struct radeon_context; + typedef struct radeon_context radeonContextRec; +@@ -132,12 +133,13 @@ struct radeon_scissor_state { + + struct radeon_colorbuffer_state { + GLuint clear; +- GLint drawOffset, drawPitch; ++ struct radeon_renderbuffer *rrb; + }; + + struct radeon_state { + struct radeon_colorbuffer_state color; + struct radeon_scissor_state scissor; ++ struct radeon_renderbuffer *depth_buffer; + }; + + /** +@@ -185,6 +187,8 @@ struct radeon_context { + /* Configuration cache + */ + driOptionCache optionCache; ++ ++ struct radeon_bufmgr *bufmgr; + }; + + #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) +diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c +index 0c1a195..486ce8e 100644 +--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c ++++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c +@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "swrast/swrast.h" + #include "r300_context.h" + #include "radeon_ioctl.h" ++#include "radeon_buffer.h" + #include "r300_ioctl.h" + #include "r300_state.h" + #include "radeon_reg.h" +@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, + assert(dPriv->driContextPriv->driverPrivate); + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- ++ + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, + (void *)radeon->glCtx); +@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) + GLint ret; + GLboolean missed_target; + __DRIscreenPrivate *psp = dPriv->driScreenPriv; ++ GLframebuffer *fb = dPriv->driverPrivate; ++ struct radeon_renderbuffer *rrb; + + assert(dPriv); + assert(dPriv->driContextPriv); +@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ++ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, + radeon->sarea->pfCurrentPage); +@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) + radeon->swap_count++; + (void)(*psp->systemTime->getUST) (&radeon->swap_ust); + +- driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, ++ driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, + radeon->sarea->pfCurrentPage); + +- if (radeon->sarea->pfCurrentPage == 1) { +- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; +- } else { +- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; +- } +- +- if (IS_R300_CLASS(radeon->radeonScreen)) { +- r300ContextPtr r300 = (r300ContextPtr)radeon; +- R300_STATECHANGE(r300, cb); +- r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + +- 
r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +- } ++ radeon->state.color.rrb = rrb; + } + + void radeonWaitForIdleLocked(radeonContextPtr radeon) +diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c +index d54a821..4df6a9c 100644 +--- a/src/mesa/drivers/dri/r300/radeon_lock.c ++++ b/src/mesa/drivers/dri/r300/radeon_lock.c +@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "radeon_state.h" + #include "r300_context.h" + #include "r300_state.h" ++#include "r300_mem.h" + + #include "framebuffer.h" + +@@ -59,11 +60,11 @@ int prevLockLine = 0; + void radeonUpdatePageFlipping(radeonContextPtr rmesa) + { + int use_back; ++ __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; ++ GLframebuffer *fb = drawable->driverPrivate; + + rmesa->doPageFlip = rmesa->sarea->pfState; + if (rmesa->glCtx->WinSysDrawBuffer) { +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); + r300UpdateDrawBuffer(rmesa->glCtx); + } + +@@ -72,16 +73,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa) + BUFFER_BACK_LEFT) : 1; + use_back ^= (rmesa->sarea->pfCurrentPage == 1); + +- if (use_back) { +- rmesa->state.color.drawOffset = +- rmesa->radeonScreen->backOffset; +- rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; +- } else { +- rmesa->state.color.drawOffset = +- rmesa->radeonScreen->frontOffset; +- rmesa->state.color.drawPitch = +- rmesa->radeonScreen->frontPitch; +- } ++ if (use_back) ++ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ else ++ rmesa->state.color.rrb = 
(void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ ++ rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; + } + + /* Update the hardware state. This is called if another context has +@@ -125,12 +122,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) + } + + if (sarea->ctx_owner != rmesa->dri.hwContext) { +- int i; +- + sarea->ctx_owner = rmesa->dri.hwContext; +- for (i = 0; i < r300->nr_heaps; i++) { +- DRI_AGE_TEXTURES(r300->texture_heaps[i]); +- } ++ radeonBufmgrContendedLockTake(r300->radeon.bufmgr); + } + + rmesa->lost_context = GL_TRUE; +diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c +index 3616d8b..58b00ff 100644 +--- a/src/mesa/drivers/dri/r300/radeon_span.c ++++ b/src/mesa/drivers/dri/r300/radeon_span.c +@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_ioctl.h" + #include "radeon_span.h" + +-#include "drirenderbuffer.h" ++#include "radeon_buffer.h" + + #define DBG 0 + +@@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * information. 
+ */ + #define LOCAL_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ ++ struct radeon_renderbuffer *rrb = (void *) rb; \ ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ +- GLubyte *buf = (GLubyte *) drb->flippedData \ +- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ ++ GLubyte *buf = (GLubyte *) rrb->bo->virtual \ ++ + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp; \ + GLuint p; \ + (void) p; + + #define LOCAL_DEPTH_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ ++ struct radeon_renderbuffer *rrb = (void *) rb; \ ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLuint xo = dPriv->x; \ + GLuint yo = dPriv->y; \ +- GLubyte *buf = (GLubyte *) drb->Base.Data; ++ GLubyte *buf = (GLubyte *) rrb->base.Data; + + #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +@@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define TAG(x) radeon##x##_RGB565 + #define TAG2(x,y) radeon##x##_RGB565##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) ++#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2) + #include "spantmp2.h" + + /* 32 bit, ARGB8888 color spanline and pixel functions +@@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define TAG(x) radeon##x##_ARGB8888 + #define TAG2(x,y) radeon##x##_ARGB8888##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) ++#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4) + #include "spantmp2.h" + + /* ================================================================ +@@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * too... 
+ */ + +-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) ++static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb, ++ GLint x, GLint y) + { +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { ++ GLuint pitch = rrb->pitch; ++ if (rrb->depthHasSurface) { + return 4 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0..1] = 0 */ +@@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) + } + + static INLINE GLuint +-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) ++radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y) + { +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { ++ GLuint pitch = rrb->pitch; ++ if (rrb->depthHasSurface) { + return 2 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0] = 0 */ +@@ -175,10 +176,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) + #define VALUE_TYPE GLushort + + #define WRITE_DEPTH( _x, _y, d ) \ +- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; ++ *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d; + + #define READ_DEPTH( d, _x, _y ) \ +- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); ++ d = *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )); + + #define TAG(x) radeon##x##_z16 + #include "depthtmp.h" +@@ -193,7 +194,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) + #ifdef COMPILE_R300 + #define WRITE_DEPTH( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x000000ff; \ + tmp |= ((d << 8) & 0xffffff00); \ +@@ -202,7 +203,7 @@ do { \ + #else + #define WRITE_DEPTH( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint 
*)(buf + offset); \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ +@@ -213,12 +214,12 @@ do { \ + #ifdef COMPILE_R300 + #define READ_DEPTH( d, _x, _y ) \ + do { \ +- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ ++ d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ + _y + yo )) & 0xffffff00) >> 8; \ + }while(0) + #else + #define READ_DEPTH( d, _x, _y ) \ +- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ ++ d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ + _y + yo )) & 0x00ffffff; + #endif + +@@ -234,7 +235,7 @@ do { \ + #ifdef COMPILE_R300 + #define WRITE_STENCIL( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ +@@ -243,7 +244,7 @@ do { \ + #else + #define WRITE_STENCIL( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ +@@ -254,14 +255,14 @@ do { \ + #ifdef COMPILE_R300 + #define READ_STENCIL( d, _x, _y ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = tmp & 0x000000ff; \ + } while (0) + #else + #define READ_STENCIL( d, _x, _y ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ ++ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = (tmp & 0xff000000) >> 24; \ + } while (0) +@@ -270,6 +271,22 @@ do { \ + #define TAG(x) radeon##x##_z24_s8 + #include "stenciltmp.h" + ++static void map_buffer(struct gl_renderbuffer *rb, GLboolean write) ++{ ++ struct radeon_renderbuffer *rrb = (void*)rb; ++ ++ if (rrb->bo) ++ dri_bo_map(rrb->bo, write); ++} ++ 
++static void unmap_buffer(struct gl_renderbuffer *rb) ++{ ++ struct radeon_renderbuffer *rrb = (void*)rb; ++ ++ if (rrb->bo) ++ dri_bo_unmap(rrb->bo); ++} ++ + /* Move locking out to get reasonable span performance (10x better + * than doing this in HW_LOCK above). WaitForIdle() is the main + * culprit. +@@ -278,45 +295,51 @@ do { \ + static void radeonSpanRenderStart(GLcontext * ctx) + { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ int i; + #ifdef COMPILE_R300 + r300ContextPtr r300 = (r300ContextPtr) rmesa; + R300_FIREVERTICES(r300); + #else + RADEON_FIREVERTICES(rmesa); + #endif ++ ++ /* color draw buffers */ ++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) ++ map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE); ++ ++ map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE); ++ ++ if (ctx->DrawBuffer->_DepthBuffer) ++ map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE); ++ if (ctx->DrawBuffer->_StencilBuffer) ++ map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE); ++ ++ /* The locking and wait for idle should really only be needed in classic mode. ++ * In a future memory manager based implementation, this should become ++ * unnecessary due to the fact that mapping our buffers, textures, etc. ++ * should implicitly wait for any previous rendering commands that must ++ * be waited on. */ + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); +- +- /* Read the first pixel in the frame buffer. This should +- * be a noop, right? In fact without this conform fails as reading +- * from the framebuffer sometimes produces old results -- the +- * on-card read cache gets mixed up and doesn't notice that the +- * framebuffer has been updated. +- * +- * Note that we should probably be reading some otherwise unused +- * region of VRAM, otherwise we might get incorrect results when +- * reading pixels from the top left of the screen. +- * +- * I found this problem on an R420 with glean's texCube test. 
+- * Note that the R200 span code also *writes* the first pixel in the +- * framebuffer, but I've found this to be unnecessary. +- * -- Nicolai Hähnle, June 2008 +- */ +- { +- int p; +- driRenderbuffer *drb = +- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; +- volatile int *buf = +- (volatile int *)(rmesa->dri.screen->pFB + drb->offset); +- p = *buf; +- } + } + + static void radeonSpanRenderFinish(GLcontext * ctx) + { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ int i; + _swrast_flush(ctx); + UNLOCK_HARDWARE(rmesa); ++ ++ /* color draw buffers */ ++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) ++ unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]); ++ ++ unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer); ++ ++ if (ctx->DrawBuffer->_DepthBuffer) ++ unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); ++ if (ctx->DrawBuffer->_StencilBuffer) ++ unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); + } + + void radeonInitSpanFuncs(GLcontext * ctx) +@@ -330,20 +353,17 @@ void radeonInitSpanFuncs(GLcontext * ctx) + /** + * Plug in the Get/Put routines for the given driRenderbuffer. 
+ */ +-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) ++void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) + { +- if (drb->Base.InternalFormat == GL_RGBA) { +- if (vis->redBits == 5 && vis->greenBits == 6 +- && vis->blueBits == 5) { +- radeonInitPointers_RGB565(&drb->Base); +- } else { +- radeonInitPointers_ARGB8888(&drb->Base); +- } +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { +- radeonInitDepthPointers_z16(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { +- radeonInitDepthPointers_z24_s8(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { +- radeonInitStencilPointers_z24_s8(&drb->Base); ++ if (rrb->base.InternalFormat == GL_RGB5) { ++ radeonInitPointers_RGB565(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_RGBA8) { ++ radeonInitPointers_ARGB8888(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) { ++ radeonInitDepthPointers_z16(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) { ++ radeonInitDepthPointers_z24_s8(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) { ++ radeonInitStencilPointers_z24_s8(&rrb->base); + } + } +diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c +index d81318c..a7720da 100644 +--- a/src/mesa/drivers/dri/r300/radeon_state.c ++++ b/src/mesa/drivers/dri/r300/radeon_state.c +@@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) + void radeonInitState(radeonContextPtr radeon) + { + radeon->Fallback = 0; +- +- if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { +- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; +- } else { +- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; +- radeon->state.color.drawPitch = 
radeon->radeonScreen->frontPitch; +- } + } + + +diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h +new file mode 100644 +index 0000000..a5e4529 +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h +@@ -0,0 +1,62 @@ ++/* ++ * Copyright 2008 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software") ++ * to deal in the software without restriction, including without limitation ++ * on the rights to use, copy, modify, merge, publish, distribute, sub ++ * license, and/or sell copies of the Software, and to permit persons to whom ++ * them Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER ++ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: ++ * Adam Jackson ++ */ ++ ++#ifndef RADEON_BUFFER_H ++#define RADEON_BUFFER_H ++ ++#include "dri_bufmgr.h" ++ ++struct radeon_renderbuffer ++{ ++ struct gl_renderbuffer base; ++ dri_bo *bo; ++ unsigned int cpp; ++ /* unsigned int offset; */ ++ unsigned int pitch; ++ unsigned int height; ++ ++ /* boo Xorg 6.8.2 compat */ ++ int depthHasSurface; ++ ++ __DRIdrawablePrivate *dPriv; ++}; ++ ++struct radeon_bufmgr { ++ dri_bufmgr base; ++ void (*emit_reloc)(dri_bo *buf, uint32_t *head, uint32_t *count_p, uint32_t read_domains, uint32_t write_domain); ++}; ++ ++void radeon_bufmgr_emit_reloc(dri_bo *buf, uint32_t *head, uint32_t *count_p, uint32_t read_domains, uint32_t write_domain); ++ ++dri_bo *radeon_bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name, ++ unsigned long offset, unsigned long size, ++ void *virtual, uint32_t initial_domain); ++dri_bo *radeon_bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name, ++ unsigned long size, unsigned int alignment, ++ uint32_t location_mask); ++ ++int radeon_bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta, ++ GLuint offset, dri_bo *target); ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c +index 84b5c46..cc384e1 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_screen.c ++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c +@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "radeon_chipset.h" + #include "radeon_macros.h" + #include "radeon_screen.h" ++#include "radeon_buffer.h" + #if !RADEON_COMMON + #include "radeon_context.h" + #include "radeon_span.h" +@@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "GL/internal/dri_interface.h" + ++#include ++#include ++ + /* Radeon configuration + */ + #include "xmlpool.h" +@@ -344,11 +348,99 @@ static const __DRItexOffsetExtension r200texOffsetExtension = { + #endif + + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) ++#if 0 + static const __DRItexOffsetExtension r300texOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + r300SetTexOffset, + }; + #endif ++#endif ++ ++ ++static void ++radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, ++ struct radeon_gem_object *gem_obj) ++{ ++ struct drm_gem_close close_args; ++ struct drm_gem_open args; ++ struct drm_radeon_gem_mmap mmap_args; ++ struct drm_radeon_gem_pin pin_args; ++ int ret; ++ ++ if (gem_obj->gem_handle) { ++ close_args.handle = gem_obj->gem_handle; ++ ++ ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args); ++ gem_obj->gem_handle = 0; ++ } ++ ++ /* do open */ ++ args.name = gem_obj->gem_name; ++ ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args); ++ if (ret) { ++ fprintf(stderr," failed to open handle %d\n", gem_obj->gem_name); ++ return; ++ } ++ ++ gem_obj->gem_handle = args.handle; ++ gem_obj->size = args.size; ++ ++ mmap_args.handle = gem_obj->gem_handle; ++ mmap_args.size = gem_obj->size; ++ mmap_args.offset = 0; ++ ++ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args, ++ sizeof(mmap_args)); ++ ++ if (ret) ++ return; ++ ++ gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr); ++ ++ pin_args.handle = gem_obj->gem_handle; ++ pin_args.alignment = 0; ++ ++ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args, ++ sizeof(pin_args)); ++ ++ if (ret) ++ return; ++ ++ gem_obj->offset = pin_args.offset; ++ ++} ++ ++static int ++radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, ++ RADEONDRIPtr dri_priv) ++{ ++ /* STOP GAP HERE */ ++ ++ screen->front.gem_name = dri_priv->frontOffset; ++ radeon_gem_update_handle(screen, sPriv, &screen->front); ++ 
screen->frontOffset = screen->front.offset; ++ ++ screen->back.gem_name = dri_priv->backOffset; ++ radeon_gem_update_handle(screen, sPriv, &screen->back); ++ ++ screen->backOffset = screen->back.offset; ++ ++ screen->depth.gem_name = dri_priv->depthOffset; ++ radeon_gem_update_handle(screen, sPriv, &screen->depth); ++ screen->depthOffset = screen->depth.offset; ++ ++ screen->vram_texture.gem_name = dri_priv->textureOffset; ++ radeon_gem_update_handle(screen, sPriv, &screen->vram_texture); ++ ++ screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->vram_texture.offset + screen->fbLocation; ++ screen->texSize[RADEON_LOCAL_TEX_HEAP] = screen->vram_texture.size; ++ ++ screen->gart_texture.gem_name = dri_priv->gartTexHandle; ++ radeon_gem_update_handle(screen, sPriv, &screen->gart_texture); ++ screen->gartTextures.map = screen->gart_texture.map; ++ screen->gart_texture_offset = screen->gart_texture.offset + screen->gart_base; ++ ++} + + /* Create the device specific screen private data struct. + */ +@@ -389,6 +481,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->card_type = (dri_priv->IsPCI ? 
RADEON_CARD_PCI : RADEON_CARD_AGP); + { + int ret; ++ ++#ifdef RADEON_PARAM_KERNEL_MM ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM, ++ &screen->kernel_mm); ++ ++ if (ret && ret != -EINVAL) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret); ++ return NULL; ++ } ++ ++ if (ret == -EINVAL) ++ screen->kernel_mm = 0; ++#endif ++ + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, + &screen->gart_buffer_offset); + +@@ -422,32 +529,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); + } + +- screen->mmio.handle = dri_priv->registerHandle; +- screen->mmio.size = dri_priv->registerSize; +- if ( drmMap( sPriv->fd, +- screen->mmio.handle, +- screen->mmio.size, +- &screen->mmio.map ) ) { +- FREE( screen ); +- __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); +- return NULL; +- } ++ if (!screen->kernel_mm) { ++ screen->mmio.handle = dri_priv->registerHandle; ++ screen->mmio.size = dri_priv->registerSize; ++ if ( drmMap( sPriv->fd, ++ screen->mmio.handle, ++ screen->mmio.size, ++ &screen->mmio.map ) ) { ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); ++ return NULL; ++ } + +- RADEONMMIO = screen->mmio.map; ++ RADEONMMIO = screen->mmio.map; + +- screen->status.handle = dri_priv->statusHandle; +- screen->status.size = dri_priv->statusSize; +- if ( drmMap( sPriv->fd, +- screen->status.handle, +- screen->status.size, +- &screen->status.map ) ) { +- drmUnmap( screen->mmio.map, screen->mmio.size ); +- FREE( screen ); +- __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); +- return NULL; ++ screen->status.handle = dri_priv->statusHandle; ++ screen->status.size = dri_priv->statusSize; ++ if ( drmMap( sPriv->fd, ++ screen->status.handle, ++ screen->status.size, ++ &screen->status.map ) ) { ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap (2) 
failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ screen->scratch = (__volatile__ u_int32_t *) ++ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + } +- screen->scratch = (__volatile__ u_int32_t *) +- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + + screen->buffers = drmMapBufs( sPriv->fd ); + if ( !screen->buffers ) { +@@ -458,22 +567,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + return NULL; + } + +- if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { +- screen->gartTextures.handle = dri_priv->gartTexHandle; +- screen->gartTextures.size = dri_priv->gartTexMapSize; +- if ( drmMap( sPriv->fd, +- screen->gartTextures.handle, +- screen->gartTextures.size, +- (drmAddressPtr)&screen->gartTextures.map ) ) { +- drmUnmapBufs( screen->buffers ); +- drmUnmap( screen->status.map, screen->status.size ); +- drmUnmap( screen->mmio.map, screen->mmio.size ); +- FREE( screen ); +- __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); +- return NULL; ++ if (!screen->kernel_mm) { ++ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { ++ screen->gartTextures.handle = dri_priv->gartTexHandle; ++ screen->gartTextures.size = dri_priv->gartTexMapSize; ++ if ( drmMap( sPriv->fd, ++ screen->gartTextures.handle, ++ screen->gartTextures.size, ++ (drmAddressPtr)&screen->gartTextures.map ) ) { ++ drmUnmapBufs( screen->buffers ); ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); ++ return NULL; ++ } ++ ++ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; + } +- +- screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; + } + + screen->chip_flags = 0; +@@ -840,7 +951,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, + &temp); + if (ret) { +- if 
(screen->chip_family < CHIP_FAMILY_RS690) ++ if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm) + screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; + else { + FREE( screen ); +@@ -881,55 +992,59 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + } + } + +- if ( sPriv->drm_version.minor >= 10 ) { +- drm_radeon_setparam_t sp; +- +- sp.param = RADEON_SETPARAM_FB_LOCATION; +- sp.value = screen->fbLocation; +- +- drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, +- &sp, sizeof( sp ) ); +- } +- +- screen->frontOffset = dri_priv->frontOffset; + screen->frontPitch = dri_priv->frontPitch; +- screen->backOffset = dri_priv->backOffset; + screen->backPitch = dri_priv->backPitch; +- screen->depthOffset = dri_priv->depthOffset; + screen->depthPitch = dri_priv->depthPitch; + +- /* Check if ddx has set up a surface reg to cover depth buffer */ +- screen->depthHasSurface = (sPriv->ddx_version.major > 4) || +- /* these chips don't use tiled z without hyperz. So always pretend +- we have set up a surface which will cause linear reads/writes */ +- ((screen->chip_family & RADEON_CLASS_R100) && +- !(screen->chip_flags & RADEON_CHIPSET_TCL)); +- +- if ( dri_priv->textureSize == 0 ) { +- screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; +- screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; +- screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = +- dri_priv->log2GARTTexGran; +- } else { +- screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset +- + screen->fbLocation; +- screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; +- screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = +- dri_priv->log2TexGran; +- } ++ if (!screen->kernel_mm) { ++ if ( sPriv->drm_version.minor >= 10 ) { ++ drm_radeon_setparam_t sp; ++ ++ sp.param = RADEON_SETPARAM_FB_LOCATION; ++ sp.value = screen->fbLocation; ++ ++ drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, ++ &sp, sizeof( sp ) ); ++ } + +- if ( 
!screen->gartTextures.map || dri_priv->textureSize == 0 +- || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { +- screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; +- screen->texOffset[RADEON_GART_TEX_HEAP] = 0; +- screen->texSize[RADEON_GART_TEX_HEAP] = 0; +- screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; ++ screen->frontOffset = dri_priv->frontOffset; ++ screen->backOffset = dri_priv->backOffset; ++ screen->depthOffset = dri_priv->depthOffset; ++ ++ /* Check if ddx has set up a surface reg to cover depth buffer */ ++ screen->depthHasSurface = (sPriv->ddx_version.major > 4) || ++ /* these chips don't use tiled z without hyperz. So always pretend ++ we have set up a surface which will cause linear reads/writes */ ++ ((screen->chip_family & RADEON_CLASS_R100) && ++ !(screen->chip_flags & RADEON_CHIPSET_TCL)); ++ ++ if ( dri_priv->textureSize == 0 ) { ++ screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; ++ screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; ++ screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = ++ dri_priv->log2GARTTexGran; ++ } else { ++ screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset ++ + screen->fbLocation; ++ screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; ++ screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = ++ dri_priv->log2TexGran; ++ } ++ ++ if ( !screen->gartTextures.map || dri_priv->textureSize == 0 ++ || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { ++ screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; ++ screen->texOffset[RADEON_GART_TEX_HEAP] = 0; ++ screen->texSize[RADEON_GART_TEX_HEAP] = 0; ++ screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; ++ } else { ++ screen->numTexHeaps = RADEON_NR_TEX_HEAPS; ++ screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; ++ screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; ++ screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran; ++ } + } else { +- screen->numTexHeaps 
= RADEON_NR_TEX_HEAPS; +- screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; +- screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; +- screen->logTexGranularity[RADEON_GART_TEX_HEAP] = +- dri_priv->log2GARTTexGran; ++ radeon_init_mm_buffers(screen, sPriv, dri_priv); + } + + i = 0; +@@ -954,8 +1069,10 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + #endif + + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) ++#if 0 + screen->extensions[i++] = &r300texOffsetExtension.base; + #endif ++#endif + + screen->extensions[i++] = NULL; + sPriv->extensions = screen->extensions; +@@ -975,12 +1092,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) + if (!screen) + return; + +- if ( screen->gartTextures.map ) { +- drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); +- } + drmUnmapBufs( screen->buffers ); +- drmUnmap( screen->status.map, screen->status.size ); +- drmUnmap( screen->mmio.map, screen->mmio.size ); ++ if (!screen->kernel_mm) { ++ if ( screen->gartTextures.map ) { ++ drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); ++ } ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ } + + /* free all option information */ + driDestroyOptionInfo (&screen->optionCache); +@@ -1004,6 +1123,160 @@ radeonInitDriver( __DRIscreenPrivate *sPriv ) + return GL_TRUE; + } + ++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) ++static GLboolean ++radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb, ++ GLenum intFormat, GLuint w, GLuint h) ++{ ++ rb->Width = w; ++ rb->Height = h; ++ rb->_ActualFormat = intFormat; ++ ++ return GL_TRUE; ++} ++ ++ ++static struct radeon_renderbuffer * ++radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) ++{ ++ struct radeon_renderbuffer *ret; ++ ++ ret = CALLOC_STRUCT(radeon_renderbuffer); ++ if (!ret) ++ return NULL; ++ ++ _mesa_init_renderbuffer(&ret->base, 0); ++ ++ /* XXX format 
junk */ ++ switch (format) { ++ case GL_RGB5: ++ ret->base._ActualFormat = GL_RGB5; ++ ret->base._BaseFormat = GL_RGBA; ++ ret->base.RedBits = 5; ++ ret->base.GreenBits = 6; ++ ret->base.BlueBits = 5; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_RGBA8: ++ ret->base._ActualFormat = GL_RGBA8; ++ ret->base._BaseFormat = GL_RGBA; ++ ret->base.RedBits = 8; ++ ret->base.GreenBits = 8; ++ ret->base.BlueBits = 8; ++ ret->base.AlphaBits = 8; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_STENCIL_INDEX8_EXT: ++ ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT; ++ ret->base._BaseFormat = GL_STENCIL_INDEX; ++ ret->base.StencilBits = 8; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_DEPTH_COMPONENT16: ++ ret->base._ActualFormat = GL_DEPTH_COMPONENT16; ++ ret->base._BaseFormat = GL_DEPTH_COMPONENT; ++ ret->base.DepthBits = 16; ++ ret->base.DataType = GL_UNSIGNED_SHORT; ++ break; ++ case GL_DEPTH_COMPONENT24: ++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; ++ ret->base._BaseFormat = GL_DEPTH_COMPONENT; ++ ret->base.DepthBits = 24; ++ ret->base.DataType = GL_UNSIGNED_INT; ++ break; ++ case GL_DEPTH24_STENCIL8_EXT: ++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; ++ ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT; ++ ret->base.DepthBits = 24; ++ ret->base.StencilBits = 8; ++ ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT; ++ break; ++ default: ++ fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format); ++ _mesa_delete_renderbuffer(&ret->base); ++ return NULL; ++ } ++ ++ ret->dPriv = driDrawPriv; ++ ret->base.InternalFormat = format; ++ ++ ret->base.AllocStorage = radeon_alloc_window_storage; ++ ++ radeonSetSpanFunctions(ret); ++ ++ return ret; ++} ++ ++/** ++ * Create the Mesa framebuffer and renderbuffers for a given window/drawable. ++ * ++ * \todo This function (and its interface) will need to be updated to support ++ * pbuffers. 
++ */ ++static GLboolean ++radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, ++ __DRIdrawablePrivate *driDrawPriv, ++ const __GLcontextModes *mesaVis, ++ GLboolean isPixmap ) ++{ ++ radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; ++ ++ const GLboolean swDepth = GL_FALSE; ++ const GLboolean swAlpha = GL_FALSE; ++ const GLboolean swAccum = mesaVis->accumRedBits > 0; ++ const GLboolean swStencil = mesaVis->stencilBits > 0 && ++ mesaVis->depthBits != 24; ++ GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); ++ GLenum depthFormat = GL_NONE; ++ struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); ++ ++ if (mesaVis->depthBits == 16) ++ depthFormat = GL_DEPTH_COMPONENT16; ++ else if (mesaVis->depthBits == 24) ++ depthFormat = GL_DEPTH_COMPONENT24; ++ ++ /* front color renderbuffer */ ++ { ++ struct radeon_renderbuffer *front = ++ radeon_create_renderbuffer(rgbFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base); ++ } ++ ++ /* back color renderbuffer */ ++ if (mesaVis->doubleBufferMode) { ++ struct radeon_renderbuffer *back = ++ radeon_create_renderbuffer(rgbFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base); ++ } ++ ++ /* depth renderbuffer */ ++ if (depthFormat != GL_NONE) { ++ struct radeon_renderbuffer *depth = ++ radeon_create_renderbuffer(depthFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base); ++ depth->depthHasSurface = screen->depthHasSurface; ++ } ++ ++ /* stencil renderbuffer */ ++ if (mesaVis->stencilBits > 0 && !swStencil) { ++ struct radeon_renderbuffer *stencil = ++ radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base); ++ stencil->depthHasSurface = screen->depthHasSurface; ++ } ++ ++ _mesa_add_soft_renderbuffers(fb, ++ GL_FALSE, /* color */ ++ swDepth, ++ swStencil, ++ swAccum, ++ swAlpha, ++ GL_FALSE /* aux */); ++ driDrawPriv->driverPrivate = (void *) 
fb; ++ ++ return (driDrawPriv->driverPrivate != NULL); ++} ++#else + + /** + * Create the Mesa framebuffer and renderbuffers for a given window/drawable. +@@ -1103,7 +1376,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, + return (driDrawPriv->driverPrivate != NULL); + } + } +- ++#endif + + static void + radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) +@@ -1199,11 +1472,11 @@ radeonInitScreen(__DRIscreenPrivate *psp) + if (!radeonInitDriver(psp)) + return NULL; + ++ /* for now fill in all modes */ + return radeonFillInModes( psp, + dri_priv->bpp, + (dri_priv->bpp == 16) ? 16 : 24, +- (dri_priv->bpp == 16) ? 0 : 8, +- (dri_priv->backOffset != dri_priv->depthOffset) ); ++ (dri_priv->bpp == 16) ? 0 : 8, 1); + } + + +diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h +index ab859d5..82eb7d8 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_screen.h ++++ b/src/mesa/drivers/dri/radeon/radeon_screen.h +@@ -55,6 +55,14 @@ typedef struct { + drmAddress map; /* Mapping of the DRM region */ + } radeonRegionRec, *radeonRegionPtr; + ++struct radeon_gem_object { ++ uint32_t gem_name; ++ uint32_t gem_handle; ++ uint64_t size; ++ void *map; ++ uint64_t offset; ++}; ++ + typedef struct { + int chip_family; + int chip_flags; +@@ -107,6 +115,13 @@ typedef struct { + const __DRIextension *extensions[8]; + + int num_gb_pipes; ++ ++ int kernel_mm; ++ struct radeon_gem_object front; ++ struct radeon_gem_object back; ++ struct radeon_gem_object depth; ++ struct radeon_gem_object vram_texture; ++ struct radeon_gem_object gart_texture; + } radeonScreenRec, *radeonScreenPtr; + + #define IS_R100_CLASS(screen) \ +diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h +index 9abe086..1650a9b 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_span.h ++++ b/src/mesa/drivers/dri/radeon/radeon_span.h +@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "drirenderbuffer.h" + ++#include "radeon_buffer.h" ++ + extern void radeonInitSpanFuncs(GLcontext * ctx); +-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); + ++#if COMPILE_R300 ++extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); ++#else ++extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); ++#endif + #endif diff --git a/r300-cmdbuf.patch b/r300-cmdbuf.patch deleted file mode 100644 index 50eb918..0000000 --- a/r300-cmdbuf.patch +++ /dev/null @@ -1,7669 +0,0 @@ -diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile -index e9144ac..b6ed58b 100644 ---- a/src/mesa/drivers/dri/r200/Makefile -+++ b/src/mesa/drivers/dri/r200/Makefile -@@ -48,7 +48,8 @@ SYMLINKS = \ - COMMON_SYMLINKS = \ - radeon_chipset.h \ - radeon_screen.c \ -- radeon_screen.h -+ radeon_screen.h \ -+ radeon_buffer.h - - ##### TARGETS ##### - -diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile -index 6ca9342..3bb1ff4 100644 ---- a/src/mesa/drivers/dri/r300/Makefile -+++ b/src/mesa/drivers/dri/r300/Makefile -@@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1) - EGL_SOURCES = server/radeon_egl.c - endif - --COMMON_SOURCES = \ -- ../../common/driverfuncs.c \ -- ../common/mm.c \ -- ../common/utils.c \ -- ../common/texmem.c \ -- ../common/vblank.c \ -- ../common/xmlconfig.c \ -- ../common/dri_util.c -- - DRIVER_SOURCES = \ - radeon_screen.c \ - radeon_context.c \ -@@ -36,6 +27,7 @@ DRIVER_SOURCES = \ - r300_texmem.c \ - r300_tex.c \ - r300_texstate.c \ -+ r300_mipmap_tree.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ -@@ -51,7 +43,7 @@ DRIVER_SOURCES = \ - r300_swtcl.c \ - $(EGL_SOURCES) - --C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) -+C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES) - - DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ - -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 -@@ -68,7 +60,8 @@ 
COMMON_SYMLINKS = \ - radeon_chipset.h \ - radeon_screen.c \ - radeon_screen.h \ -- radeon_span.h -+ radeon_span.h \ -+ radeon_buffer.h - - ##### TARGETS ##### - -diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c -index c069660..493b0ac 100644 ---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c -+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c -@@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_reg.h" - #include "r300_cmdbuf.h" - #include "r300_emit.h" -+#include "r300_mem.h" -+#include "r300_mipmap_tree.h" - #include "r300_state.h" - - // Set this to 1 for extremely verbose debugging of command buffers - #define DEBUG_CMDBUF 0 - -+/** # of dwords reserved for additional instructions that may need to be written -+ * during flushing. -+ */ -+#define SPACE_FOR_FLUSHING 4 -+ - /** - * Send the current command buffer via ioctl to the hardware. - */ -@@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) - drm_radeon_cmd_buffer_t cmd; - int start; - -+ if (r300->cmdbuf.flushing) { -+ fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n"); -+ exit(-1); -+ } -+ r300->cmdbuf.flushing = 1; -+ - if (r300->radeon.lost_context) { - start = 0; - r300->radeon.lost_context = GL_FALSE; - } else -- start = r300->cmdbuf.count_reemit; -+ start = r300->cmdbuf.reemit; - - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s from %s - %i cliprects\n", - __FUNCTION__, caller, r300->radeon.numClipRects); - -- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) -- for (i = start; i < r300->cmdbuf.count_used; ++i) -+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) { -+ fprintf(stderr, "written: %d committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed); -+ for (i = start; i < r300->cmdbuf.written; ++i) - fprintf(stderr, "%d: %08x\n", i, -- r300->cmdbuf.cmd_buf[i]); -+ ((uint32_t*)r300->cmdbuf.buf->virtual)[i]); -+ } - } - -- cmd.buf = (char 
*)(r300->cmdbuf.cmd_buf + start); -- cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; -+ if (r300->cmdbuf.written != r300->cmdbuf.committed) { -+ _mesa_problem(r300->radeon.glCtx, -+ "Command buffer contains %d uncommitted dwords\n" -+ "in r300FlushCmdBufLocked called from %s.\n", -+ r300->cmdbuf.written - r300->cmdbuf.committed, caller); -+ } -+ -+ dri_bo_unmap(r300->cmdbuf.buf); -+ dri_process_relocs(r300->cmdbuf.buf, 0); -+ -+ cmd.buf = (char *)r300->cmdbuf.buf->virtual + 4*start; -+ cmd.bufsz = (r300->cmdbuf.committed - start) * 4; - - if (r300->radeon.state.scissor.enabled) { - cmd.nbox = r300->radeon.state.scissor.numClipRects; -@@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) - radeonWaitForIdleLocked(&r300->radeon); - } - -+ dri_post_submit(r300->cmdbuf.buf, 0); -+ dri_bo_unreference(r300->cmdbuf.buf); -+ - r300->dma.nr_released_bufs = 0; -- r300->cmdbuf.count_used = 0; -- r300->cmdbuf.count_reemit = 0; -+ r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", -+ r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF); -+ r300->cmdbuf.written = 0; -+ r300->cmdbuf.reserved = 0; -+ r300->cmdbuf.committed = 0; -+ r300->cmdbuf.reemit = 0; -+ dri_bo_map(r300->cmdbuf.buf, GL_TRUE); -+ -+ r300->cmdbuf.flushing = 0; - - return ret; - } -@@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) - int ret; - - LOCK_HARDWARE(&r300->radeon); -- - ret = r300FlushCmdBufLocked(r300, caller); -- - UNLOCK_HARDWARE(&r300->radeon); - - if (ret) { -@@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) - return ret; - } - -+/** -+ * Make sure that enough space is available in the command buffer -+ * by flushing if necessary. 
-+ * -+ * \param dwords The number of dwords we need to be free on the command buffer -+ */ -+void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller) -+{ -+ assert(dwords < r300->cmdbuf.size); -+ -+ if (!r300->cmdbuf.flushing) -+ dwords += SPACE_FOR_FLUSHING; -+ -+ if (r300->cmdbuf.written + dwords > r300->cmdbuf.size) -+ r300FlushCmdBuf(r300, caller); -+} -+ -+void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line) -+{ -+ assert(r300->cmdbuf.written == r300->cmdbuf.reserved); -+ -+ r300EnsureCmdBufSpace(r300, n, function); -+ -+ if (autostate && !r300->cmdbuf.written) { -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, -+ "Reemit state after flush (from %s)\n", function); -+ r300EmitState(r300); -+ } -+ -+ r300->cmdbuf.reserved += n; -+ assert(r300->cmdbuf.reserved < r300->cmdbuf.size); -+ -+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n", -+ n, r300->cmdbuf.written, function, line); -+} -+ - static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) - { - int i; -@@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat - */ - static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) - { -+ BATCH_LOCALS(r300); - struct r300_state_atom *atom; -- uint32_t *dest; - int dwords; - -- dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; -- -- /* Emit WAIT */ -- *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); -- dest++; -- r300->cmdbuf.count_used++; -- -- /* Emit cache flush */ -- *dest = cmdpacket0(R300_TX_INVALTAGS, 1); -- dest++; -- r300->cmdbuf.count_used++; -- -- *dest = R300_TX_FLUSH; -- dest++; -- r300->cmdbuf.count_used++; -- -- /* Emit END3D */ -- *dest = cmdpacify(); -- dest++; -- r300->cmdbuf.count_used++; -+ BEGIN_BATCH_NO_AUTOSTATE(4); -+ OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN)); -+ OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1)); -+ 
OUT_BATCH(R300_TX_FLUSH); -+ OUT_BATCH(cmdpacify()); -+ END_BATCH(); - - /* Emit actual atoms */ -- - foreach(atom, &r300->hw.atomlist) { - if ((atom->dirty || r300->hw.all_dirty) == dirty) { - dwords = (*atom->check) (r300, atom); -@@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - r300PrintStateAtom(r300, atom); - } -- memcpy(dest, atom->cmd, dwords * 4); -- dest += dwords; -- r300->cmdbuf.count_used += dwords; -+ if (atom->emit) { -+ (*atom->emit)(r300); -+ } else { -+ BEGIN_BATCH_NO_AUTOSTATE(dwords); -+ OUT_BATCH_TABLE(atom->cmd, dwords); -+ END_BATCH(); -+ } - atom->dirty = GL_FALSE; - } else { - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { -@@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) - } - } - } -+ -+ COMMIT_BATCH(); - } - - /** -@@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300) - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); - -- if (r300->cmdbuf.count_used && !r300->hw.is_dirty -+ if (r300->cmdbuf.written && !r300->hw.is_dirty - && !r300->hw.all_dirty) - return; - - /* To avoid going across the entire set of states multiple times, just check -- * for enough space for the case of emitting all state, and inline the -- * r300AllocCmdBuf code here without all the checks. -+ * for enough space for the case of emitting all state. 
- */ - r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); - -- if (!r300->cmdbuf.count_used) { -+ if (!r300->cmdbuf.written) { - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Begin reemit state\n"); - - r300EmitAtoms(r300, GL_FALSE); -- r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; -+ r300->cmdbuf.reemit = r300->cmdbuf.committed; - } - - if (RADEON_DEBUG & DEBUG_STATE) -@@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300) - - r300EmitAtoms(r300, GL_TRUE); - -- assert(r300->cmdbuf.count_used < r300->cmdbuf.size); -+ assert(r300->cmdbuf.written < r300->cmdbuf.size); - - r300->hw.is_dirty = GL_FALSE; - r300->hw.all_dirty = GL_FALSE; -@@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300) - #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) - #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) - -+static void emit_tex_offsets(r300ContextPtr r300) -+{ -+ BATCH_LOCALS(r300); -+ int numtmus = packet0_count(r300->hw.tex.offset.cmd); -+ -+ if (numtmus) { -+ int i; -+ -+ BEGIN_BATCH(numtmus + 1); -+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus); -+ for(i = 0; i < numtmus; ++i) { -+ r300TexObj *t = r300->hw.textures[i]; -+ if (t && !t->image_override) { -+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET); -+ } else if (!t) { -+ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); -+ } else { -+ OUT_BATCH(t->override_offset); -+ } -+ } -+ END_BATCH(); -+ } -+} -+ -+static void emit_cb_offset(r300ContextPtr r300) -+{ -+ BATCH_LOCALS(r300); -+ struct radeon_renderbuffer *rrb; -+ uint32_t cbpitch; -+ -+ rrb = r300->radeon.state.color.rrb; -+ if (!rrb) { -+ fprintf(stderr, "no rrb\n"); -+ return; -+ } -+ -+ cbpitch = rrb->pitch; -+ if (rrb->cpp == 4) -+ cbpitch |= R300_COLOR_FORMAT_ARGB8888; -+ else -+ cbpitch |= R300_COLOR_FORMAT_RGB565; -+ -+ if (r300->radeon.sarea->tiling_enabled) -+ cbpitch |= R300_COLOR_TILE_ENABLE; -+ -+ BEGIN_BATCH(4); -+ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); 
-+ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); -+ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); -+ OUT_BATCH(cbpitch); -+ END_BATCH(); -+} -+ -+static void emit_zb_offset(r300ContextPtr r300) -+{ -+ BATCH_LOCALS(r300); -+ struct radeon_renderbuffer *rrb; -+ uint32_t zbpitch; -+ -+ rrb = r300->radeon.state.depth_buffer; -+ if (!rrb) -+ return; -+ -+ zbpitch = rrb->pitch; -+ -+ BEGIN_BATCH(3); -+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2); -+ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); -+ OUT_BATCH(zbpitch); -+ END_BATCH(); -+ -+} -+ - static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) - { - return atom->cmd_size; -@@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300) - ALLOC_STATE(rop, always, 2, 0); - r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); - ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); -- r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); -- r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); -+ r300->hw.cb.emit = &emit_cb_offset; - ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); - r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); - ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); -@@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300) - r300->hw.zstencil_format.cmd[0] = - cmdpacket0(R300_ZB_FORMAT, 4); - ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); -- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); -+ r300->hw.zb.emit = emit_zb_offset; - ALLOC_STATE(zb_depthclearvalue, always, 2, 0); - r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); - ALLOC_STATE(unk4F30, always, 3, 0); -@@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300) - ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); - r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); - -- ALLOC_STATE(tex.offset, variable, mtu + 1, 0); -+ ALLOC_STATE(tex.offset, variable, 1, 0); - r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = - 
cmdpacket0(R300_TX_OFFSET_0, 0); -+ r300->hw.tex.offset.emit = &emit_tex_offsets; - - ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); - r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = -@@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300) - size * 4, r300->hw.max_state_size * 4); - } - -+ r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", -+ size*4, 16, DRM_BO_MEM_CMDBUF); - r300->cmdbuf.size = size; -- r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); -- r300->cmdbuf.count_used = 0; -- r300->cmdbuf.count_reemit = 0; -+ r300->cmdbuf.written = 0; -+ r300->cmdbuf.reserved = 0; -+ r300->cmdbuf.committed = 0; -+ r300->cmdbuf.reemit = 0; -+ dri_bo_map(r300->cmdbuf.buf, GL_TRUE); - } - - /** -@@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300) - { - struct r300_state_atom *atom; - -- FREE(r300->cmdbuf.cmd_buf); -+ dri_bo_unmap(r300->cmdbuf.buf); -+ dri_bo_unreference(r300->cmdbuf.buf); - - foreach(atom, &r300->hw.atomlist) { - FREE(atom->cmd); - } - } -- --void r300EmitBlit(r300ContextPtr rmesa, -- GLuint color_fmt, -- GLuint src_pitch, -- GLuint src_offset, -- GLuint dst_pitch, -- GLuint dst_offset, -- GLint srcx, GLint srcy, -- GLint dstx, GLint dsty, GLuint w, GLuint h) --{ -- drm_r300_cmd_header_t *cmd; -- -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, -- "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", -- __FUNCTION__, src_pitch, src_offset, srcx, srcy, -- dst_pitch, dst_offset, dstx, dsty, w, h); -- -- assert((src_pitch & 63) == 0); -- assert((dst_pitch & 63) == 0); -- assert((src_offset & 1023) == 0); -- assert((dst_offset & 1023) == 0); -- assert(w < (1 << 16)); -- assert(h < (1 << 16)); -- -- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); -- -- cmd[0].header.cmd_type = R300_CMD_PACKET3; -- cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; -- cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); -- cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | -- RADEON_GMC_DST_PITCH_OFFSET_CNTL | -- 
RADEON_GMC_BRUSH_NONE | -- (color_fmt << 8) | -- RADEON_GMC_SRC_DATATYPE_COLOR | -- RADEON_ROP3_S | -- RADEON_DP_SRC_SOURCE_MEMORY | -- RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); -- -- cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); -- cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); -- cmd[5].u = (srcx << 16) | srcy; -- cmd[6].u = (dstx << 16) | dsty; /* dst */ -- cmd[7].u = (w << 16) | h; --} -- --void r300EmitWait(r300ContextPtr rmesa, GLuint flags) --{ -- drm_r300_cmd_header_t *cmd; -- -- assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); -- -- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); -- cmd[0].u = 0; -- cmd[0].wait.cmd_type = R300_CMD_WAIT; -- cmd[0].wait.flags = flags; --} -diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h -index a8eaa58..4708a4c 100644 ---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h -+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h -@@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300); - - extern void r300InitCmdBuf(r300ContextPtr r300); - extern void r300DestroyCmdBuf(r300ContextPtr r300); -+extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller); -+ -+extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line); - - /** -- * Make sure that enough space is available in the command buffer -- * by flushing if necessary. -- * -- * \param dwords The number of dwords we need to be free on the command buffer -+ * Every function writing to the command buffer needs to declare this -+ * to get the necessary local variables. 
- */ --static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, -- int dwords, const char *caller) --{ -- assert(dwords < r300->cmdbuf.size); -+#define BATCH_LOCALS(r300) \ -+ const r300ContextPtr b_l_r300 = r300 - -- if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) -- r300FlushCmdBuf(r300, caller); --} -+/** -+ * Prepare writing n dwords to the command buffer, -+ * including producing any necessary state emits on buffer wraparound. -+ */ -+#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__) -+ -+/** -+ * Same as BEGIN_BATCH, but do not cause automatic state emits. -+ */ -+#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__) -+ -+/** -+ * Write one dword to the command buffer. -+ */ -+#define OUT_BATCH(data) \ -+ do { \ -+ if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ -+ ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ -+ } else { \ -+ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ -+ } \ -+ } while(0) - - /** -- * Allocate the given number of dwords in the command buffer and return -- * a pointer to the allocated area. -- * When necessary, these functions cause a flush. r300AllocCmdBuf() also -- * causes state reemission after a flush. This is necessary to ensure -- * correct hardware state after an unlock. -+ * Write a relocated dword to the command buffer. - */ -+#define OUT_BATCH_RELOC(data, bo, offset, flags) \ -+ do { \ -+ if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ -+ dri_emit_reloc(b_l_r300->cmdbuf.buf, flags, offset, 4*b_l_r300->cmdbuf.written, bo); \ -+ ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ -+ } else { \ -+ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ -+ } \ -+ } while(0) -+ -+/** -+ * Write n dwords from ptr to the command buffer. 
-+ */ -+#define OUT_BATCH_TABLE(ptr,n) \ -+ do { \ -+ int _n = n; \ -+ if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \ -+ memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \ -+ b_l_r300->cmdbuf.written += _n; \ -+ } else { \ -+ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \ -+ } \ -+ } while(0) -+ -+/** -+ * Finish writing dwords to the command buffer. -+ * The number of (direct or indirect) OUT_BATCH calls between the previous -+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time. -+ */ -+#define END_BATCH() \ -+ do { \ -+ if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \ -+ _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \ -+ } while(0) -+ -+/** -+ * After the last END_BATCH() of rendering, this indicates that flushing -+ * the command buffer now is okay. -+ */ -+#define COMMIT_BATCH() \ -+ do { \ -+ assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \ -+ b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \ -+ } while(0) -+ - static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, - int dwords, const char *caller) - { -@@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, - - r300EnsureCmdBufSpace(r300, dwords, caller); - -- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; -- r300->cmdbuf.count_used += dwords; -+ ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; -+ r300->cmdbuf.written += dwords; -+ r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; - return ptr; - } - -@@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, - - r300EnsureCmdBufSpace(r300, dwords, caller); - -- if (!r300->cmdbuf.count_used) { -+ if (!r300->cmdbuf.written) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "Reemit state after flush (from %s)\n", caller); - 
r300EmitState(r300); - } - -- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; -- r300->cmdbuf.count_used += dwords; -+ ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; -+ r300->cmdbuf.written += dwords; -+ r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; - return ptr; - } - --extern void r300EmitBlit(r300ContextPtr rmesa, -- GLuint color_fmt, -- GLuint src_pitch, -- GLuint src_offset, -- GLuint dst_pitch, -- GLuint dst_offset, -- GLint srcx, GLint srcy, -- GLint dstx, GLint dsty, GLuint w, GLuint h); -- --extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); --extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); --extern void r300EmitVertexShader(r300ContextPtr rmesa); --extern void r300EmitPixelShader(r300ContextPtr rmesa); -- - #endif /* __R300_CMDBUF_H__ */ -diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c -index fcf571d..cc9c11a 100644 ---- a/src/mesa/drivers/dri/r300/r300_context.c -+++ b/src/mesa/drivers/dri/r300/r300_context.c -@@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #include "radeon_span.h" - #include "r300_context.h" - #include "r300_cmdbuf.h" -+#include "r300_mipmap_tree.h" - #include "r300_state.h" - #include "r300_ioctl.h" - #include "r300_tex.h" - #include "r300_emit.h" - #include "r300_swtcl.h" - --#ifdef USER_BUFFERS - #include "r300_mem.h" --#endif - - #include "vblank.h" - #include "utils.h" -@@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - struct dd_function_table functions; - r300ContextPtr r300; - GLcontext *ctx; -- int tcl_mode, i; -+ int tcl_mode; - - assert(glVisual); - assert(driContextPriv); -@@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - r300InitTextureFuncs(&functions); - r300InitShaderFuncs(&functions); - --#ifdef USER_BUFFERS -- r300_mem_init(r300); --#endif -- - if (!radeonInitContext(&r300->radeon, &functions, - glVisual, driContextPriv, - sharedContextPrivate)) { -@@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - return GL_FALSE; - } - -+ r300->radeon.bufmgr = radeonBufmgrClassicInit(r300); -+ - /* Init r300 context data */ -- r300->dma.buf0_address = -- r300->radeon.radeonScreen->buffers->list[0].address; -- -- (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); -- make_empty_list(&r300->swapped); -- -- r300->nr_heaps = 1 /* screen->numTexHeaps */ ; -- assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); -- for (i = 0; i < r300->nr_heaps; i++) { -- /* *INDENT-OFF* */ -- r300->texture_heaps[i] = driCreateTextureHeap(i, r300, -- screen-> -- texSize[i], 12, -- RADEON_NR_TEX_REGIONS, -- (drmTextureRegionPtr) -- r300->radeon.sarea-> -- tex_list[i], -- &r300->radeon.sarea-> -- tex_age[i], -- &r300->swapped, -- sizeof -- (r300TexObj), -- (destroy_texture_object_t -- *) -- r300DestroyTexObj); -- /* *INDENT-ON* */ -- } - r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, - "texture_depth"); - if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) -@@ -299,13 +269,11 @@ GLboolean 
r300CreateContext(const __GLcontextModes * glVisual, - ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; - ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; - --#ifdef USER_BUFFERS - /* Needs further modifications */ - #if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); - #endif --#endif - - /* Initialize the software rasterizer and helper modules. - */ -@@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - return GL_TRUE; - } - --static void r300FreeGartAllocations(r300ContextPtr r300) --{ -- int i, ret, tries = 0, done_age, in_use = 0; -- drm_radeon_mem_free_t memfree; -- -- memfree.region = RADEON_MEM_REGION_GART; -- --#ifdef USER_BUFFERS -- for (i = r300->rmm->u_last; i > 0; i--) { -- if (r300->rmm->u_list[i].ptr == NULL) { -- continue; -- } -- -- /* check whether this buffer is still in use */ -- if (r300->rmm->u_list[i].pending) { -- in_use++; -- } -- } -- /* Cannot flush/lock if no context exists. */ -- if (in_use) -- r300FlushCmdBuf(r300, __FUNCTION__); -- -- done_age = radeonGetAge((radeonContextPtr) r300); -- -- for (i = r300->rmm->u_last; i > 0; i--) { -- if (r300->rmm->u_list[i].ptr == NULL) { -- continue; -- } -- -- /* check whether this buffer is still in use */ -- if (!r300->rmm->u_list[i].pending) { -- continue; -- } -- -- assert(r300->rmm->u_list[i].h_pending == 0); -- -- tries = 0; -- while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { -- usleep(10); -- done_age = radeonGetAge((radeonContextPtr) r300); -- } -- if (tries >= 1000) { -- WARN_ONCE("Failed to idle region!"); -- } -- -- memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - -- (char *)r300->radeon.radeonScreen->gartTextures.map; -- -- ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, -- DRM_RADEON_FREE, &memfree, -- sizeof(memfree)); -- if (ret) { -- fprintf(stderr, "Failed to free at %p\nret = %s\n", -- r300->rmm->u_list[i].ptr, strerror(-ret)); -- } else { -- if (i == r300->rmm->u_last) -- 
r300->rmm->u_last--; -- -- r300->rmm->u_list[i].pending = 0; -- r300->rmm->u_list[i].ptr = NULL; -- } -- } -- r300->rmm->u_head = i; --#endif /* USER_BUFFERS */ --} -- - /* Destroy the device specific context. - */ - void r300DestroyContext(__DRIcontextPrivate * driContextPriv) -@@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) - assert(r300); /* should never be null */ - - if (r300) { -- GLboolean release_texture_heaps; -- -- release_texture_heaps = -- (r300->radeon.glCtx->Shared->RefCount == 1); - _swsetup_DestroyContext(r300->radeon.glCtx); - _tnl_ProgramCacheDestroy(r300->radeon.glCtx); - _tnl_DestroyContext(r300->radeon.glCtx); - _vbo_DestroyContext(r300->radeon.glCtx); - _swrast_DestroyContext(r300->radeon.glCtx); - -- if (r300->dma.current.buf) { -- r300ReleaseDmaRegion(r300, &r300->dma.current, -- __FUNCTION__); --#ifndef USER_BUFFERS -- r300FlushCmdBuf(r300, __FUNCTION__); --#endif -+ if (r300->dma.current) { -+ dri_bo_unreference(r300->dma.current); -+ r300->dma.current = 0; - } -- r300FreeGartAllocations(r300); -+ r300FlushCmdBuf(r300, __FUNCTION__); - r300DestroyCmdBuf(r300); - - if (radeon->state.scissor.pClipRects) { -@@ -521,28 +416,13 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) - radeon->state.scissor.pClipRects = NULL; - } - -- if (release_texture_heaps) { -- /* This share group is about to go away, free our private -- * texture object data. 
-- */ -- int i; -- -- for (i = 0; i < r300->nr_heaps; i++) { -- driDestroyTextureHeap(r300->texture_heaps[i]); -- r300->texture_heaps[i] = NULL; -- } -- -- assert(is_empty_list(&r300->swapped)); -- } -- - radeonCleanupContext(&r300->radeon); - --#ifdef USER_BUFFERS - /* the memory manager might be accessed when Mesa frees the shared - * state, so don't destroy it earlier - */ -- r300_mem_destroy(r300); --#endif -+ dri_bufmgr_destroy(&r300->radeon.bufmgr->base); -+ r300->radeon.bufmgr = 0; - - /* free the option cache */ - driDestroyOptionCache(&r300->radeon.optionCache); -diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h -index d2017f8..5c99740 100644 ---- a/src/mesa/drivers/dri/r300/r300_context.h -+++ b/src/mesa/drivers/dri/r300/r300_context.h -@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "tnl/t_vertex.h" - #include "drm.h" - #include "radeon_drm.h" -+#include "dri_bufmgr.h" - #include "dri_util.h" - #include "texmem.h" - -@@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #include "mtypes.h" - #include "colormac.h" - --#define USER_BUFFERS -- - struct r300_context; - typedef struct r300_context r300ContextRec; - typedef struct r300_context *r300ContextPtr; -+typedef struct radeon_bufmgr radeon_bufmgr; - - #include "radeon_lock.h" - #include "mm.h" -@@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f) - - /************ DMA BUFFERS **************/ - --/* Need refcounting on dma buffers: -- */ --struct r300_dma_buffer { -- int refcount; /**< the number of retained regions in buf */ -- drmBufPtr buf; -- int id; --}; --#undef GET_START --#ifdef USER_BUFFERS --#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) --#else --#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ -- (rvb)->address - rmesa->dma.buf0_address + \ -- (rvb)->start) --#endif --/* A retained region, eg vertices for indexed vertices. -- */ --struct r300_dma_region { -- struct r300_dma_buffer *buf; -- char *address; /* == buf->address */ -- int start, end, ptr; /* offsets from start of buf */ -- -- int aos_offset; /* address in GART memory */ -- int aos_stride; /* distance between elements, in dwords */ -- int aos_size; /* number of components (1-4) */ --}; -- - struct r300_dma { - /* Active dma region. Allocations for vertices and retained - * regions come from here. Also used for emitting random vertices, - * these may be flushed by calling flush_current(); - */ -- struct r300_dma_region current; -+ dri_bo *current; /** Buffer that DMA memory is allocated from */ -+ int current_used; /** Number of bytes allocated and forgotten about */ -+ int current_vertexptr; /** End of active vertex region */ - -+ /** -+ * If current_vertexptr != current_used then flush must be non-zero. -+ * flush must be called before non-active vertex allocations can be -+ * performed. 
-+ */ - void (*flush) (r300ContextPtr); - -- char *buf0_address; /* start of buf[0], for index calcs */ -- - /* Number of "in-flight" DMA buffers, i.e. the number of buffers - * for which a DISCARD command is currently queued in the command buffer. - */ -@@ -173,15 +151,12 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; - /* Texture object in locally shared texture space. - */ - struct r300_tex_obj { -- driTextureObject base; -- -- GLuint bufAddr; /* Offset to start of locally -- shared texture block */ -- -- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; -- /* Six, for the cube faces */ -+ struct gl_texture_object base; -+ struct _r300_mipmap_tree *mt; -+ GLuint dirty_images[6]; - - GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ -+ GLuint override_offset; - - GLuint pitch; /* this isn't sent to hardware just used in calculations */ - /* hardware register values */ -@@ -191,30 +166,16 @@ struct r300_tex_obj { - GLuint pitch_reg; - GLuint size; /* npot only */ - GLuint format; -- GLuint offset; /* Image location in the card's address space. -- All cube faces follow. */ -- GLuint unknown4; -- GLuint unknown5; -- /* end hardware registers */ -- -- /* registers computed by r200 code - keep them here to -- compare against what is actually written. -- -- to be removed later.. 
*/ - GLuint pp_border_color; -- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ -- GLuint format_x; -- -- GLboolean border_fallback; -+ /* end hardware registers */ - - GLuint tile_bits; /* hw texture tile bits used on this texture */ - }; - --struct r300_texture_env_state { -- r300TexObjPtr texobj; -- GLenum format; -- GLenum envMode; --}; -+static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj) -+{ -+ return (r300TexObj*)texObj; -+} - - /* The blit width for texture uploads - */ -@@ -222,7 +183,6 @@ struct r300_texture_env_state { - #define R300_MAX_TEXTURE_UNITS 8 - - struct r300_texture_state { -- struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; - int tc_count; /* number of incoming texture coordinates from VAP */ - }; - -@@ -242,6 +202,7 @@ struct r300_state_atom { - GLboolean dirty; - - int (*check) (r300ContextPtr, struct r300_state_atom * atom); -+ void (*emit) (r300ContextPtr); - }; - - #define R300_VPT_CMD_0 0 -@@ -549,6 +510,8 @@ struct r300_hw_state { - struct r300_state_atom border_color; - } tex; - struct r300_state_atom txe; /* tex enable (4104) */ -+ -+ r300TexObj *textures[R300_MAX_TEXTURE_UNITS]; - }; - - /** -@@ -559,10 +522,14 @@ struct r300_hw_state { - * otherwise. 
- */ - struct r300_cmdbuf { -- int size; /* DWORDs allocated for buffer */ -- uint32_t *cmd_buf; -- int count_used; /* DWORDs filled so far */ -- int count_reemit; /* size of re-emission batch */ -+ dri_bo *buf; -+ int reemit; /** # of dwords in reemit sequence (is always <= committed) */ -+ int size; /** # of dwords total */ -+ -+ int committed; /** # of dwords that we have committed to */ -+ int written; /** # of dwords written (is always >= committed) */ -+ int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */ -+ unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */ - }; - - /** -@@ -811,18 +778,25 @@ struct r500_fragment_program { - #define REG_COLOR0 1 - #define REG_TEX0 2 - -+struct r300_aos { -+ dri_bo *bo; /** Buffer object where vertex data is stored */ -+ int offset; /** Offset into buffer object, in bytes */ -+ int components; /** Number of components per vertex */ -+ int stride; /** Stride in dwords (may be 0 for repeating) */ -+ int count; /** Number of vertices */ -+}; -+ - struct r300_state { - struct r300_depthbuffer_state depth; - struct r300_texture_state texture; - int sw_tcl_inputs[VERT_ATTRIB_MAX]; - struct r300_vertex_shader_state vertex_shader; -- struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; -+ struct r300_aos aos[R300_MAX_AOS_ARRAYS]; - int aos_count; - -- GLuint *Elts; -- struct r300_dma_region elt_dma; -+ dri_bo *elt_dma_bo; /** Buffer object that contains element indices */ -+ int elt_dma_offset; /** Offset into this buffer object, in bytes */ - -- struct r300_dma_region swtcl_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ - -@@ -880,13 +854,6 @@ struct r300_swtcl_info { - * Offset of the 3UB specular color data within a hardware (swtcl) vertex. - */ - GLuint specoffset; -- -- /** -- * Should Mesa project vertex data or will the hardware do it? 
-- */ -- GLboolean needproj; -- -- struct r300_dma_region indexed_verts; - }; - - -@@ -905,25 +872,11 @@ struct r300_context { - /* Vertex buffers - */ - struct r300_dma dma; -- GLboolean save_on_next_unlock; - GLuint NewGLState; - -- /* Texture object bookkeeping -- */ -- unsigned nr_heaps; -- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; -- driTextureObject swapped; - int texture_depth; - float initialMaxAnisotropy; - -- /* Clientdata textures; -- */ -- GLuint prefer_gart_client_texturing; -- --#ifdef USER_BUFFERS -- struct r300_memory_manager *rmm; --#endif -- - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - -diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c -index 2ea17ad..5e2afd5 100644 ---- a/src/mesa/drivers/dri/r300/r300_emit.c -+++ b/src/mesa/drivers/dri/r300/r300_emit.c -@@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_emit.h" - #include "r300_ioctl.h" - --#ifdef USER_BUFFERS - #include "r300_mem.h" --#endif - - #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ - SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ -@@ -86,11 +84,9 @@ do { \ - } while (0) - #endif - --static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, -- GLvoid * data, int stride, int count) -+static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count) - { - int i; -- int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", -@@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, - } - } - --static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, -- GLvoid * data, int stride, int count) -+static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count) - { - int i; -- int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d 
stride %d out %p data %p\n", -@@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, - } - } - --static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, -- GLvoid * data, int stride, int count) -+static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count) - { - int i; -- int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", -@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, - } - } - --static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, -- GLvoid * data, int stride, int count) -+static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count) - { - int i; -- int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", -@@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, - } - } - --static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, -+ -+static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos, - GLvoid * data, int size, int stride, int count) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); -+ uint32_t *out; - - if (stride == 0) { -- r300AllocDmaRegion(rmesa, rvb, size * 4, 4); -+ r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); - count = 1; -- rvb->aos_offset = GET_START(rvb); -- rvb->aos_stride = 0; -+ aos->stride = 0; - } else { -- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); -- rvb->aos_offset = GET_START(rvb); -- rvb->aos_stride = size; -+ r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); -+ aos->stride = size; - } - -+ aos->components = size; -+ aos->count = count; -+ -+ out = (uint32_t*)((char*)aos->bo->virtual + aos->offset); - switch (size) { -- case 1: -- r300EmitVec4(ctx, rvb, data, stride, count); -- break; -- case 2: -- 
r300EmitVec8(ctx, rvb, data, stride, count); -- break; -- case 3: -- r300EmitVec12(ctx, rvb, data, stride, count); -- break; -- case 4: -- r300EmitVec16(ctx, rvb, data, stride, count); -- break; -+ case 1: r300EmitVec4(out, data, stride, count); break; -+ case 2: r300EmitVec8(out, data, stride, count); break; -+ case 3: r300EmitVec12(out, data, stride, count); break; -+ case 4: r300EmitVec16(out, data, stride, count); break; - default: - assert(0); - break; -@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx) - } - - for (i = 0; i < nr; i++) { -- int ci, fix, found = 0; -+ int ci; - - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; -@@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx) - swizzle[i][ci] = ci; - } - -- if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { -- if (vb->AttribPtr[tab[i]]->stride % 4) { -- return R300_FALLBACK_TCL; -- } -- rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); -- rmesa->state.aos[i].start = 0; -- rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); -- rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; -- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; -- } else { -- r300EmitVec(ctx, &rmesa->state.aos[i], -- vb->AttribPtr[tab[i]]->data, -- vb->AttribPtr[tab[i]]->size, -- vb->AttribPtr[tab[i]]->stride, count); -- } -- -- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; -- -- for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { -- if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { -- continue; -- } -- found = 1; -- break; -- } -- -- if (found) { -- if (fix > 0) { -- WARN_ONCE("Feeling lucky?\n"); -- } -- rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; -- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { -- swizzle[i][ci] += fix; -- } -- } else { -- WARN_ONCE -- ("Cannot handle offset %x with stride %d, comp %d\n", -- 
rmesa->state.aos[i].aos_offset, -- rmesa->state.aos[i].aos_stride, -- vb->AttribPtr[tab[i]]->size); -- return R300_FALLBACK_TCL; -- } -+ r300EmitVec(ctx, &rmesa->state.aos[i], -+ vb->AttribPtr[tab[i]]->data, -+ vb->AttribPtr[tab[i]]->size, -+ vb->AttribPtr[tab[i]]->stride, count); - } - - /* Setup INPUT_ROUTE. */ -@@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx) - return R300_FALLBACK_NONE; - } - --#ifdef USER_BUFFERS --void r300UseArrays(GLcontext * ctx) --{ -- r300ContextPtr rmesa = R300_CONTEXT(ctx); -- int i; -- -- if (rmesa->state.elt_dma.buf) -- r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); -- -- for (i = 0; i < rmesa->state.aos_count; i++) { -- if (rmesa->state.aos[i].buf) -- r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); -- } --} --#endif -- - void r300ReleaseArrays(GLcontext * ctx) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - -- r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); -+ if (rmesa->state.elt_dma_bo) { -+ dri_bo_unreference(rmesa->state.elt_dma_bo); -+ rmesa->state.elt_dma_bo = 0; -+ } - for (i = 0; i < rmesa->state.aos_count; i++) { -- r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); -+ if (rmesa->state.aos[i].bo) { -+ dri_bo_unreference(rmesa->state.aos[i].bo); -+ rmesa->state.aos[i].bo = 0; -+ } - } - } - - void r300EmitCacheFlush(r300ContextPtr rmesa) - { -- int cmd_reserved = 0; -- int cmd_written = 0; -- -- drm_radeon_cmd_header_t *cmd = NULL; -- -- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); -- e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | -- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); -+ BATCH_LOCALS(rmesa); -+ -+ BEGIN_BATCH(4); -+ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, -+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | -+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); -+ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, -+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | -+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); -+ END_BATCH(); -+ COMMIT_BATCH(); -+} - -- 
reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); -- e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | -- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); -+void r300EmitBlit(r300ContextPtr rmesa, -+ GLuint color_fmt, -+ GLuint src_pitch, -+ dri_bo *src_bo, int src_offset, -+ GLuint dst_pitch, -+ GLuint dst_offset, -+ GLint srcx, GLint srcy, -+ GLint dstx, GLint dsty, GLuint w, GLuint h) -+{ -+ BATCH_LOCALS(rmesa); -+ -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, -+ "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", -+ __FUNCTION__, src_pitch, src_offset, srcx, srcy, -+ dst_pitch, dst_offset, dstx, dsty, w, h); -+ -+ assert((src_pitch & 63) == 0); -+ assert((dst_pitch & 63) == 0); -+ assert((src_offset & 1023) == 0); -+ assert((dst_offset & 1023) == 0); -+ assert(w < (1 << 16)); -+ assert(h < (1 << 16)); -+ -+ BEGIN_BATCH(8); -+ OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5); -+ OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | -+ RADEON_GMC_DST_PITCH_OFFSET_CNTL | -+ RADEON_GMC_BRUSH_NONE | -+ (color_fmt << 8) | -+ RADEON_GMC_SRC_DATATYPE_COLOR | -+ RADEON_ROP3_S | -+ RADEON_DP_SRC_SOURCE_MEMORY | -+ RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); -+ OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER); -+ OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10)); -+ OUT_BATCH((srcx << 16) | srcy); -+ OUT_BATCH((dstx << 16) | dsty); -+ OUT_BATCH((w << 16) | h); -+ END_BATCH(); - } -diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h -index 5950539..179983d 100644 ---- a/src/mesa/drivers/dri/r300/r300_emit.h -+++ b/src/mesa/drivers/dri/r300/r300_emit.h -@@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void) - return cmd.u; - } - --/** -- * Prepare to write a register value to register at address reg. -- * If num_extra > 0 then the following extra values are written -- * to registers with address +4, +8 and so on.. 
-- */ --#define reg_start(reg, num_extra) \ -- do { \ -- int _n; \ -- _n=(num_extra); \ -- cmd = (drm_radeon_cmd_header_t*) \ -- r300AllocCmdBuf(rmesa, \ -- (_n+2), \ -- __FUNCTION__); \ -- cmd_reserved=_n+2; \ -- cmd_written=1; \ -- cmd[0].i=cmdpacket0((reg), _n+1); \ -- } while (0); -+ -+/** Single register write to command buffer; requires 2 dwords. */ -+#define OUT_BATCH_REGVAL(reg, val) \ -+ OUT_BATCH(cmdpacket0((reg), 1)); \ -+ OUT_BATCH((val)) -+ -+/** Continuous register range write to command buffer; requires 1 dword, -+ * expects count dwords afterwards for register contents. */ -+#define OUT_BATCH_REGSEQ(reg, count) \ -+ OUT_BATCH(cmdpacket0((reg), (count))); -+ -+/** Write a 32 bit float to the ring; requires 1 dword. */ -+#define OUT_BATCH_FLOAT32(f) \ -+ OUT_BATCH(r300PackFloat32((f))); - - /** -- * Emit GLuint freestyle -+ * Write the header of a packet3 to the command buffer. -+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. - */ --#define e32(dword) \ -- do { \ -- if(cmd_written 0x3fff) { \ -- fprintf(stderr,"Too big packet3 %08x: cannot " \ -- "store %d dwords\n", \ -- _p, _n); \ -- _mesa_exit(-1); \ -- } \ -- cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ -- cmd[1].i = _p | ((_n & 0x3fff)<<16); \ -- } -- - /** - * Must be sent to switch to 2d commands - */ - void static INLINE end_3d(r300ContextPtr rmesa) - { -- drm_radeon_cmd_header_t *cmd = NULL; -+ BATCH_LOCALS(rmesa); - -- cmd = -- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); -- cmd[0].header.cmd_type = R300_CMD_END3D; -+ BEGIN_BATCH(1); -+ OUT_BATCH(cmdpacify()); -+ END_BATCH(); - } - - void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) - { -- drm_radeon_cmd_header_t *cmd = NULL; -+ BATCH_LOCALS(rmesa); - -- cmd = -- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); -- cmd[0].i = cmdcpdelay(count); -+ BEGIN_BATCH(1); -+ OUT_BATCH(cmdcpdelay(count)); -+ END_BATCH(); - } - - void static INLINE 
cp_wait(r300ContextPtr rmesa, unsigned char flags) - { -- drm_radeon_cmd_header_t *cmd = NULL; -+ BATCH_LOCALS(rmesa); - -- cmd = -- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); -- cmd[0].i = cmdwait(flags); -+ BEGIN_BATCH(1); -+ OUT_BATCH(cmdwait(flags)); -+ END_BATCH(); - } - - extern int r300EmitArrays(GLcontext * ctx); - --#ifdef USER_BUFFERS --void r300UseArrays(GLcontext * ctx); --#endif -- - extern void r300ReleaseArrays(GLcontext * ctx); - extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); - extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); -@@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); - extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); - extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); - -+extern void r300EmitBlit(r300ContextPtr rmesa, -+ GLuint color_fmt, -+ GLuint src_pitch, -+ dri_bo *src_bo, int src_offset, -+ GLuint dst_pitch, -+ GLuint dst_offset, -+ GLint srcx, GLint srcy, -+ GLint dstx, GLint dsty, GLuint w, GLuint h); -+ - #endif -diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c -index bd7f060..046f9a2 100644 ---- a/src/mesa/drivers/dri/r300/r300_ioctl.c -+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c -@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "radeon_reg.h" - #include "r300_emit.h" - #include "r300_fragprog.h" -+#include "r300_mem.h" - - #include "vblank.h" - -@@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #define CLEARBUFFER_DEPTH 0x2 - #define CLEARBUFFER_STENCIL 0x4 - --static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) -+static void r300ClearBuffer(r300ContextPtr r300, int flags, -+ struct radeon_renderbuffer *rrb) - { -+ BATCH_LOCALS(r300); - GLcontext *ctx = r300->radeon.glCtx; - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; -- GLuint cboffset, cbpitch; -- drm_r300_cmd_header_t *cmd2; -- int cmd_reserved = 0; -- int cmd_written = 0; -- drm_radeon_cmd_header_t *cmd = NULL; -+ GLuint cbpitch = 0; - r300ContextPtr rmesa = r300; - - if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", -- __FUNCTION__, buffer ? "back" : "front", -- dPriv->x, dPriv->y, dPriv->w, dPriv->h); -- -- if (buffer) { -- cboffset = r300->radeon.radeonScreen->backOffset; -- cbpitch = r300->radeon.radeonScreen->backPitch; -- } else { -- cboffset = r300->radeon.radeonScreen->frontOffset; -- cbpitch = r300->radeon.radeonScreen->frontPitch; -+ fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", -+ __FUNCTION__, rrb, dPriv->x, dPriv->y, -+ dPriv->w, dPriv->h); -+ -+ if (rrb) { -+ cbpitch = rrb->pitch; -+ if (rrb->cpp == 4) -+ cbpitch |= R300_COLOR_FORMAT_ARGB8888; -+ else -+ cbpitch |= R300_COLOR_FORMAT_RGB565; -+ -+ if (r300->radeon.sarea->tiling_enabled) -+ cbpitch |= R300_COLOR_TILE_ENABLE; - } - -- cboffset += r300->radeon.radeonScreen->fbLocation; -- -+ /* TODO in bufmgr */ - cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - end_3d(rmesa); - -- R300_STATECHANGE(r300, cb); -- reg_start(R300_RB3D_COLOROFFSET0, 0); -- e32(cboffset); -- -- if (r300->radeon.radeonScreen->cpp == 4) -- cbpitch |= R300_COLOR_FORMAT_ARGB8888; -- else -- cbpitch |= R300_COLOR_FORMAT_RGB565; -- -- if (r300->radeon.sarea->tiling_enabled) -- cbpitch |= R300_COLOR_TILE_ENABLE; -- -- reg_start(R300_RB3D_COLORPITCH0, 0); -- e32(cbpitch); -- -- R300_STATECHANGE(r300, cmk); -- reg_start(RB3D_COLOR_CHANNEL_MASK, 0); -+ BEGIN_BATCH(19); -+ 
OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); -+ OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); -+ OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); - -+ OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); - if (flags & CLEARBUFFER_COLOR) { -- e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | -- (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | -- (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | -- (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); -+ OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | -+ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | -+ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | -+ (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); - } else { -- e32(0x0); -+ OUT_BATCH(0); - } - -- R300_STATECHANGE(r300, zs); -- reg_start(R300_ZB_CNTL, 2); -+ OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); - - { - uint32_t t1, t2; -@@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) - R300_S_FRONT_ZFAIL_OP_SHIFT); - } - -- e32(t1); -- e32(t2); -- e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | -- (ctx->Stencil.Clear & R300_STENCILREF_MASK)); -+ OUT_BATCH(t1); -+ OUT_BATCH(t2); -+ OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | -+ (ctx->Stencil.Clear & R300_STENCILREF_MASK)); - } - -- cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); -- cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; -- cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; -- cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); -- cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); -- cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); -- cmd2[4].u = r300PackFloat32(1.0); -- cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); -- cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); 
-- cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); -- cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); -+ OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR)); -+ OUT_BATCH_FLOAT32(dPriv->w / 2.0); -+ OUT_BATCH_FLOAT32(dPriv->h / 2.0); -+ OUT_BATCH_FLOAT32(ctx->Depth.Clear); -+ OUT_BATCH_FLOAT32(1.0); -+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); -+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); -+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); -+ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); -+ END_BATCH(); - - r300EmitCacheFlush(rmesa); - cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); -+ -+ R300_STATECHANGE(r300, cb); -+ R300_STATECHANGE(r300, cmk); -+ R300_STATECHANGE(r300, zs); - } - - static void r300EmitClearState(GLcontext * ctx) - { - r300ContextPtr r300 = R300_CONTEXT(ctx); -- r300ContextPtr rmesa = r300; -+ BATCH_LOCALS(r300); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; - int i; -- int cmd_reserved = 0; -- int cmd_written = 0; -- drm_radeon_cmd_header_t *cmd = NULL; - int has_tcl = 1; - int is_r500 = 0; - GLuint vap_cntl; -@@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx) - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; - -- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) -- is_r500 = 1; -- -+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) -+ is_r500 = 1; - -- /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and -- * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are -- * quite complex; see the functions in r300_emit.c. -+ /* State atom dirty tracking is a little subtle here. -+ * -+ * On the one hand, we need to make sure base state is emitted -+ * here if we start with an empty batch buffer, otherwise clear -+ * works incorrectly with multiple processes. Therefore, the first -+ * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. 
- * -- * I believe it would be a good idea to extend the functions in -- * r300_emit.c so that they can be used to setup the default values for -- * these registers, as well as the actual values used for rendering. -+ * On the other hand, implicit state emission clears the state atom -+ * dirty bits, so we have to call R300_STATECHANGE later than the -+ * first BEGIN_BATCH. -+ * -+ * The final trickiness is that, because we change state, we need -+ * to ensure that any stored swtcl primitives are flushed properly -+ * before we start changing state. See the R300_NEWPRIM in r300Clear -+ * for this. - */ -- R300_STATECHANGE(r300, vir[0]); -- reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); -+ BEGIN_BATCH(31); -+ OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); - if (!has_tcl) -- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | -+ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - else -- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | -+ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - -- /* disable fog */ -- R300_STATECHANGE(r300, fogs); -- reg_start(R300_FG_FOG_BLEND, 0); -- e32(0x0); -- -- R300_STATECHANGE(r300, vir[1]); -- reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); -- e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | -+ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); -+ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, -+ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | - (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | - (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | -@@ -226,238 
+216,246 @@ static void r300EmitClearState(GLcontext * ctx) - << R300_SWIZZLE1_SHIFT))); - - /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ -- R300_STATECHANGE(r300, vic); -- reg_start(R300_VAP_VTX_STATE_CNTL, 1); -- e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); -- e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); -+ OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); -+ OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); -+ OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - -- R300_STATECHANGE(r300, vte); - /* comes from fglrx startup of clear */ -- reg_start(R300_SE_VTE_CNTL, 1); -- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | -- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | -- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | -- R300_VPORT_Z_OFFSET_ENA); -- e32(0x8); -+ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); -+ OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | -+ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | -+ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | -+ R300_VPORT_Z_OFFSET_ENA); -+ OUT_BATCH(0x8); - -- reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); -- e32(0xaaaaaaaa); -+ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); - -- R300_STATECHANGE(r300, vof); -- reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); -- e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | -- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); -- e32(0x0); /* no textures */ -+ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); -+ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | -+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); -+ OUT_BATCH(0); /* no textures */ - -- R300_STATECHANGE(r300, txe); -- reg_start(R300_TX_ENABLE, 0); -- e32(0x0); -+ OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); - -- R300_STATECHANGE(r300, vpt); -- reg_start(R300_SE_VPORT_XSCALE, 5); -- efloat(1.0); -- efloat(dPriv->x); -- efloat(1.0); -- efloat(dPriv->y); -- efloat(1.0); -- efloat(0.0); -+ OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); -+ 
OUT_BATCH_FLOAT32(1.0); -+ OUT_BATCH_FLOAT32(dPriv->x); -+ OUT_BATCH_FLOAT32(1.0); -+ OUT_BATCH_FLOAT32(dPriv->y); -+ OUT_BATCH_FLOAT32(1.0); -+ OUT_BATCH_FLOAT32(0.0); - -- R300_STATECHANGE(r300, at); -- reg_start(R300_FG_ALPHA_FUNC, 0); -- e32(0x0); -+ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); -+ -+ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); -+ OUT_BATCH(0x0); -+ OUT_BATCH(0x0); -+ END_BATCH(); - -+ R300_STATECHANGE(r300, vir[0]); -+ R300_STATECHANGE(r300, fogs); -+ R300_STATECHANGE(r300, vir[1]); -+ R300_STATECHANGE(r300, vic); -+ R300_STATECHANGE(r300, vte); -+ R300_STATECHANGE(r300, vof); -+ R300_STATECHANGE(r300, txe); -+ R300_STATECHANGE(r300, vpt); -+ R300_STATECHANGE(r300, at); - R300_STATECHANGE(r300, bld); -- reg_start(R300_RB3D_CBLEND, 1); -- e32(0x0); -- e32(0x0); -+ R300_STATECHANGE(r300, ps); - - if (has_tcl) { -- R300_STATECHANGE(r300, vap_clip_cntl); -- reg_start(R300_VAP_CLIP_CNTL, 0); -- e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); -+ R300_STATECHANGE(r300, vap_clip_cntl); -+ -+ BEGIN_BATCH_NO_AUTOSTATE(2); -+ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); -+ END_BATCH(); - } - -- R300_STATECHANGE(r300, ps); -- reg_start(R300_GA_POINT_SIZE, 0); -- e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | -- ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); -+ BEGIN_BATCH_NO_AUTOSTATE(2); -+ OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, -+ ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | -+ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); -+ END_BATCH(); - - if (!is_r500) { - R300_STATECHANGE(r300, ri); -- reg_start(R300_RS_IP_0, 7); -- for (i = 0; i < 8; ++i) { -- e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); -- } -- - R300_STATECHANGE(r300, rc); -- /* The second constant is needed to get glxgears display anything .. 
*/ -- reg_start(R300_RS_COUNT, 1); -- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); -- e32(0x0); -- - R300_STATECHANGE(r300, rr); -- reg_start(R300_RS_INST_0, 0); -- e32(R300_RS_INST_COL_CN_WRITE); -+ -+ BEGIN_BATCH(14); -+ OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); -+ for (i = 0; i < 8; ++i) -+ OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); -+ -+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); -+ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); -+ OUT_BATCH(0x0); -+ -+ OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); -+ END_BATCH(); - } else { - R300_STATECHANGE(r300, ri); -- reg_start(R500_RS_IP_0, 7); -+ R300_STATECHANGE(r300, rc); -+ R300_STATECHANGE(r300, rr); -+ -+ BEGIN_BATCH(14); -+ OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { -- e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | -- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); -+ OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | -+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | -+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); - } - -- R300_STATECHANGE(r300, rc); -- /* The second constant is needed to get glxgears display anything .. 
*/ -- reg_start(R300_RS_COUNT, 1); -- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); -- e32(0x0); -- -- R300_STATECHANGE(r300, rr); -- reg_start(R500_RS_INST_0, 0); -- e32(R500_RS_INST_COL_CN_WRITE); -+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); -+ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); -+ OUT_BATCH(0x0); - -+ OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); -+ END_BATCH(); - } - - if (!is_r500) { - R300_STATECHANGE(r300, fp); -- reg_start(R300_US_CONFIG, 2); -- e32(0x0); -- e32(0x0); -- e32(0x0); -- reg_start(R300_US_CODE_ADDR_0, 3); -- e32(0x0); -- e32(0x0); -- e32(0x0); -- e32(R300_RGBA_OUT); -- - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); - -- reg_start(R300_US_ALU_RGB_INST_0, 0); -- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); -- -- reg_start(R300_US_ALU_RGB_ADDR_0, 0); -- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); -- -- reg_start(R300_US_ALU_ALPHA_INST_0, 0); -- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); -- -- reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); -- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); -+ BEGIN_BATCH(17); -+ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); -+ OUT_BATCH(0x0); -+ OUT_BATCH(0x0); -+ OUT_BATCH(0x0); -+ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); -+ OUT_BATCH(0x0); -+ OUT_BATCH(0x0); -+ OUT_BATCH(0x0); -+ OUT_BATCH(R300_RGBA_OUT); -+ -+ OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, -+ FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); -+ OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, -+ FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); -+ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, -+ FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); -+ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, -+ FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); -+ END_BATCH(); - } else { -- R300_STATECHANGE(r300, fp); -- reg_start(R500_US_CONFIG, 1); -- e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); -- e32(0x0); -- 
reg_start(R500_US_CODE_ADDR, 2); -- e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); -- e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); -- e32(R500_US_CODE_OFFSET_ADDR(0)); -- -+ R300_STATECHANGE(r300, fp); - R300_STATECHANGE(r300, r500fp); -- r500fp_start_fragment(0, 6); -- -- e32(R500_INST_TYPE_OUT | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_LAST | -- R500_INST_RGB_OMASK_R | -- R500_INST_RGB_OMASK_G | -- R500_INST_RGB_OMASK_B | -- R500_INST_ALPHA_OMASK | -- R500_INST_RGB_CLAMP | -- R500_INST_ALPHA_CLAMP); -- -- e32(R500_RGB_ADDR0(0) | -- R500_RGB_ADDR1(0) | -- R500_RGB_ADDR1_CONST | -- R500_RGB_ADDR2(0) | -- R500_RGB_ADDR2_CONST); -- -- e32(R500_ALPHA_ADDR0(0) | -- R500_ALPHA_ADDR1(0) | -- R500_ALPHA_ADDR1_CONST | -- R500_ALPHA_ADDR2(0) | -- R500_ALPHA_ADDR2_CONST); -- -- e32(R500_ALU_RGB_SEL_A_SRC0 | -- R500_ALU_RGB_R_SWIZ_A_R | -- R500_ALU_RGB_G_SWIZ_A_G | -- R500_ALU_RGB_B_SWIZ_A_B | -- R500_ALU_RGB_SEL_B_SRC0 | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_B_SWIZ_B_G | -- R500_ALU_RGB_G_SWIZ_B_B); -- -- e32(R500_ALPHA_OP_CMP | -- R500_ALPHA_SWIZ_A_A | -- R500_ALPHA_SWIZ_B_A); -- -- e32(R500_ALU_RGBA_OP_CMP | -- R500_ALU_RGBA_R_SWIZ_0 | -- R500_ALU_RGBA_G_SWIZ_0 | -- R500_ALU_RGBA_B_SWIZ_0 | -- R500_ALU_RGBA_A_SWIZ_0); -+ -+ BEGIN_BATCH(14); -+ OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); -+ OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); -+ OUT_BATCH(0x0); -+ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); -+ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); -+ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); -+ OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); -+ -+ OUT_BATCH(cmdr500fp(0, 1, 0, 0)); -+ OUT_BATCH(R500_INST_TYPE_OUT | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_LAST | -+ R500_INST_RGB_OMASK_R | -+ R500_INST_RGB_OMASK_G | -+ R500_INST_RGB_OMASK_B | -+ R500_INST_ALPHA_OMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP); -+ OUT_BATCH(R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR1_CONST | -+ 
R500_RGB_ADDR2(0) | -+ R500_RGB_ADDR2_CONST); -+ OUT_BATCH(R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR1_CONST | -+ R500_ALPHA_ADDR2(0) | -+ R500_ALPHA_ADDR2_CONST); -+ OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC0 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_B); -+ OUT_BATCH(R500_ALPHA_OP_CMP | -+ R500_ALPHA_SWIZ_A_A | -+ R500_ALPHA_SWIZ_B_A); -+ OUT_BATCH(R500_ALU_RGBA_OP_CMP | -+ R500_ALU_RGBA_R_SWIZ_0 | -+ R500_ALU_RGBA_G_SWIZ_0 | -+ R500_ALU_RGBA_B_SWIZ_0 | -+ R500_ALU_RGBA_A_SWIZ_0); -+ END_BATCH(); - } - -- reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); -- e32(0x00000000); -+ BEGIN_BATCH(2); -+ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); -+ END_BATCH(); -+ - if (has_tcl) { -- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | -+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT)); -- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) -- vap_cntl |= R500_TCL_STATE_OPTIMIZATION; -- } else -- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | -+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) -+ vap_cntl |= R500_TCL_STATE_OPTIMIZATION; -+ } else { -+ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT)); -+ } - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) -- vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); -+ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) -- vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); -+ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || 
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) -- vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); -+ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) -- vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); -+ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); - else -- vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); -+ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); -+ -+ R300_STATECHANGE(r300, vap_cntl); - -- R300_STATECHANGE(rmesa, vap_cntl); -- reg_start(R300_VAP_CNTL, 0); -- e32(vap_cntl); -+ BEGIN_BATCH(2); -+ OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); -+ END_BATCH(); - - if (has_tcl) { - R300_STATECHANGE(r300, pvs); -- reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); -- -- e32((0 << R300_PVS_FIRST_INST_SHIFT) | -- (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | -- (1 << R300_PVS_LAST_INST_SHIFT)); -- e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | -- (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); -- e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -- - R300_STATECHANGE(r300, vpi); -- vsf_start_fragment(0x0, 8); -- -- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); -- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -- e32(0x0); - -- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); -- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -- e32(0x0); -+ BEGIN_BATCH(13); -+ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); -+ OUT_BATCH((0 << 
R300_PVS_FIRST_INST_SHIFT) | -+ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (1 << R300_PVS_LAST_INST_SHIFT)); -+ OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | -+ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); -+ OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -+ -+ OUT_BATCH(cmdvpu(0, 2)); -+ OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); -+ OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -+ OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -+ OUT_BATCH(0x0); -+ -+ OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); -+ OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -+ OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); -+ OUT_BATCH(0x0); -+ END_BATCH(); - } - } - -@@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx) - static void r300Clear(GLcontext * ctx, GLbitfield mask) - { - r300ContextPtr r300 = R300_CONTEXT(ctx); -+ BATCH_LOCALS(r300); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; -+ GLframebuffer *fb = dPriv->driverPrivate; -+ struct radeon_renderbuffer *rrb; - int flags = 0; - int bits = 0; - int swapped; -@@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) - return; - } - -+ /* Flush swtcl vertices if necessary, because we will change hardware -+ * state during clear. See also the state-related comment in -+ * r300EmitClearState. 
-+ */ -+ R300_NEWPRIM(r300); -+ - if (mask & BUFFER_BIT_FRONT_LEFT) { - flags |= BUFFER_BIT_FRONT_LEFT; - mask &= ~BUFFER_BIT_FRONT_LEFT; -@@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) - _swrast_Clear(ctx, mask); - } - -- swapped = r300->radeon.sarea->pfCurrentPage == 1; -- - /* Make sure it fits there. */ - r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); - if (flags || bits) - r300EmitClearState(ctx); - - if (flags & BUFFER_BIT_FRONT_LEFT) { -- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); -+ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; -+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); - bits = 0; - } - - if (flags & BUFFER_BIT_BACK_LEFT) { -- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); -+ rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; -+ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); - bits = 0; - } - - if (bits) -- r300ClearBuffer(r300, bits, 0); -+ r300ClearBuffer(r300, bits, NULL); - -+ COMMIT_BATCH(); - } - - void r300Flush(GLcontext * ctx) -@@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx) - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - -- if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) -+ if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit) - r300FlushCmdBuf(rmesa, __FUNCTION__); - } - --#ifdef USER_BUFFERS --#include "r300_mem.h" -- - void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) - { -- struct r300_dma_buffer *dmabuf; - size = MAX2(size, RADEON_BUFFER_SIZE * 16); - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) -@@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) - rmesa->dma.flush(rmesa); - } - -- if (rmesa->dma.current.buf) { --#ifdef USER_BUFFERS -- r300_mem_use(rmesa, rmesa->dma.current.buf->id); --#endif -- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); -+ if (rmesa->dma.current) { -+ dri_bo_unreference(rmesa->dma.current); -+ rmesa->dma.current = 0; - 
} - if (rmesa->dma.nr_released_bufs > 4) - r300FlushCmdBuf(rmesa, __FUNCTION__); - -- dmabuf = CALLOC_STRUCT(r300_dma_buffer); -- dmabuf->buf = (void *)1; /* hack */ -- dmabuf->refcount = 1; -- -- dmabuf->id = r300_mem_alloc(rmesa, 4, size); -- if (dmabuf->id == 0) { -- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ -- -- r300FlushCmdBufLocked(rmesa, __FUNCTION__); -- radeonWaitForIdleLocked(&rmesa->radeon); -- -- dmabuf->id = r300_mem_alloc(rmesa, 4, size); -- -- UNLOCK_HARDWARE(&rmesa->radeon); -- -- if (dmabuf->id == 0) { -- fprintf(stderr, -- "Error: Could not get dma buffer... exiting\n"); -- _mesa_exit(-1); -- } -- } -- -- rmesa->dma.current.buf = dmabuf; -- rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); -- rmesa->dma.current.end = size; -- rmesa->dma.current.start = 0; -- rmesa->dma.current.ptr = 0; --} -- --void r300ReleaseDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, const char *caller) --{ -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); -- -- if (!region->buf) -- return; -- -- if (rmesa->dma.flush) -- rmesa->dma.flush(rmesa); -- -- if (--region->buf->refcount == 0) { -- r300_mem_free(rmesa, region->buf->id); -- FREE(region->buf); -- rmesa->dma.nr_released_bufs++; -- } -- -- region->buf = 0; -- region->start = 0; -+ rmesa->dma.current = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions", -+ size, 4, DRM_BO_MEM_DMA); -+ rmesa->dma.current_used = 0; -+ rmesa->dma.current_vertexptr = 0; - } - - /* Allocates a region from rmesa->dma.current. 
If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ - void r300AllocDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, -+ dri_bo **pbo, int *poffset, - int bytes, int alignment) - { - if (RADEON_DEBUG & DEBUG_IOCTL) -@@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - -- if (region->buf) -- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); -+ assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); - - alignment--; -- rmesa->dma.current.start = rmesa->dma.current.ptr = -- (rmesa->dma.current.ptr + alignment) & ~alignment; -- -- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) -- r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); -- -- region->start = rmesa->dma.current.start; -- region->ptr = rmesa->dma.current.start; -- region->end = rmesa->dma.current.start + bytes; -- region->address = rmesa->dma.current.address; -- region->buf = rmesa->dma.current.buf; -- region->buf->refcount++; -+ rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; - -- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ -- rmesa->dma.current.start = -- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; -- -- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); --} -+ if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) -+ r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); - --#else --static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) --{ -- struct r300_dma_buffer *dmabuf; -- int fd = rmesa->radeon.dri.fd; -- int index = 0; -- int size = 0; -- drmDMAReq dma; -- int ret; -+ *poffset = rmesa->dma.current_used; -+ *pbo = rmesa->dma.current; -+ dri_bo_reference(*pbo); - -- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) -- fprintf(stderr, "%s\n", __FUNCTION__); -+ /* Always align to at least 16 bytes */ -+ rmesa->dma.current_used = 
(rmesa->dma.current_used + bytes + 15) & ~15; -+ rmesa->dma.current_vertexptr = rmesa->dma.current_used; - -- if (rmesa->dma.flush) { -- rmesa->dma.flush(rmesa); -- } -- -- if (rmesa->dma.current.buf) -- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); -- -- if (rmesa->dma.nr_released_bufs > 4) -- r300FlushCmdBuf(rmesa, __FUNCTION__); -- -- dma.context = rmesa->radeon.dri.hwContext; -- dma.send_count = 0; -- dma.send_list = NULL; -- dma.send_sizes = NULL; -- dma.flags = 0; -- dma.request_count = 1; -- dma.request_size = RADEON_BUFFER_SIZE; -- dma.request_list = &index; -- dma.request_sizes = &size; -- dma.granted_count = 0; -- -- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ -- -- ret = drmDMA(fd, &dma); -- -- if (ret != 0) { -- /* Try to release some buffers and wait until we can't get any more */ -- if (rmesa->dma.nr_released_bufs) { -- r300FlushCmdBufLocked(rmesa, __FUNCTION__); -- } -- -- if (RADEON_DEBUG & DEBUG_DMA) -- fprintf(stderr, "Waiting for buffers\n"); -- -- radeonWaitForIdleLocked(&rmesa->radeon); -- ret = drmDMA(fd, &dma); -- -- if (ret != 0) { -- UNLOCK_HARDWARE(&rmesa->radeon); -- fprintf(stderr, -- "Error: Could not get dma buffer... 
exiting\n"); -- _mesa_exit(-1); -- } -- } -- -- UNLOCK_HARDWARE(&rmesa->radeon); -- -- if (RADEON_DEBUG & DEBUG_DMA) -- fprintf(stderr, "Allocated buffer %d\n", index); -- -- dmabuf = CALLOC_STRUCT(r300_dma_buffer); -- dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; -- dmabuf->refcount = 1; -- -- rmesa->dma.current.buf = dmabuf; -- rmesa->dma.current.address = dmabuf->buf->address; -- rmesa->dma.current.end = dmabuf->buf->total; -- rmesa->dma.current.start = 0; -- rmesa->dma.current.ptr = 0; --} -- --void r300ReleaseDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, const char *caller) --{ -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); -- -- if (!region->buf) -- return; -- -- if (rmesa->dma.flush) -- rmesa->dma.flush(rmesa); -- -- if (--region->buf->refcount == 0) { -- drm_radeon_cmd_header_t *cmd; -- -- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) -- fprintf(stderr, "%s -- DISCARD BUF %d\n", -- __FUNCTION__, region->buf->buf->idx); -- cmd = -- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, -- sizeof -- (*cmd) / 4, -- __FUNCTION__); -- cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; -- cmd->dma.buf_idx = region->buf->buf->idx; -- -- FREE(region->buf); -- rmesa->dma.nr_released_bufs++; -- } -- -- region->buf = 0; -- region->start = 0; --} -- --/* Allocates a region from rmesa->dma.current. 
If there isn't enough -- * space in current, grab a new buffer (and discard what was left of current) -- */ --void r300AllocDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, -- int bytes, int alignment) --{ -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); -- -- if (rmesa->dma.flush) -- rmesa->dma.flush(rmesa); -- -- if (region->buf) -- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); -- -- alignment--; -- rmesa->dma.current.start = rmesa->dma.current.ptr = -- (rmesa->dma.current.ptr + alignment) & ~alignment; -- -- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) -- r300RefillCurrentDmaRegion(rmesa); -- -- region->start = rmesa->dma.current.start; -- region->ptr = rmesa->dma.current.start; -- region->end = rmesa->dma.current.start + bytes; -- region->address = rmesa->dma.current.address; -- region->buf = rmesa->dma.current.buf; -- region->buf->refcount++; -- -- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ -- rmesa->dma.current.start = -- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; -- -- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); --} -- --#endif -- --GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, -- GLint size) --{ -- int offset = -- (char *)pointer - -- (char *)rmesa->radeon.radeonScreen->gartTextures.map; -- int valid = (size >= 0 && offset >= 0 -- && offset + size < -- rmesa->radeon.radeonScreen->gartTextures.size); -- -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, -- valid); -- -- return valid; --} -- --GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) --{ -- int offset = -- (char *)pointer - -- (char *)rmesa->radeon.radeonScreen->gartTextures.map; -- -- //fprintf(stderr, "offset=%08x\n", offset); -- -- if (offset < 0 -- || offset > rmesa->radeon.radeonScreen->gartTextures.size) -- return ~0; -- else -- return 
rmesa->radeon.radeonScreen->gart_texture_offset + offset; -+ assert(rmesa->dma.current_used <= rmesa->dma.current->size); - } - - void r300InitIoctlFuncs(struct dd_function_table *functions) -diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h -index e1143fb..c743478 100644 ---- a/src/mesa/drivers/dri/r300/r300_ioctl.h -+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h -@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_context.h" - #include "radeon_drm.h" - --extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, -- const GLvoid * pointer, GLint size); -- --extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, -- const GLvoid * pointer); -- - extern void r300Flush(GLcontext * ctx); - --extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, -- const char *caller); - extern void r300AllocDmaRegion(r300ContextPtr rmesa, -- struct r300_dma_region *region, int bytes, -- int alignment); -+ dri_bo **pbo, int *poffset, -+ int bytes, int alignment); - - extern void r300InitIoctlFuncs(struct dd_function_table *functions); - -diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c -index f8f9d4f..b045393 100644 ---- a/src/mesa/drivers/dri/r300/r300_mem.c -+++ b/src/mesa/drivers/dri/r300/r300_mem.c -@@ -27,359 +27,843 @@ - - /** - * \file -+ * Simulate a real memory manager for R300 in the old-style scheme. -+ * -+ * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr. 
- * - * \author Aapo Tahkola - */ - -+#include "r300_mem.h" -+ -+#include - #include - --#include "r300_context.h" --#include "r300_cmdbuf.h" --#include "r300_ioctl.h" --#include "r300_mem.h" -+#include "simple_list.h" -+ - #include "radeon_ioctl.h" -+#include "r300_cmdbuf.h" - --#ifdef USER_BUFFERS -+typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic; -+typedef struct _radeon_bo_classic radeon_bo_classic; -+typedef struct _radeon_bo_functions radeon_bo_functions; -+typedef struct _radeon_reloc radeon_reloc; -+typedef struct _radeon_bo_vram radeon_bo_vram; -+ -+struct _radeon_bufmgr_classic { -+ radeon_bufmgr base; -+ r300ContextPtr rmesa; -+ -+ radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */ -+ -+ radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */ -+ radeon_bo_classic **pending_tail; -+ -+ /* Texture heap bookkeeping */ -+ driTexHeap *texture_heap; -+ GLuint texture_offset; -+ driTextureObject texture_swapped; -+}; -+ -+struct _radeon_reloc { -+ uint64_t flags; -+ GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */ -+ radeon_bo_classic *target; -+ GLuint delta; -+}; -+ -+struct _radeon_bo_functions { -+ /** -+ * Free a buffer object. Caller has verified that the object is not -+ * referenced or pending. -+ */ -+ void (*free)(radeon_bo_classic*); -+ -+ /** -+ * Validate the given buffer. Must set the validated flag to 1. -+ * -+ * May be null for buffer objects that are always valid. -+ * Always called with lock held. -+ */ -+ void (*validate)(radeon_bo_classic*); -+ -+ /** -+ * Called when a writing map of the buffer is taken, to note that -+ * the buffer will have to be re-validated. -+ * -+ * May be null for buffer objects that don't need it. -+ */ -+ void (*dirty)(radeon_bo_classic*); -+ -+ /** -+ * Indicate that the buffer object is now used by the hardware. -+ * -+ * May be null. 
-+ */ -+ void (*bind)(radeon_bo_classic*); -+ -+ /** -+ * Indicate that the buffer object is no longer used by the hardware. -+ * -+ * May be null. -+ */ -+ void (*unbind)(radeon_bo_classic*); -+}; - --static void resize_u_list(r300ContextPtr rmesa) --{ -- void *temp; -- int nsize; -+/** -+ * A buffer object. There are three types of buffer objects: -+ * 1. cmdbuf: Ordinary malloc()ed memory, used for command buffers -+ * 2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl. -+ * 3. vram: Objects with malloc()ed backing store that will be uploaded -+ * into VRAM on demand; used for textures. -+ * There is a @ref functions table for operations that depend on the -+ * buffer object type. -+ * -+ * Fencing is handled the same way all buffer objects. During command buffer -+ * submission, the pending flag and corresponding variables are set accordingly. -+ */ -+struct _radeon_bo_classic { -+ dri_bo base; - -- temp = rmesa->rmm->u_list; -- nsize = rmesa->rmm->u_size * 2; -+ const radeon_bo_functions *functions; - -- rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); -- _mesa_memset(rmesa->rmm->u_list, 0, -- nsize * sizeof(*rmesa->rmm->u_list)); -+ radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */ -+ radeon_bo_classic **pprev; - -- if (temp) { -- r300FlushCmdBuf(rmesa, __FUNCTION__); -+ /** -+ * Number of software references to this buffer. -+ * A buffer is freed automatically as soon as its reference count reaches 0 -+ * *and* it is no longer pending. 
-+ */ -+ unsigned int refcount; -+ unsigned int mapcount; /** mmap count; mutually exclusive to being pending */ - -- _mesa_memcpy(rmesa->rmm->u_list, temp, -- rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); -- _mesa_free(temp); -- } -+ unsigned int validated:1; /** whether the buffer is validated for hardware use right now */ -+ unsigned int used:1; /* only for communication between process_relocs and post_submit */ -+ -+ unsigned int pending:1; -+ radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */ -+ radeon_bo_classic **pending_pprev; - -- rmesa->rmm->u_size = nsize; -+ /* The following two variables are intricately linked to the DRM interface, -+ * and must be in this physical memory order, or else chaos ensues. -+ * See the DRM's implementation of R300_CMD_SCRATCH for details. -+ */ -+ uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */ -+ uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */ -+ -+ radeon_reloc *relocs; /** Array of relocations in this buffer */ -+ GLuint relocs_used; /** # of relocations in relocation array */ -+ GLuint relocs_size; /** # of reloc records reserved in relocation array */ -+}; -+ -+typedef struct _radeon_vram_wrapper radeon_vram_wrapper; -+ -+/** Wrapper around heap object */ -+struct _radeon_vram_wrapper { -+ driTextureObject base; -+ radeon_bo_vram *bo; -+}; -+ -+struct _radeon_bo_vram { -+ radeon_bo_classic base; -+ -+ unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */ -+ -+ radeon_vram_wrapper *vram; /** Block in VRAM (if any) */ -+}; -+ -+static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx) -+{ -+ return (radeon_bufmgr_classic*)bufmgr_ctx; - } - --void r300_mem_init(r300ContextPtr rmesa) -+static radeon_bo_classic* get_bo_classic(dri_bo *bo_base) - { -- rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); -- memset(rmesa->rmm, 0, 
sizeof(struct r300_memory_manager)); -+ return (radeon_bo_classic*)bo_base; -+} - -- rmesa->rmm->u_size = 128; -- resize_u_list(rmesa); -+static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base) -+{ -+ return (radeon_bo_vram*)bo_base; - } - --void r300_mem_destroy(r300ContextPtr rmesa) -+/** -+ * Really free a given buffer object. -+ */ -+static void bo_free(radeon_bo_classic *bo) - { -- _mesa_free(rmesa->rmm->u_list); -- rmesa->rmm->u_list = NULL; -+ assert(!bo->refcount); -+ assert(!bo->pending); -+ assert(!bo->mapcount); -+ -+ if (bo->relocs) { -+ int i; -+ for(i = 0; i < bo->relocs_used; ++i) -+ dri_bo_unreference(&bo->relocs[i].target->base); -+ free(bo->relocs); -+ bo->relocs = 0; -+ } -+ -+ *bo->pprev = bo->next; -+ if (bo->next) -+ bo->next->pprev = bo->pprev; - -- _mesa_free(rmesa->rmm); -- rmesa->rmm = NULL; -+ bo->functions->free(bo); - } - --void *r300_mem_ptr(r300ContextPtr rmesa, int id) -+ -+/** -+ * Keep track of which buffer objects are still pending, i.e. waiting for -+ * some hardware operation to complete. -+ */ -+static void track_pending_buffers(radeon_bufmgr_classic *bufmgr) - { -- assert(id <= rmesa->rmm->u_last); -- return rmesa->rmm->u_list[id].ptr; -+ uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa); -+ -+ while(bufmgr->pending) { -+ radeon_bo_classic *bo = bufmgr->pending; -+ -+ assert(bo->pending); -+ -+ if (bo->pending_count || -+ bo->pending_age > currentage) // TODO: Age counter wraparound! -+ break; -+ -+ bo->pending = 0; -+ bufmgr->pending = bo->pending_next; -+ if (bufmgr->pending) -+ bufmgr->pending->pending_pprev = &bufmgr->pending; -+ else -+ bufmgr->pending_tail = &bufmgr->pending; -+ -+ if (bo->functions->unbind) -+ (*bo->functions->unbind)(bo); -+ if (!bo->refcount) -+ bo_free(bo); -+ } - } - --int r300_mem_find(r300ContextPtr rmesa, void *ptr) -+/** -+ * Initialize common buffer object data. 
-+ */ -+static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size) - { -- int i; -+ bo->base.bufmgr = &bufmgr->base.base; -+ bo->base.size = size; -+ bo->refcount = 1; -+ -+ bo->pprev = &bufmgr->buffers; -+ bo->next = bufmgr->buffers; -+ if (bo->next) -+ bo->next->pprev = &bo->next; -+ bufmgr->buffers = bo; -+} - -- for (i = 1; i < rmesa->rmm->u_size + 1; i++) -- if (rmesa->rmm->u_list[i].ptr && -- ptr >= rmesa->rmm->u_list[i].ptr && -- ptr < -- rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) -- break; - -- if (i < rmesa->rmm->u_size + 1) -- return i; -+/** -+ * Free a DMA-based buffer. -+ */ -+static void dma_free(radeon_bo_classic *bo) -+{ -+ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr); -+ drm_radeon_mem_free_t memfree; -+ int ret; -+ -+ memfree.region = RADEON_MEM_REGION_GART; -+ memfree.region_offset = bo->base.offset; -+ memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset; - -- fprintf(stderr, "%p failed\n", ptr); -- return 0; -+ ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd, -+ DRM_RADEON_FREE, &memfree, sizeof(memfree)); -+ if (ret) { -+ fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset); -+ fprintf(stderr, "ret = %s\n", strerror(-ret)); -+ exit(1); -+ } -+ -+ free(bo); - } - --//#define MM_DEBUG --int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) -+static const radeon_bo_functions dma_bo_functions = { -+ .free = &dma_free -+}; -+ -+/** -+ * Call the DRM to allocate GART memory for the given (incomplete) -+ * buffer object. 
-+ */ -+static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, -+ unsigned long size, unsigned int alignment) - { - drm_radeon_mem_alloc_t alloc; -- int offset = 0, ret; -- int i, free = -1; -- int done_age; -- drm_radeon_mem_free_t memfree; -- int tries = 0; -- static int bytes_wasted = 0, allocated = 0; -+ int baseoffset; -+ int ret; - -- if (size < 4096) -- bytes_wasted += 4096 - size; -+ alloc.region = RADEON_MEM_REGION_GART; -+ alloc.alignment = alignment; -+ alloc.size = size; -+ alloc.region_offset = &baseoffset; -+ -+ ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, -+ DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); -+ if (ret) { -+ if (RADEON_DEBUG & DEBUG_MEMORY) -+ fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret); -+ return 0; -+ } - -- allocated += size; -+ bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset; -+ bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset; - --#if 0 -- static int t = 0; -- if (t != time(NULL)) { -- t = time(NULL); -- fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", -- rmesa->rmm->u_last, bytes_wasted / 1024, -- allocated / 1024); -+ return 1; -+} -+ -+/** -+ * Allocate a DMA buffer. 
-+ */ -+static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name, -+ unsigned long size, unsigned int alignment) -+{ -+ radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); -+ -+ bo->functions = &dma_bo_functions; -+ -+ track_pending_buffers(bufmgr); -+ if (!try_dma_alloc(bufmgr, bo, size, alignment)) { -+ if (RADEON_DEBUG & DEBUG_MEMORY) -+ fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size); -+ radeonFinish(bufmgr->rmesa->radeon.glCtx); -+ track_pending_buffers(bufmgr); -+ if (!try_dma_alloc(bufmgr, bo, size, alignment)) { -+ WARN_ONCE( -+ "Ran out of GART memory (for %ld)!\n" -+ "Please consider adjusting GARTSize option.\n", -+ size); -+ free(bo); -+ return 0; -+ } - } --#endif - -- memfree.region = RADEON_MEM_REGION_GART; -+ init_buffer(bufmgr, bo, size); -+ bo->validated = 1; /* DMA buffer offsets are always valid */ - -- again: -+ return &bo->base; -+} - -- done_age = radeonGetAge((radeonContextPtr) rmesa); -+/** -+ * Free a command buffer -+ */ -+static void cmdbuf_free(radeon_bo_classic *bo) -+{ -+ free(bo->base.virtual); -+ free(bo); -+} - -- if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) -- resize_u_list(rmesa); -+static const radeon_bo_functions cmdbuf_bo_functions = { -+ .free = cmdbuf_free -+}; - -- for (i = rmesa->rmm->u_last + 1; i > 0; i--) { -- if (rmesa->rmm->u_list[i].ptr == NULL) { -- free = i; -- continue; -+/** -+ * Allocate a command buffer. -+ * -+ * Command buffers are really just malloc'ed buffers. They are managed by -+ * the bufmgr to enable relocations. -+ */ -+static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name, -+ unsigned long size) -+{ -+ radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); -+ -+ bo->functions = &cmdbuf_bo_functions; -+ bo->base.virtual = malloc(size); -+ -+ init_buffer(bufmgr, bo, size); -+ return &bo->base; -+} -+ -+/** -+ * Free a VRAM-based buffer object. 
-+ */ -+static void vram_free(radeon_bo_classic *bo_base) -+{ -+ radeon_bo_vram *bo = get_bo_vram(bo_base); -+ -+ if (bo->vram) { -+ driDestroyTextureObject(&bo->vram->base); -+ bo->vram = 0; -+ } -+ -+ free(bo->base.base.virtual); -+ free(bo); -+} -+ -+/** -+ * Allocate/update the copy in vram. -+ * -+ * Note: Assume we're called with the DRI lock held. -+ */ -+static void vram_validate(radeon_bo_classic *bo_base) -+{ -+ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr); -+ radeon_bo_vram *bo = get_bo_vram(bo_base); -+ -+ if (!bo->vram) { -+ bo->backing_store_dirty = 1; -+ -+ bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper)); -+ bo->vram->bo = bo; -+ make_empty_list(&bo->vram->base); -+ bo->vram->base.totalSize = bo->base.base.size; -+ if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) { -+ fprintf(stderr, "Ouch! vram_validate failed\n"); -+ free(bo->vram); -+ bo->base.base.offset = 0; -+ bo->vram = 0; -+ return; - } -+ } -+ -+ assert(bo->vram->base.memBlock); -+ -+ bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs; -+ -+ if (bo->backing_store_dirty) { -+ /* Copy to VRAM using a blit. -+ * All memory is 4K aligned. We're using 1024 pixels wide blits. -+ */ -+ drm_radeon_texture_t tex; -+ drm_radeon_tex_image_t tmp; -+ int ret; - -- if (rmesa->rmm->u_list[i].h_pending == 0 && -- rmesa->rmm->u_list[i].pending -- && rmesa->rmm->u_list[i].age <= done_age) { -- memfree.region_offset = -- (char *)rmesa->rmm->u_list[i].ptr - -- (char *)rmesa->radeon.radeonScreen->gartTextures. 
-- map; -+ tex.offset = bo->base.base.offset; -+ tex.image = &tmp; - -- ret = -- drmCommandWrite(rmesa->radeon.radeonScreen-> -- driScreen->fd, DRM_RADEON_FREE, -- &memfree, sizeof(memfree)); -+ assert(!(tex.offset & 1023)); - -+ tmp.x = 0; -+ tmp.y = 0; -+ if (bo->base.base.size < 4096) { -+ tmp.width = (bo->base.base.size + 3) / 4; -+ tmp.height = 1; -+ } else { -+ tmp.width = 1024; -+ tmp.height = (bo->base.base.size + 4095) / 4096; -+ } -+ tmp.data = bo->base.base.virtual; -+ -+ tex.format = RADEON_TXFORMAT_ARGB8888; -+ tex.width = tmp.width; -+ tex.height = tmp.height; -+ tex.pitch = MAX2(tmp.width / 16, 1); -+ -+ do { -+ ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, -+ DRM_RADEON_TEXTURE, &tex, -+ sizeof(drm_radeon_texture_t)); - if (ret) { -- fprintf(stderr, "Failed to free at %p\n", -- rmesa->rmm->u_list[i].ptr); -- fprintf(stderr, "ret = %s\n", strerror(-ret)); -- exit(1); -- } else { --#ifdef MM_DEBUG -- fprintf(stderr, "really freed %d at age %x\n", -- i, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -- if (i == rmesa->rmm->u_last) -- rmesa->rmm->u_last--; -- -- if (rmesa->rmm->u_list[i].size < 4096) -- bytes_wasted -= -- 4096 - rmesa->rmm->u_list[i].size; -- -- allocated -= rmesa->rmm->u_list[i].size; -- rmesa->rmm->u_list[i].pending = 0; -- rmesa->rmm->u_list[i].ptr = NULL; -- free = i; -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, -+ "DRM_RADEON_TEXTURE: again!\n"); -+ usleep(1); - } -- } -+ } while (ret == -EAGAIN); -+ -+ bo->backing_store_dirty = 0; - } -- rmesa->rmm->u_head = i; -- -- if (free == -1) { -- WARN_ONCE("Ran out of slots!\n"); -- //usleep(100); -- r300FlushCmdBuf(rmesa, __FUNCTION__); -- tries++; -- if (tries > 100) { -- WARN_ONCE("Ran out of slots!\n"); -- exit(1); -- } -- goto again; -+ -+ bo->base.validated = 1; -+} -+ -+static void vram_dirty(radeon_bo_classic *bo_base) -+{ -+ radeon_bo_vram *bo = get_bo_vram(bo_base); -+ -+ bo->base.validated = 0; -+ bo->backing_store_dirty = 1; -+} -+ -+static void 
vram_bind(radeon_bo_classic *bo_base) -+{ -+ radeon_bo_vram *bo = get_bo_vram(bo_base); -+ -+ if (bo->vram) { -+ bo->vram->base.bound = 1; -+ driUpdateTextureLRU(&bo->vram->base); - } -+} - -- alloc.region = RADEON_MEM_REGION_GART; -- alloc.alignment = alignment; -- alloc.size = size; -- alloc.region_offset = &offset; -+static void vram_unbind(radeon_bo_classic *bo_base) -+{ -+ radeon_bo_vram *bo = get_bo_vram(bo_base); - -- ret = -- drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, -- sizeof(alloc)); -- if (ret) { --#if 0 -- WARN_ONCE("Ran out of mem!\n"); -- r300FlushCmdBuf(rmesa, __FUNCTION__); -- //usleep(100); -- tries2++; -- tries = 0; -- if (tries2 > 100) { -- WARN_ONCE("Ran out of GART memory!\n"); -- exit(1); -- } -- goto again; --#else -- WARN_ONCE -- ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", -- size); -- return 0; --#endif -+ if (bo->vram) -+ bo->vram->base.bound = 0; -+} -+ -+/** Callback function called by the texture heap when a texture is evicted */ -+static void destroy_vram_wrapper(void *data, driTextureObject *t) -+{ -+ radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t; -+ -+ if (wrapper->bo && wrapper->bo->vram == wrapper) { -+ wrapper->bo->base.validated = 0; -+ wrapper->bo->vram = 0; - } -+} - -- i = free; -+static const radeon_bo_functions vram_bo_functions = { -+ .free = vram_free, -+ .validate = vram_validate, -+ .dirty = vram_dirty, -+ .bind = vram_bind, -+ .unbind = vram_unbind -+}; - -- if (i > rmesa->rmm->u_last) -- rmesa->rmm->u_last = i; -+/** -+ * Free a VRAM-based buffer object. 
-+ */ -+static void static_free(radeon_bo_classic *bo_base) -+{ -+ radeon_bo_vram *bo = get_bo_vram(bo_base); - -- rmesa->rmm->u_list[i].ptr = -- ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; -- rmesa->rmm->u_list[i].size = size; -- rmesa->rmm->u_list[i].age = 0; -- //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); -+ free(bo); -+} - --#ifdef MM_DEBUG -- fprintf(stderr, "allocated %d at age %x\n", i, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -+static void static_bind(radeon_bo_classic *bo_base) -+{ -+} - -- return i; -+static void static_unbind(radeon_bo_classic *bo_base) -+{ - } - --void r300_mem_use(r300ContextPtr rmesa, int id) -+static void static_validate(radeon_bo_classic *bo_base) - { -- uint64_t ull; --#ifdef MM_DEBUG -- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -- drm_r300_cmd_header_t *cmd; -+} - -- assert(id <= rmesa->rmm->u_last); -+static void static_dirty(radeon_bo_classic *bo_base) -+{ -+} - -- if (id == 0) -- return; -+static const radeon_bo_functions static_bo_functions = { -+ .free = static_free, -+ .validate = static_validate, -+ .dirty = static_dirty, -+ .bind = static_bind, -+ .unbind = static_unbind -+}; - -- cmd = -- (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, -- 2 + sizeof(ull) / 4, -- __FUNCTION__); -- cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; -- cmd[0].scratch.reg = R300_MEM_SCRATCH; -- cmd[0].scratch.n_bufs = 1; -- cmd[0].scratch.flags = 0; -- cmd++; - -- ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; -- _mesa_memcpy(cmd, &ull, sizeof(ull)); -- cmd += sizeof(ull) / 4; -+/** -+ * Allocate a backing store buffer object that is validated into VRAM. 
-+ */ -+static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name, -+ unsigned long size, unsigned int alignment) -+{ -+ radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); -+ -+ bo->base.functions = &vram_bo_functions; -+ bo->base.base.virtual = malloc(size); -+ init_buffer(bufmgr, &bo->base, size); -+ return &bo->base.base; -+} - -- cmd[0].u = /*id */ 0; - -- LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */ -- rmesa->rmm->u_list[id].h_pending++; -- UNLOCK_HARDWARE(&rmesa->radeon); -+static dri_bo *bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name, -+ unsigned long size, unsigned int alignment, -+ uint64_t location_mask) -+{ -+ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); -+ -+ if (location_mask & DRM_BO_MEM_CMDBUF) { -+ return cmdbuf_alloc(bufmgr, name, size); -+ } else if (location_mask & DRM_BO_MEM_DMA) { -+ return dma_alloc(bufmgr, name, size, alignment); -+ } else { -+ return vram_alloc(bufmgr, name, size, alignment); -+ } - } - --unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) -+static dri_bo *bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name, -+ unsigned long offset, unsigned long size, -+ void *virtual, uint64_t location_mask) - { -- unsigned long offset; -+ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); -+ radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); - -- assert(id <= rmesa->rmm->u_last); -+ bo->base.functions = &static_bo_functions; -+ bo->base.base.virtual = virtual; -+ bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation; -+ bo->base.validated = 1; /* Static buffer offsets are always valid */ - -- offset = (char *)rmesa->rmm->u_list[id].ptr - -- (char *)rmesa->radeon.radeonScreen->gartTextures.map; -- offset += rmesa->radeon.radeonScreen->gart_texture_offset; -+ init_buffer(bufmgr, &bo->base, size); -+ return &bo->base.base; - -- return offset; - } - --void 
*r300_mem_map(r300ContextPtr rmesa, int id, int access) -+ -+ -+static void bufmgr_classic_bo_reference(dri_bo *bo_base) - { --#ifdef MM_DEBUG -- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -- void *ptr; -- int tries = 0; -+ radeon_bo_classic *bo = get_bo_classic(bo_base); -+ bo->refcount++; -+ assert(bo->refcount > 0); -+} - -- assert(id <= rmesa->rmm->u_last); -+static void bufmgr_classic_bo_unreference(dri_bo *bo_base) -+{ -+ radeon_bo_classic *bo = get_bo_classic(bo_base); - -- if (access == R300_MEM_R) { -+ if (!bo_base) -+ return; - -- if (rmesa->rmm->u_list[id].mapped == 1) -- WARN_ONCE("buffer %d already mapped\n", id); -+ assert(bo->refcount > 0); -+ bo->refcount--; -+ if (!bo->refcount) { -+ // Ugly HACK - figure out whether this is really necessary -+ get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++; - -- rmesa->rmm->u_list[id].mapped = 1; -- ptr = r300_mem_ptr(rmesa, id); -+ assert(!bo->mapcount); -+ if (!bo->pending) -+ bo_free(bo); -+ } -+} - -- return ptr; -+static int bufmgr_classic_bo_map(dri_bo *bo_base, GLboolean write_enable) -+{ -+ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr); -+ radeon_bo_classic *bo = get_bo_classic(bo_base); -+ assert(bo->refcount > 0); -+ -+ if (bo->pending) { -+ track_pending_buffers(bufmgr); -+ if (bo->pending) { -+ // TODO: Better fence waiting -+ if (RADEON_DEBUG & DEBUG_MEMORY) -+ fprintf(stderr, "bo_map: buffer is pending. 
Flushing...\n"); -+ radeonFinish(bufmgr->rmesa->radeon.glCtx); -+ track_pending_buffers(bufmgr); -+ if (bo->pending) { -+ fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n"); -+ abort(); -+ } -+ } - } - -- if (rmesa->rmm->u_list[id].h_pending) -- r300FlushCmdBuf(rmesa, __FUNCTION__); -+ if (write_enable && bo->functions->dirty) -+ bo->functions->dirty(bo); - -- if (rmesa->rmm->u_list[id].h_pending) { -- return NULL; -- } -+ bo->mapcount++; -+ assert(bo->mapcount > 0); -+ return 0; -+} - -- while (rmesa->rmm->u_list[id].age > -- radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) -- usleep(10); -+static int bufmgr_classic_bo_unmap(dri_bo *buf) -+{ -+ radeon_bo_classic *bo = get_bo_classic(buf); -+ assert(bo->refcount > 0); -+ assert(bo->mapcount > 0); -+ bo->mapcount--; -+ return 0; -+} - -- if (tries >= 1000) { -- fprintf(stderr, "Idling failed (%x vs %x)\n", -- rmesa->rmm->u_list[id].age, -- radeonGetAge((radeonContextPtr) rmesa)); -- return NULL; -+/** -+ * Mark the given buffer as pending and move it to the tail -+ * of the pending list. -+ * The caller is responsible for setting up pending_count and pending_age. -+ */ -+static void move_to_pending_tail(radeon_bo_classic *bo) -+{ -+ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); -+ -+ if (bo->pending) { -+ *bo->pending_pprev = bo->pending_next; -+ if (bo->pending_next) -+ bo->pending_next->pending_pprev = bo->pending_pprev; -+ else -+ bufmgr->pending_tail = bo->pending_pprev; - } - -- if (rmesa->rmm->u_list[id].mapped == 1) -- WARN_ONCE("buffer %d already mapped\n", id); -+ bo->pending = 1; -+ bo->pending_pprev = bufmgr->pending_tail; -+ bo->pending_next = 0; -+ *bufmgr->pending_tail = bo; -+ bufmgr->pending_tail = &bo->pending_next; -+} - -- rmesa->rmm->u_list[id].mapped = 1; -- ptr = r300_mem_ptr(rmesa, id); -+/** -+ * Emit commands to the batch buffer that cause the guven buffer's -+ * pending_count and pending_age to be updated. 
-+ */ -+static void emit_age_for_buffer(radeon_bo_classic* bo) -+{ -+ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); -+ BATCH_LOCALS(bufmgr->rmesa); -+ drm_r300_cmd_header_t cmd; -+ uint64_t ull; - -- return ptr; -+ cmd.scratch.cmd_type = R300_CMD_SCRATCH; -+ cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */ -+ cmd.scratch.n_bufs = 1; -+ cmd.scratch.flags = 0; -+ ull = (uint64_t) (intptr_t) &bo->pending_age; -+ -+ BEGIN_BATCH(4); -+ OUT_BATCH(cmd.u); -+ OUT_BATCH(ull & 0xffffffff); -+ OUT_BATCH(ull >> 32); -+ OUT_BATCH(0); -+ END_BATCH(); -+ COMMIT_BATCH(); -+ -+ bo->pending_count++; - } - --void r300_mem_unmap(r300ContextPtr rmesa, int id) -+static int bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta, -+ GLuint offset, dri_bo *target) - { --#ifdef MM_DEBUG -- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -+ radeon_bo_classic *bo = get_bo_classic(batch_buf); -+ radeon_reloc *reloc; - -- assert(id <= rmesa->rmm->u_last); -+ if (bo->relocs_used >= bo->relocs_size) { -+ bo->relocs_size *= 2; -+ if (bo->relocs_size < 32) -+ bo->relocs_size = 32; - -- if (rmesa->rmm->u_list[id].mapped == 0) -- WARN_ONCE("buffer %d not mapped\n", id); -+ bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc)); -+ } - -- rmesa->rmm->u_list[id].mapped = 0; -+ reloc = &bo->relocs[bo->relocs_used++]; -+ reloc->flags = flags; -+ reloc->offset = offset; -+ reloc->delta = delta; -+ reloc->target = get_bo_classic(target); -+ dri_bo_reference(target); -+ return 0; - } - --void r300_mem_free(r300ContextPtr rmesa, int id) -+/* process_relocs is called just before the given command buffer -+ * is executed. It ensures that all referenced buffers are in -+ * the right GPU domain. 
-+ */ -+static void *bufmgr_classic_process_relocs(dri_bo *batch_buf, GLuint *count) - { --#ifdef MM_DEBUG -- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, -- radeonGetAge((radeonContextPtr) rmesa)); --#endif -+ radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); -+ int i; - -- assert(id <= rmesa->rmm->u_last); -+ // Warning: At this point, we append something to the batch buffer -+ // during flush. -+ emit_age_for_buffer(batch_bo); -+ -+ dri_bo_map(batch_buf, GL_TRUE); -+ for(i = 0; i < batch_bo->relocs_used; ++i) { -+ radeon_reloc *reloc = &batch_bo->relocs[i]; -+ uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset); -+ uint32_t offset; -+ -+ if (!reloc->target->validated) -+ reloc->target->functions->validate(reloc->target); -+ reloc->target->used = 1; -+ offset = reloc->target->base.offset + reloc->delta; -+ -+ if (reloc->flags & DRM_RELOC_BLITTER) -+ *dest = (*dest & 0xffc00000) | (offset >> 10); -+ else if (reloc->flags & DRM_RELOC_TXOFFSET) -+ *dest = (*dest & 31) | (offset & ~31); -+ else -+ *dest = offset; -+ } -+ dri_bo_unmap(batch_buf); -+ return 0; -+} - -- if (id == 0) -- return; -+/* post_submit is called just after the given command buffer -+ * is executed. It ensures that buffers are properly marked as -+ * pending. 
-+ */ -+static void bufmgr_classic_post_submit(dri_bo *batch_buf, dri_fence **fence) -+{ -+ radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); -+ int i; - -- if (rmesa->rmm->u_list[id].ptr == NULL) { -- WARN_ONCE("Not allocated!\n"); -- return; -+ assert(!batch_bo->pending_count); -+ -+ for(i = 0; i < batch_bo->relocs_used; ++i) { -+ radeon_reloc *reloc = &batch_bo->relocs[i]; -+ -+ if (reloc->target->used) { -+ reloc->target->used = 0; -+ assert(!reloc->target->pending_count); -+ reloc->target->pending_age = batch_bo->pending_age; -+ move_to_pending_tail(reloc->target); -+ if (reloc->target->functions->bind) -+ (*reloc->target->functions->bind)(reloc->target); -+ } - } -+} - -- if (rmesa->rmm->u_list[id].pending) { -- WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); -- return; -+static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx) -+{ -+ radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); -+ -+ track_pending_buffers(bufmgr); -+ if (bufmgr->pending) -+ radeonFinish(bufmgr->rmesa->radeon.glCtx); -+ track_pending_buffers(bufmgr); -+ -+ if (bufmgr->buffers) { -+ fprintf(stderr, "Warning: Buffer objects have leaked\n"); -+ while(bufmgr->buffers) { -+ fprintf(stderr, " Leak of size %ld\n", bufmgr->buffers->base.size); -+ bufmgr->buffers->refcount = 0; -+ bufmgr->buffers->mapcount = 0; -+ bufmgr->buffers->pending = 0; -+ bo_free(bufmgr->buffers); -+ } - } - -- rmesa->rmm->u_list[id].pending = 1; -+ driDestroyTextureHeap(bufmgr->texture_heap); -+ bufmgr->texture_heap = 0; -+ assert(is_empty_list(&bufmgr->texture_swapped)); -+ -+ free(bufmgr); -+} -+ -+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa) -+{ -+ radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic)); -+ -+ bufmgr->rmesa = rmesa; -+ bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc; -+ bufmgr->base.base.bo_alloc_static = bufmgr_classic_bo_alloc_static; -+ bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference; 
-+ bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference; -+ bufmgr->base.base.bo_map = &bufmgr_classic_bo_map; -+ bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap; -+ bufmgr->base.base.emit_reloc = &bufmgr_classic_emit_reloc; -+ bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs; -+ bufmgr->base.base.post_submit = &bufmgr_classic_post_submit; -+ bufmgr->base.base.destroy = &bufmgr_classic_destroy; -+ -+ bufmgr->pending_tail = &bufmgr->pending; -+ -+ /* Init texture heap */ -+ make_empty_list(&bufmgr->texture_swapped); -+ bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr, -+ rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS, -+ (drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0], -+ &rmesa->radeon.sarea->tex_age[0], -+ &bufmgr->texture_swapped, sizeof(radeon_vram_wrapper), -+ &destroy_vram_wrapper); -+ bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0]; -+ -+ return &bufmgr->base; -+} -+ -+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx) -+{ -+ radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base); -+ -+ DRI_AGE_TEXTURES(bufmgr->texture_heap); - } --#endif -diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h -index 625a7f6..4e9be65 100644 ---- a/src/mesa/drivers/dri/r300/r300_mem.h -+++ b/src/mesa/drivers/dri/r300/r300_mem.h -@@ -1,37 +1,22 @@ - #ifndef __R300_MEM_H__ - #define __R300_MEM_H__ - --//#define R300_MEM_PDL 0 --#define R300_MEM_UL 1 -+#include "glheader.h" -+#include "dri_bufmgr.h" - --#define R300_MEM_R 1 --#define R300_MEM_W 2 --#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) -+#include "r300_context.h" - --#define R300_MEM_SCRATCH 2 - --struct r300_memory_manager { -- struct { -- void *ptr; -- uint32_t size; -- uint32_t age; -- uint32_t h_pending; -- int pending; -- int mapped; -- } *u_list; -- int u_head, u_size, u_last; -+/* Note: The following flags should probably be ultimately eliminated, -+ * or replaced by 
something else. -+ */ -+#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) */ -+#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */ - --}; -+#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */ -+#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */ - --extern void r300_mem_init(r300ContextPtr rmesa); --extern void r300_mem_destroy(r300ContextPtr rmesa); --extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); --extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); --extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); --extern void r300_mem_use(r300ContextPtr rmesa, int id); --extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); --extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); --extern void r300_mem_unmap(r300ContextPtr rmesa, int id); --extern void r300_mem_free(r300ContextPtr rmesa, int id); -+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa); -+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx); - - #endif -diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c -new file mode 100644 -index 0000000..c3b918c ---- /dev/null -+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (C) 2008 Nicolai Haehnle. -+ * -+ * All Rights Reserved. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sublicense, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ * -+ */ -+ -+#include "r300_mipmap_tree.h" -+ -+#include -+#include -+ -+#include "simple_list.h" -+#include "texcompress.h" -+#include "texformat.h" -+ -+#include "r300_mem.h" -+ -+static GLuint r300_compressed_texture_size(GLcontext *ctx, -+ GLsizei width, GLsizei height, GLsizei depth, -+ GLuint mesaFormat) -+{ -+ GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); -+ -+ if (mesaFormat == MESA_FORMAT_RGB_DXT1 || -+ mesaFormat == MESA_FORMAT_RGBA_DXT1) { -+ if (width + 3 < 8) /* width one block */ -+ size = size * 4; -+ else if (width + 3 < 16) -+ size = size * 2; -+ } else { -+ /* DXT3/5, 16 bytes per block */ -+ WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); -+ if (width + 3 < 8) -+ size = size * 2; -+ } -+ -+ return size; -+} -+ -+/** -+ * Compute sizes and fill in offset and blit information for the given -+ * image (determined by \p face and \p level). -+ * -+ * \param curOffset points to the offset at which the image is to be stored -+ * and is updated by this function according to the size of the image. -+ */ -+static void compute_tex_image_offset(r300_mipmap_tree *mt, -+ GLuint face, GLuint level, GLuint* curOffset) -+{ -+ r300_mipmap_level *lvl = &mt->levels[level]; -+ -+ /* Find image size in bytes */ -+ if (mt->compressed) { -+ lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx, -+ lvl->width, lvl->height, lvl->depth, mt->compressed); -+ } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { -+ lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height; -+ } else if (mt->tilebits & R300_TXO_MICRO_TILE) { -+ /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, -+ * though the actual offset may be different (if texture is less than -+ * 32 bytes width) to the untiled case */ -+ int w = (lvl->width * mt->bpp * 2 + 31) & ~31; -+ lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth; -+ } else { -+ int w = (lvl->width * mt->bpp + 31) & ~31; -+ lvl->size = w * lvl->height * lvl->depth; -+ } -+ assert(lvl->size > 0); -+ -+ /* All images are aligned to a 32-byte offset */ -+ *curOffset = (*curOffset + 0x1f) & ~0x1f; -+ lvl->faces[face].offset = *curOffset; -+ *curOffset += lvl->size; -+} -+ -+static GLuint minify(GLuint size, GLuint levels) -+{ -+ size = size >> levels; -+ if (size < 1) -+ size = 1; -+ return size; -+} -+ -+static void calculate_miptree_layout(r300_mipmap_tree *mt) -+{ -+ GLuint curOffset; -+ GLuint numLevels; -+ GLuint i; -+ -+ numLevels = mt->lastLevel - mt->firstLevel + 1; -+ assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); -+ -+ curOffset = 0; -+ for(i = 0; i < numLevels; i++) { -+ GLuint face; -+ -+ mt->levels[i].width = minify(mt->width0, mt->firstLevel + i); -+ mt->levels[i].height = minify(mt->height0, mt->firstLevel + i); -+ mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i); -+ -+ for(face = 0; face < mt->faces; face++) -+ compute_tex_image_offset(mt, face, i, &curOffset); -+ } -+ -+ /* Note the required size in memory */ -+ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; -+} -+ -+ -+/** -+ * Create a new mipmap tree, calculate its layout and allocate memory. -+ */ -+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, -+ GLenum target, GLuint firstLevel, GLuint lastLevel, -+ GLuint width0, GLuint height0, GLuint depth0, -+ GLuint bpp, GLuint tilebits, GLuint compressed) -+{ -+ r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree); -+ -+ mt->r300 = rmesa; -+ mt->t = t; -+ mt->target = target; -+ mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 
6 : 1; -+ mt->firstLevel = firstLevel; -+ mt->lastLevel = lastLevel; -+ mt->width0 = width0; -+ mt->height0 = height0; -+ mt->depth0 = depth0; -+ mt->bpp = bpp; -+ mt->tilebits = tilebits; -+ mt->compressed = compressed; -+ -+ calculate_miptree_layout(mt); -+ -+ mt->bo = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0); -+ -+ return mt; -+} -+ -+/** -+ * Destroy the given mipmap tree. -+ */ -+void r300_miptree_destroy(r300_mipmap_tree *mt) -+{ -+ dri_bo_unreference(mt->bo); -+ free(mt); -+} -+ -+/* -+ * XXX Move this into core Mesa? -+ */ -+static void -+_mesa_copy_rect(GLubyte * dst, -+ GLuint cpp, -+ GLuint dst_pitch, -+ GLuint dst_x, -+ GLuint dst_y, -+ GLuint width, -+ GLuint height, -+ const GLubyte * src, -+ GLuint src_pitch, GLuint src_x, GLuint src_y) -+{ -+ GLuint i; -+ -+ dst_pitch *= cpp; -+ src_pitch *= cpp; -+ dst += dst_x * cpp; -+ src += src_x * cpp; -+ dst += dst_y * dst_pitch; -+ src += src_y * dst_pitch; -+ width *= cpp; -+ -+ if (width == dst_pitch && width == src_pitch) -+ memcpy(dst, src, height * width); -+ else { -+ for (i = 0; i < height; i++) { -+ memcpy(dst, src, width); -+ dst += dst_pitch; -+ src += src_pitch; -+ } -+ } -+} -+ -+/** -+ * Upload the given texture image to the given face/level of the mipmap tree. -+ * \param level of the texture, i.e. 
\c level==mt->firstLevel is the first hw level -+ */ -+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, -+ struct gl_texture_image *texImage) -+{ -+ GLuint hwlevel = level - mt->firstLevel; -+ r300_mipmap_level *lvl = &mt->levels[hwlevel]; -+ void *dest; -+ -+ assert(face < mt->faces); -+ assert(level >= mt->firstLevel && level <= mt->lastLevel); -+ assert(texImage && texImage->Data); -+ assert(texImage->Width == lvl->width); -+ assert(texImage->Height == lvl->height); -+ assert(texImage->Depth == lvl->depth); -+ -+ dri_bo_map(mt->bo, GL_TRUE); -+ -+ dest = mt->bo->virtual + lvl->faces[face].offset; -+ -+ if (mt->tilebits) -+ WARN_ONCE("%s: tiling not supported yet", __FUNCTION__); -+ -+ if (!mt->compressed) { -+ GLuint dst_align; -+ GLuint dst_pitch = lvl->width; -+ GLuint src_pitch = lvl->width; -+ -+ if (mt->target == GL_TEXTURE_RECTANGLE_NV) -+ dst_align = 64 / mt->bpp; -+ else -+ dst_align = 32 / mt->bpp; -+ dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1); -+ -+ _mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height, -+ texImage->Data, src_pitch, 0, 0); -+ } else { -+ memcpy(dest, texImage->Data, lvl->size); -+ } -+ -+ dri_bo_unmap(mt->bo); -+} -diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h -new file mode 100644 -index 0000000..a888ecf ---- /dev/null -+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2008 Nicolai Haehnle. -+ * -+ * All Rights Reserved. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sublicense, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ */ -+ -+#ifndef __R300_MIPMAP_TREE_H_ -+#define __R300_MIPMAP_TREE_H_ -+ -+#include "r300_context.h" -+ -+typedef struct _r300_mipmap_tree r300_mipmap_tree; -+typedef struct _r300_mipmap_level r300_mipmap_level; -+typedef struct _r300_mipmap_image r300_mipmap_image; -+ -+struct _r300_mipmap_image { -+ GLuint offset; /** Offset of this image from the start of mipmap tree, in bytes */ -+}; -+ -+struct _r300_mipmap_level { -+ GLuint width; -+ GLuint height; -+ GLuint depth; -+ GLuint size; /** Size of each image, in bytes */ -+ r300_mipmap_image faces[6]; -+}; -+ -+ -+/** -+ * A mipmap tree contains texture images in the layout that the hardware -+ * expects. -+ * -+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the -+ * layout on-the-fly; however, the texture contents (i.e. 
texels) can be -+ * changed. -+ */ -+struct _r300_mipmap_tree { -+ r300ContextPtr r300; -+ r300TexObj *t; -+ dri_bo *bo; -+ -+ GLuint totalsize; /** total size of the miptree, in bytes */ -+ -+ GLenum target; /** GL_TEXTURE_xxx */ -+ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ -+ GLuint firstLevel; /** First mip level stored in this mipmap tree */ -+ GLuint lastLevel; /** Last mip level stored in this mipmap tree */ -+ -+ GLuint width0; /** Width of level 0 image */ -+ GLuint height0; /** Height of level 0 image */ -+ GLuint depth0; /** Depth of level 0 image */ -+ -+ GLuint bpp; /** Bytes per texel */ -+ GLuint tilebits; /** R300_TXO_xxx_TILE */ -+ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ -+ -+ r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; -+}; -+ -+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, -+ GLenum target, GLuint firstLevel, GLuint lastLevel, -+ GLuint width0, GLuint height0, GLuint depth0, -+ GLuint bpp, GLuint tilebits, GLuint compressed); -+void r300_miptree_destroy(r300_mipmap_tree *mt); -+ -+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, -+ struct gl_texture_image *texImage); -+ -+ -+#endif /* __R300_MIPMAP_TREE_H_ */ -diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c -index 0a199e6..209fae9 100644 ---- a/src/mesa/drivers/dri/r300/r300_render.c -+++ b/src/mesa/drivers/dri/r300/r300_render.c -@@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) - static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); -- struct r300_dma_region *rvb = &rmesa->state.elt_dma; - void *out; - -- if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { -- rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; -- rvb->start = ((char *)elts) - rvb->address; -- rvb->aos_offset = -- 
rmesa->radeon.radeonScreen->gart_texture_offset + -- rvb->start; -- return; -- } else if (r300IsGartMemory(rmesa, elts, 1)) { -- WARN_ONCE("Pointer not within GART memory!\n"); -- _mesa_exit(-1); -- } -- -- r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); -- rvb->aos_offset = GET_START(rvb); -+ r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset, -+ n_elts * 4, 4); - -- out = rvb->address + rvb->start; -+ out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset; - memcpy(out, elts, n_elts * 4); - } - --static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, -- int vertex_count, int type) -+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) - { -- int cmd_reserved = 0; -- int cmd_written = 0; -- drm_radeon_cmd_header_t *cmd = NULL; -+ BATCH_LOCALS(rmesa); - -- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); -- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); -+ BEGIN_BATCH(8); -+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); -+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - -- start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); -- e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); -- e32(addr); -- e32(vertex_count); -+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); -+ OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); -+ OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0); -+ OUT_BATCH(vertex_count); -+ END_BATCH(); - } - - static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) - { -+ BATCH_LOCALS(rmesa); - int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; - int i; -- int cmd_reserved = 0; -- int cmd_written = 0; -- drm_radeon_cmd_header_t *cmd = NULL; - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, - offset); - -- 
start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); -- e32(nr); -+ BEGIN_BATCH(sz+2); -+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); -+ OUT_BATCH(nr); - - for (i = 0; i + 1 < nr; i += 2) { -- e32((rmesa->state.aos[i].aos_size << 0) | -- (rmesa->state.aos[i].aos_stride << 8) | -- (rmesa->state.aos[i + 1].aos_size << 16) | -- (rmesa->state.aos[i + 1].aos_stride << 24)); -- -- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); -- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); -+ OUT_BATCH((rmesa->state.aos[i].components << 0) | -+ (rmesa->state.aos[i].stride << 8) | -+ (rmesa->state.aos[i + 1].components << 16) | -+ (rmesa->state.aos[i + 1].stride << 24)); -+ -+ OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo, -+ rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0); -+ OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo, -+ rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0); - } - - if (nr & 1) { -- e32((rmesa->state.aos[nr - 1].aos_size << 0) | -- (rmesa->state.aos[nr - 1].aos_stride << 8)); -- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); -+ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | -+ (rmesa->state.aos[nr - 1].stride << 8)); -+ OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo, -+ rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0); - } -+ END_BATCH(); - } - - static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) - { -- int cmd_reserved = 0; -- int cmd_written = 0; -- drm_radeon_cmd_header_t *cmd = NULL; -+ BATCH_LOCALS(rmesa); - -- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); -- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); -+ BEGIN_BATCH(3); -+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); -+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | 
type); -+ END_BATCH(); - } - - static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, - int start, int end, int prim) - { -+ BATCH_LOCALS(rmesa); - int type, num_verts; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; -@@ -268,6 +258,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, - if (type < 0 || num_verts <= 0) - return; - -+ /* Make space for at least 64 dwords. -+ * This is supposed to ensure that we can get all rendering -+ * commands into a single command buffer. -+ */ -+ r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__); -+ - if (vb->Elts) { - if (num_verts > 65535) { - /* not implemented yet */ -@@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, - */ - r300EmitElts(ctx, vb->Elts, num_verts); - r300EmitAOS(rmesa, rmesa->state.aos_count, start); -- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); -+ r300FireEB(rmesa, num_verts, type); - } else { - r300EmitAOS(rmesa, rmesa->state.aos_count, start); - r300FireAOS(rmesa, num_verts, type); - } -+ COMMIT_BATCH(); - } - - static GLboolean r300RunRender(GLcontext * ctx, -@@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx, - - r300EmitCacheFlush(rmesa); - --#ifdef USER_BUFFERS -- r300UseArrays(ctx); --#endif -- - r300ReleaseArrays(ctx); - - return GL_FALSE; -diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c -index cce07d3..b314764 100644 ---- a/src/mesa/drivers/dri/r300/r300_state.c -+++ b/src/mesa/drivers/dri/r300/r300_state.c -@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - #include "radeon_ioctl.h" - #include "radeon_state.h" -+#include "radeon_buffer.h" - #include "r300_context.h" - #include "r300_ioctl.h" - #include "r300_state.h" -@@ -1148,39 +1149,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx) - r300ContextPtr rmesa = R300_CONTEXT(ctx); - r300ContextPtr r300 = rmesa; - struct gl_framebuffer *fb = ctx->DrawBuffer; -- driRenderbuffer *drb; -+ struct radeon_renderbuffer *rrb; - - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - /* draw to front */ -- drb = -- (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. -- Renderbuffer; -+ rrb = -+ (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* draw to back */ -- drb = -- (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. -- Renderbuffer; -+ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; - } else { - /* drawing to multiple buffers, or none */ - return; - } - -- assert(drb); -- assert(drb->flippedPitch); -+ assert(rrb); -+ assert(rrb->pitch); - - R300_STATECHANGE(rmesa, cb); - -- r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + -- r300->radeon.radeonScreen->fbLocation; -- r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; -- -- if (r300->radeon.radeonScreen->cpp == 4) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; -- else -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; -- -- if (r300->radeon.sarea->tiling_enabled) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - #if 0 - R200_STATECHANGE(rmesa, ctx); - -@@ -1499,14 +1486,9 @@ static void r300SetupTextures(GLcontext * ctx) - /* We cannot let disabled tmu offsets pass DRM */ - for (i = 0; i < mtu; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { -- --#if 0 /* Enables old behaviour */ -- hw_tmu = i; --#endif - tmu_mappings[i] = hw_tmu; - -- t = r300->state.texture.unit[i].texobj; -- /* XXX 
questionable fix for bug 9170: */ -+ t = r300_tex_obj(ctx->Texture.Unit[i]._Current); - if (!t) - continue; - -@@ -1532,21 +1514,20 @@ static void r300SetupTextures(GLcontext * ctx) - */ - r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->filter_1 | -- translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); -+ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); - r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->size; - r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + - hw_tmu] = t->format; - r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->pitch_reg; -- r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + -- hw_tmu] = t->offset; -+ r300->hw.textures[hw_tmu] = t; - -- if (t->offset & R300_TXO_MACRO_TILE) { -+ if (t->tile_bits & R300_TXO_MACRO_TILE) { - WARN_ONCE("macro tiling enabled!\n"); - } - -- if (t->offset & R300_TXO_MICRO_TILE) { -+ if (t->tile_bits & R300_TXO_MICRO_TILE) { - WARN_ONCE("micro tiling enabled!\n"); - } - -@@ -2373,20 +2354,6 @@ static void r300ResetHwState(r300ContextPtr r300) - - r300BlendColor(ctx, ctx->Color.BlendColor); - -- /* Again, r300ClearBuffer uses this */ -- r300->hw.cb.cmd[R300_CB_OFFSET] = -- r300->radeon.state.color.drawOffset + -- r300->radeon.radeonScreen->fbLocation; -- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; -- -- if (r300->radeon.radeonScreen->cpp == 4) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; -- else -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; -- -- if (r300->radeon.sarea->tiling_enabled) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; -- - r300->hw.rb3d_dither_ctl.cmd[1] = 0; - r300->hw.rb3d_dither_ctl.cmd[2] = 0; - r300->hw.rb3d_dither_ctl.cmd[3] = 0; -@@ -2402,10 +2369,6 @@ static void r300ResetHwState(r300ContextPtr r300) - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; - -- r300->hw.zb.cmd[R300_ZB_OFFSET] 
= -- r300->radeon.radeonScreen->depthOffset + -- r300->radeon.radeonScreen->fbLocation; -- r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; - - if (r300->radeon.sarea->tiling_enabled) { - /* XXX: Turn off when clearing buffers ? */ -diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h -index 0589ab7..96177ba 100644 ---- a/src/mesa/drivers/dri/r300/r300_state.h -+++ b/src/mesa/drivers/dri/r300/r300_state.h -@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #define R300_FIREVERTICES( r300 ) \ - do { \ - \ -- if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ -+ if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) { \ - r300Flush( (r300)->radeon.glCtx ); \ - } \ - \ -diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c -index 8aebd9b..f4a0b7f 100644 ---- a/src/mesa/drivers/dri/r300/r300_swtcl.c -+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c -@@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
- static void flush_last_swtcl_prim( r300ContextPtr rmesa ); - - --void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); -+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset); - void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); - #define EMIT_ATTR( ATTR, STYLE ) \ - do { \ -@@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) - inputs[i] = -1; - } - } -- -+ - /* Fixed, apply to vir0 only */ - if (InputsRead & (1 << VERT_ATTRIB_POS)) - inputs[VERT_ATTRIB_POS] = 0; -@@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx ) - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); -- -+ - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - tab[nr++] = i; - } - } -- -+ - for (i = 0; i < nr; i++) { - int ci; -- -+ - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; - swizzle[i][2] = SWIZZLE_ZERO; -@@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx ) - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); -- -+ - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); -- -+ - R300_STATECHANGE(rmesa, vof); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; -- -+ - rmesa->swtcl.vertex_size = - _tnl_install_attrs( ctx, -- rmesa->swtcl.vertex_attrs, -+ rmesa->swtcl.vertex_attrs, - rmesa->swtcl.vertex_attr_count, - NULL, 0 ); -- -+ - rmesa->swtcl.vertex_size /= 4; - - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); -@@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx ) - */ - static void 
flush_last_swtcl_prim( r300ContextPtr rmesa ) - { -+ BATCH_LOCALS(rmesa); -+ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); -- -+ - rmesa->dma.flush = NULL; - -- if (rmesa->dma.current.buf) { -- struct r300_dma_region *current = &rmesa->dma.current; -- GLuint current_offset = GET_START(current); -+ if (rmesa->dma.current) { -+ GLuint current_offset = rmesa->dma.current_used; - -- assert (current->start + -+ assert (rmesa->dma.current_used + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == -- current->ptr); -+ rmesa->dma.current_vertexptr); - -- if (rmesa->dma.current.start != rmesa->dma.current.ptr) { -+ if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) { -+ rmesa->dma.current_used = rmesa->dma.current_vertexptr; - - r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); -- -+ - r300EmitState(rmesa); -- -+ - r300EmitVertexAOS( rmesa, - rmesa->swtcl.vertex_size, -- current_offset); -- -+ rmesa->dma.current, current_offset); -+ - r300EmitVbufPrim( rmesa, - rmesa->swtcl.hw_primitive, - rmesa->swtcl.numverts); -- -+ - r300EmitCacheFlush(rmesa); -+ COMMIT_BATCH(); - } -- -+ - rmesa->swtcl.numverts = 0; -- current->start = current->ptr; - } - } - -@@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) - { - GLuint bytes = vsize * nverts; - -- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) -+ if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) - r300RefillCurrentDmaRegion( rmesa, bytes); - - if (!rmesa->dma.flush) { -@@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) - - ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); - ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); -- ASSERT( rmesa->dma.current.start + -+ ASSERT( rmesa->dma.current_used + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == -- rmesa->dma.current.ptr ); -+ rmesa->dma.current_vertexptr 
); - - { -- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); -- rmesa->dma.current.ptr += bytes; -+ GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr); -+ rmesa->dma.current_vertexptr += bytes; - rmesa->swtcl.numverts += nverts; - return head; - } -@@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); - r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - const char *r300verts = (char *)rmesa->swtcl.verts; - #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) --#define VERTEX r300Vertex -+#define VERTEX r300Vertex - #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) - #define PRINT_VERTEX(x) - #undef TAG -@@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx) - r300ContextPtr rmesa = R300_CONTEXT( ctx ); - // fprintf(stderr, "%s\n", __FUNCTION__); - -- r300ChooseRenderState(ctx); -+ r300ChooseRenderState(ctx); - r300SetVertexFormat(ctx); - - r300UpdateShaders(rmesa); - r300UpdateShaderStates(rmesa); - - r300EmitCacheFlush(rmesa); -- -- if (rmesa->dma.flush != 0 && -+ -+ if (rmesa->dma.flush != 0 && - rmesa->dma.flush != flush_last_swtcl_prim) - rmesa->dma.flush( rmesa ); - -@@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx) - static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); -- -+ - if (rmesa->swtcl.hw_primitive != hwprim) { - R300_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hwprim; -@@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) - - r300RasterPrimitive( ctx, reduced_prim[prim] ); - // fprintf(stderr, "%s\n", __FUNCTION__); -- -+ - } - - static void r300ResetLineStipple(GLcontext *ctx) -@@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx) - TNLcontext *tnl = TNL_CONTEXT(ctx); - r300ContextPtr rmesa = R300_CONTEXT(ctx); - static int firsttime = 1; -- -+ - if (firsttime) { - init_rast_tab(); - firsttime = 0; - } -- 
-+ - tnl->Driver.Render.Start = r300RenderStart; - tnl->Driver.Render.Finish = r300RenderFinish; - tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; -@@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx) - tnl->Driver.Render.BuildVertices = _tnl_build_vertices; - tnl->Driver.Render.CopyPV = _tnl_copy_pv; - tnl->Driver.Render.Interp = _tnl_interp; -- -+ - /* FIXME: what are these numbers? */ -- _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, -+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, - 48 * sizeof(GLfloat) ); -- -+ - rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; - rmesa->swtcl.RenderIndex = ~0; - rmesa->swtcl.render_primitive = GL_TRIANGLES; -- rmesa->swtcl.hw_primitive = 0; -+ rmesa->swtcl.hw_primitive = 0; - - _tnl_invalidate_vertex_state( ctx, ~0 ); - _tnl_invalidate_vertices( ctx, ~0 ); -@@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx) - _tnl_need_projected_coords( ctx, GL_FALSE ); - r300ChooseRenderState(ctx); - -- _mesa_validate_all_lighting_tables( ctx ); -+ _mesa_validate_all_lighting_tables( ctx ); - -- tnl->Driver.NotifyMaterialChange = -+ tnl->Driver.NotifyMaterialChange = - _mesa_validate_all_lighting_tables; - } - -@@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx) - { - } - --void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) -+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset) - { -- int cmd_reserved = 0; -- int cmd_written = 0; -+ BATCH_LOCALS(rmesa); - -- drm_radeon_cmd_header_t *cmd = NULL; - if (RADEON_DEBUG & DEBUG_VERTS) -- fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", -- __FUNCTION__, vertex_size, offset); -- -- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); -- e32(1); -- e32(vertex_size | (vertex_size << 8)); -- e32(offset); -+ fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", -+ __FUNCTION__, vertex_size, offset); -+ -+ BEGIN_BATCH(5); -+ 
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); -+ OUT_BATCH(1); -+ OUT_BATCH(vertex_size | (vertex_size << 8)); -+ OUT_BATCH_RELOC(0, bo, offset, 0); -+ END_BATCH(); - } - - void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) - { -- -- int cmd_reserved = 0; -- int cmd_written = 0; -+ BATCH_LOCALS(rmesa); - int type, num_verts; -- drm_radeon_cmd_header_t *cmd = NULL; - - type = r300PrimitiveType(rmesa, primitive); - num_verts = r300NumVerts(rmesa, vertex_nr, primitive); -- -- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); -- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); -+ -+ BEGIN_BATCH(3); -+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); -+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); -+ END_BATCH(); - } -diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c -index f7f4972..c6ee1b5 100644 ---- a/src/mesa/drivers/dri/r300/r300_tex.c -+++ b/src/mesa/drivers/dri/r300/r300_tex.c -@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_context.h" - #include "r300_state.h" - #include "r300_ioctl.h" -+#include "r300_mipmap_tree.h" - #include "r300_tex.h" - - #include "xmlpool.h" -@@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) - */ - static void r300UpdateTexWrap(r300TexObjPtr t) - { -- struct gl_texture_object *tObj = t->base.tObj; -+ struct gl_texture_object *tObj = &t->base; - - t->filter &= - ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); -@@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) - t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); - } - --/** -- * Allocate space for and load the mesa images into the texture memory block. -- * This will happen before drawing with a new texture, or drawing with a -- * texture after it was swapped out or teximaged again. 
-- */ -- --static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) --{ -- r300TexObjPtr t; -- -- t = CALLOC_STRUCT(r300_tex_obj); -- texObj->DriverData = t; -- if (t != NULL) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) { -- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, -- (void *)texObj, (void *)t); -- } -- -- /* Initialize non-image-dependent parts of the state: -- */ -- t->base.tObj = texObj; -- t->border_fallback = GL_FALSE; -- -- make_empty_list(&t->base); -- -- r300UpdateTexWrap(t); -- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); -- r300SetTexBorderColor(t, texObj->_BorderChan); -- } -- -- return t; --} -- - /* try to find a format which will only need a memcopy */ - static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, - GLenum srcType) -@@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, - return NULL; /* never get here */ - } - --static GLboolean --r300ValidateClientStorage(GLcontext * ctx, GLenum target, -- GLint internalFormat, -- GLint srcWidth, GLint srcHeight, -- GLenum format, GLenum type, const void *pixels, -- const struct gl_pixelstore_attrib *packing, -- struct gl_texture_object *texObj, -- struct gl_texture_image *texImage) -+/** -+ * Marks the given face/level pair as dirty. -+ * This will cause an appropriate texture reupload the next time this -+ * texture is validated. -+ */ -+static void mark_texture_image_dirty(r300TexObj *t, int face, int level) - { -- r300ContextPtr rmesa = R300_CONTEXT(ctx); -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "intformat %s format %s type %s\n", -- _mesa_lookup_enum_by_nr(internalFormat), -- _mesa_lookup_enum_by_nr(format), -- _mesa_lookup_enum_by_nr(type)); -- -- if (!ctx->Unpack.ClientStorage) -- return 0; -- -- if (ctx->_ImageTransferState || -- texImage->IsCompressed || texObj->GenerateMipmap) -- return 0; -- -- /* This list is incomplete, may be different on ppc??? 
-- */ -- switch (internalFormat) { -- case GL_RGBA: -- if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { -- texImage->TexFormat = _dri_texformat_argb8888; -- } else -- return 0; -- break; -- -- case GL_RGB: -- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { -- texImage->TexFormat = _dri_texformat_rgb565; -- } else -- return 0; -- break; -- -- case GL_YCBCR_MESA: -- if (format == GL_YCBCR_MESA && -- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { -- texImage->TexFormat = &_mesa_texformat_ycbcr_rev; -- } else if (format == GL_YCBCR_MESA && -- (type == GL_UNSIGNED_SHORT_8_8_APPLE || -- type == GL_UNSIGNED_BYTE)) { -- texImage->TexFormat = &_mesa_texformat_ycbcr; -- } else -- return 0; -- break; -- -- default: -- return 0; -- } -- -- /* Could deal with these packing issues, but currently don't: -- */ -- if (packing->SkipPixels || -- packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { -- return 0; -- } -- -- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, -- format, type); -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: srcRowStride %d/%x\n", -- __FUNCTION__, srcRowStride, srcRowStride); -- -- /* Could check this later in upload, pitch restrictions could be -- * relaxed, but would need to store the image pitch somewhere, -- * as packing details might change before image is uploaded: -- */ -- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) -- || (srcRowStride & 63)) -- return 0; -- -- /* Have validated that _mesa_transfer_teximage would be a straight -- * memcpy at this point. NOTE: future calls to TexSubImage will -- * overwrite the client data. This is explicitly mentioned in the -- * extension spec. 
-- */ -- texImage->Data = (void *)pixels; -- texImage->IsClientData = GL_TRUE; -- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; -- -- return 1; -+ t->dirty_images[face] |= 1 << level; - } - - static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, -@@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); -- return; -- } -- } -- -- /* Note, this will call ChooseTextureFormat */ - _mesa_store_teximage1d(ctx, target, level, internalFormat, - width, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - -- t->dirty_images[0] |= (1 << level); -+ mark_texture_image_dirty(t, 0, level); - } - - static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, -@@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -- -- assert(t); /* this _should_ be true */ -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); -- return; -- } -- } -+ r300TexObj* t = r300_tex_obj(texObj); - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - -- t->dirty_images[0] |= (1 << level); -+ mark_texture_image_dirty(t, 0, level); - } - - static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, -@@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, 
GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - GLuint face; - - /* which cube face or ordinary 2D image */ -@@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, - face = 0; - } - -- if (t != NULL) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); -- return; -- } -- } -- - texImage->IsClientData = GL_FALSE; - -- if (r300ValidateClientStorage(ctx, target, -- internalFormat, -- width, height, -- format, type, pixels, -- packing, texObj, texImage)) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using client storage\n", -- __FUNCTION__); -- } else { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using normal storage\n", -- __FUNCTION__); -- -- /* Normal path: copy (to cached memory) and eventually upload -- * via another copy to GART memory and then a blit... Could -- * eliminate one copy by going straight to (permanent) GART. -- * -- * Note, this will call r300ChooseTextureFormat. -- */ -- _mesa_store_teximage2d(ctx, target, level, internalFormat, -- width, height, border, format, type, -- pixels, &ctx->Unpack, texObj, texImage); -+ if (RADEON_DEBUG & DEBUG_TEXTURE) -+ fprintf(stderr, "%s: Using normal storage\n", -+ __FUNCTION__); -+ -+ /* Normal path: copy (to cached memory) and eventually upload -+ * via another copy to GART memory and then a blit... Could -+ * eliminate one copy by going straight to (permanent) GART. -+ * -+ * Note, this will call r300ChooseTextureFormat. 
-+ */ -+ _mesa_store_teximage2d(ctx, target, level, internalFormat, -+ width, height, border, format, type, -+ pixels, &ctx->Unpack, texObj, texImage); - -- t->dirty_images[face] |= (1 << level); -- } -+ mark_texture_image_dirty(t, face, level); - } - - static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, -@@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - GLuint face; - - /* which cube face or ordinary 2D image */ -@@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - face = 0; - } - -- assert(t); /* this _should_ be true */ -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); -- return; -- } -- } -- - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - -- t->dirty_images[face] |= (1 << level); -+ mark_texture_image_dirty(t, face, level); - } - - static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, -@@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - GLuint face; - - /* which cube face or ordinary 2D image */ -@@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, - face = 0; - } - -- if (t != NULL) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, -- "glCompressedTexImage2D"); -- return; -- } -- } -- 
- texImage->IsClientData = GL_FALSE; - -- /* can't call this, different parameters. Would never evaluate to true anyway currently */ --#if 0 -- if (r300ValidateClientStorage(ctx, target, -- internalFormat, -- width, height, -- format, type, pixels, -- packing, texObj, texImage)) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using client storage\n", -- __FUNCTION__); -- } else --#endif -- { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using normal storage\n", -- __FUNCTION__); -- -- /* Normal path: copy (to cached memory) and eventually upload -- * via another copy to GART memory and then a blit... Could -- * eliminate one copy by going straight to (permanent) GART. -- * -- * Note, this will call r300ChooseTextureFormat. -- */ -- _mesa_store_compressed_teximage2d(ctx, target, level, -- internalFormat, width, height, -- border, imageSize, data, -- texObj, texImage); -+ if (RADEON_DEBUG & DEBUG_TEXTURE) -+ fprintf(stderr, "%s: Using normal storage\n", -+ __FUNCTION__); -+ -+ /* Normal path: copy (to cached memory) and eventually upload -+ * via another copy to GART memory and then a blit... Could -+ * eliminate one copy by going straight to (permanent) GART. -+ * -+ * Note, this will call r300ChooseTextureFormat. 
-+ */ -+ _mesa_store_compressed_teximage2d(ctx, target, level, -+ internalFormat, width, height, -+ border, imageSize, data, -+ texObj, texImage); - -- t->dirty_images[face] |= (1 << level); -- } -+ mark_texture_image_dirty(t, face, level); - } - - static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, -@@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - GLuint face; - - /* which cube face or ordinary 2D image */ -@@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, - face = 0; - } - -- assert(t); /* this _should_ be true */ -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, -- "glCompressedTexSubImage3D"); -- return; -- } -- } -- - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, - yoffset, width, height, format, - imageSize, data, texObj, texImage); - -- t->dirty_images[face] |= (1 << level); -+ mark_texture_image_dirty(t, face, level); - } - - static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, -@@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -- -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); -- return; -- } -- } -+ r300TexObj* t = r300_tex_obj(texObj); - - texImage->IsClientData = GL_FALSE; - --#if 0 -- if (r300ValidateClientStorage(ctx, target, -- internalFormat, -- width, height, -- format, type, pixels, -- packing, texObj, 
texImage)) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using client storage\n", -- __FUNCTION__); -- } else --#endif -- { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: Using normal storage\n", -- __FUNCTION__); -- -- /* Normal path: copy (to cached memory) and eventually upload -- * via another copy to GART memory and then a blit... Could -- * eliminate one copy by going straight to (permanent) GART. -- * -- * Note, this will call r300ChooseTextureFormat. -- */ -- _mesa_store_teximage3d(ctx, target, level, internalFormat, -- width, height, depth, border, -- format, type, pixels, -- &ctx->Unpack, texObj, texImage); -+ if (RADEON_DEBUG & DEBUG_TEXTURE) -+ fprintf(stderr, "%s: Using normal storage\n", -+ __FUNCTION__); -+ -+ /* Normal path: copy (to cached memory) and eventually upload -+ * via another copy to GART memory and then a blit... Could -+ * eliminate one copy by going straight to (permanent) GART. -+ * -+ * Note, this will call r300ChooseTextureFormat. 
-+ */ -+ _mesa_store_teximage3d(ctx, target, level, internalFormat, -+ width, height, depth, border, -+ format, type, pixels, -+ &ctx->Unpack, texObj, texImage); - -- t->dirty_images[0] |= (1 << level); -- } -+ mark_texture_image_dirty(t, 0, level); - } - - static void -@@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - { -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -- --/* fprintf(stderr, "%s\n", __FUNCTION__); */ -- -- assert(t); /* this _should_ be true */ -- if (t) { -- driSwapOutTextureObject(t); -- } else { -- t = (driTextureObject *) r300AllocTexObj(texObj); -- if (!t) { -- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); -- return; -- } -- texObj->DriverData = t; -- } -+ r300TexObj* t = r300_tex_obj(texObj); - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, - texImage); - -- t->dirty_images[0] |= (1 << level); -+ mark_texture_image_dirty(t, 0, level); - } - - /** -@@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, - struct gl_texture_object *texObj, - GLenum pname, const GLfloat * params) - { -- r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %s )\n", __FUNCTION__, -@@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. 
- */ -- driSwapOutTextureObject((driTextureObject *) t); -+ if (t->mt) { -+ r300_miptree_destroy(t->mt); -+ t->mt = 0; -+ } - break; - - case GL_DEPTH_TEXTURE_MODE: -@@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, - } - } - --static void r300BindTexture(GLcontext * ctx, GLenum target, -- struct gl_texture_object *texObj) --{ -- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { -- fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, -- (void *)texObj, ctx->Texture.CurrentUnit); -- } -- -- if ((target == GL_TEXTURE_1D) -- || (target == GL_TEXTURE_2D) -- || (target == GL_TEXTURE_3D) -- || (target == GL_TEXTURE_CUBE_MAP) -- || (target == GL_TEXTURE_RECTANGLE_NV)) { -- assert(texObj->DriverData != NULL); -- } --} -- - static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); -- driTextureObject *t = (driTextureObject *) texObj->DriverData; -+ r300TexObj* t = r300_tex_obj(texObj); - - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, -@@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) - _mesa_lookup_enum_by_nr(texObj->Target)); - } - -- if (t != NULL) { -- if (rmesa) { -- R300_FIREVERTICES(rmesa); -- } -+ if (rmesa) { -+ int i; -+ R300_FIREVERTICES(rmesa); -+ -+ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) -+ if (rmesa->hw.textures[i] == t) -+ rmesa->hw.textures[i] = 0; -+ } - -- driDestroyTextureObject(t); -+ if (t->mt) { -+ r300_miptree_destroy(t->mt); -+ t->mt = 0; - } -- /* Free mipmap images and the texture object itself */ - _mesa_delete_texture_object(ctx, texObj); - } - -@@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) - * Called via ctx->Driver.NewTextureObject. - * Note: this function will be called during context creation to - * allocate the default texture objects. 
-- * Note: we could use containment here to 'derive' the driver-specific -- * texture object from the core mesa gl_texture_object. Not done at this time. - * Fixup MaxAnisotropy according to user preference. - */ - static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, -@@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, - GLenum target) - { - r300ContextPtr rmesa = R300_CONTEXT(ctx); -- struct gl_texture_object *obj; -- obj = _mesa_new_texture_object(ctx, name, target); -- if (!obj) -- return NULL; -- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; -+ r300TexObj* t = CALLOC_STRUCT(r300_tex_obj); -+ -+ -+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { -+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, -+ t, _mesa_lookup_enum_by_nr(target)); -+ } -+ -+ _mesa_initialize_texture_object(&t->base, name, target); -+ t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy; -+ -+ /* Initialize hardware state */ -+ r300UpdateTexWrap(t); -+ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); -+ r300SetTexBorderColor(t, t->base._BorderChan); - -- r300AllocTexObj(obj); -- return obj; -+ return &t->base; - } - - void r300InitTextureFuncs(struct dd_function_table *functions) -@@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions) - functions->TexSubImage2D = r300TexSubImage2D; - functions->TexSubImage3D = r300TexSubImage3D; - functions->NewTextureObject = r300NewTextureObject; -- functions->BindTexture = r300BindTexture; - functions->DeleteTexture = r300DeleteTexture; - functions->IsTextureResident = driIsTextureResident; - -diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h -index b86d45b..5d7f21e 100644 ---- a/src/mesa/drivers/dri/r300/r300_tex.h -+++ b/src/mesa/drivers/dri/r300/r300_tex.h -@@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx); - extern int r300UploadTexImages(r300ContextPtr 
rmesa, r300TexObjPtr t, - GLuint face); - --extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); -- - extern void r300InitTextureFuncs(struct dd_function_table *functions); - - #endif /* __r300_TEX_H__ */ -diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c -index 69847a4..b3b501b 100644 ---- a/src/mesa/drivers/dri/r300/r300_texmem.c -+++ b/src/mesa/drivers/dri/r300/r300_texmem.c -@@ -48,439 +48,15 @@ SOFTWARE. - #include "r300_context.h" - #include "r300_state.h" - #include "r300_cmdbuf.h" -+#include "r300_emit.h" -+#include "r300_mipmap_tree.h" - #include "radeon_ioctl.h" - #include "r300_tex.h" - #include "r300_ioctl.h" - #include /* for usleep() */ - --#ifdef USER_BUFFERS - #include "r300_mem.h" --#endif - --/** -- * Destroy any device-dependent state associated with the texture. This may -- * include NULLing out hardware state that points to the texture. -- */ --void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) --{ -- int i; -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) { -- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, -- (void *)t, (void *)t->base.tObj); -- } -- -- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { -- if (rmesa->state.texture.unit[i].texobj == t) { -- rmesa->state.texture.unit[i].texobj = NULL; -- } -- } --} -- --/* ------------------------------------------------------------ -- * Texture image conversions -- */ -- --static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, -- r300TexObjPtr t, -- struct gl_texture_image *texImage, -- GLint hwlevel, -- GLint x, GLint y, -- GLint width, GLint height) --{ -- const struct gl_texture_format *texFormat = texImage->TexFormat; -- GLuint srcPitch, dstPitch; -- int blit_format; -- int srcOffset; -- -- /* -- * XXX it appears that we always upload the full image, not a subimage. -- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever -- * changed, the src pitch will have to change. 
-- */ -- switch (texFormat->TexelBytes) { -- case 1: -- blit_format = R300_CP_COLOR_FORMAT_CI8; -- srcPitch = t->image[0][0].width * texFormat->TexelBytes; -- dstPitch = t->image[0][0].width * texFormat->TexelBytes; -- break; -- case 2: -- blit_format = R300_CP_COLOR_FORMAT_RGB565; -- srcPitch = t->image[0][0].width * texFormat->TexelBytes; -- dstPitch = t->image[0][0].width * texFormat->TexelBytes; -- break; -- case 4: -- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; -- srcPitch = t->image[0][0].width * texFormat->TexelBytes; -- dstPitch = t->image[0][0].width * texFormat->TexelBytes; -- break; -- case 8: -- case 16: -- blit_format = R300_CP_COLOR_FORMAT_CI8; -- srcPitch = t->image[0][0].width * texFormat->TexelBytes; -- dstPitch = t->image[0][0].width * texFormat->TexelBytes; -- break; -- default: -- return; -- } -- -- t->image[0][hwlevel].data = texImage->Data; -- srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); -- -- assert(srcOffset != ~0); -- -- /* Don't currently need to cope with small pitches? 
-- */ -- width = texImage->Width; -- height = texImage->Height; -- -- if (texFormat->TexelBytes > 4) { -- width *= texFormat->TexelBytes; -- } -- -- r300EmitWait(rmesa, R300_WAIT_3D); -- -- r300EmitBlit(rmesa, blit_format, -- srcPitch, -- srcOffset, -- dstPitch, -- t->bufAddr, -- x, -- y, -- t->image[0][hwlevel].x + x, -- t->image[0][hwlevel].y + y, width, height); -- -- r300EmitWait(rmesa, R300_WAIT_2D); --} -- --static void r300UploadRectSubImage(r300ContextPtr rmesa, -- r300TexObjPtr t, -- struct gl_texture_image *texImage, -- GLint x, GLint y, GLint width, GLint height) --{ -- const struct gl_texture_format *texFormat = texImage->TexFormat; -- int blit_format, dstPitch, done; -- -- switch (texFormat->TexelBytes) { -- case 1: -- blit_format = R300_CP_COLOR_FORMAT_CI8; -- break; -- case 2: -- blit_format = R300_CP_COLOR_FORMAT_RGB565; -- break; -- case 4: -- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; -- break; -- case 8: -- case 16: -- blit_format = R300_CP_COLOR_FORMAT_CI8; -- break; -- default: -- return; -- } -- -- t->image[0][0].data = texImage->Data; -- -- /* Currently don't need to cope with small pitches. -- */ -- width = texImage->Width; -- height = texImage->Height; -- dstPitch = t->pitch; -- -- if (texFormat->TexelBytes > 4) { -- width *= texFormat->TexelBytes; -- } -- -- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { -- /* In this case, could also use GART texturing. This is -- * currently disabled, but has been tested & works. -- */ -- t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); -- t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, -- "Using GART texturing for rectangular client texture\n"); -- -- /* Release FB memory allocated for this image: -- */ -- /* FIXME This may not be correct as driSwapOutTextureObject sets -- * FIXME dirty_images. It may be fine, though. 
-- */ -- if (t->base.memBlock) { -- driSwapOutTextureObject((driTextureObject *) t); -- } -- } else if (texImage->IsClientData) { -- /* Data already in GART memory, with usable pitch. -- */ -- GLuint srcPitch; -- srcPitch = texImage->RowStride * texFormat->TexelBytes; -- r300EmitBlit(rmesa, -- blit_format, -- srcPitch, -- r300GartOffsetFromVirtual(rmesa, texImage->Data), -- dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); -- } else { -- /* Data not in GART memory, or bad pitch. -- */ -- for (done = 0; done < height;) { -- struct r300_dma_region region; -- int lines = -- MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); -- int src_pitch; -- char *tex; -- -- src_pitch = texImage->RowStride * texFormat->TexelBytes; -- -- tex = (char *)texImage->Data + done * src_pitch; -- -- memset(®ion, 0, sizeof(region)); -- r300AllocDmaRegion(rmesa, ®ion, lines * dstPitch, -- 1024); -- -- /* Copy texdata to dma: -- */ -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, -- "%s: src_pitch %d dst_pitch %d\n", -- __FUNCTION__, src_pitch, dstPitch); -- -- if (src_pitch == dstPitch) { -- memcpy(region.address + region.start, tex, -- lines * src_pitch); -- } else { -- char *buf = region.address + region.start; -- int i; -- for (i = 0; i < lines; i++) { -- memcpy(buf, tex, src_pitch); -- buf += dstPitch; -- tex += src_pitch; -- } -- } -- -- r300EmitWait(rmesa, R300_WAIT_3D); -- -- /* Blit to framebuffer -- */ -- r300EmitBlit(rmesa, -- blit_format, -- dstPitch, GET_START(®ion), -- dstPitch | (t->tile_bits >> 16), -- t->bufAddr, 0, 0, 0, done, width, lines); -- -- r300EmitWait(rmesa, R300_WAIT_2D); --#ifdef USER_BUFFERS -- r300_mem_use(rmesa, region.buf->id); --#endif -- -- r300ReleaseDmaRegion(rmesa, ®ion, __FUNCTION__); -- done += lines; -- } -- } --} -- --/** -- * Upload the texture image associated with texture \a t at the specified -- * level at the address relative to \a start. 
-- */ --static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, -- GLint hwlevel, -- GLint x, GLint y, GLint width, GLint height, -- GLuint face) --{ -- struct gl_texture_image *texImage = NULL; -- GLuint offset; -- GLint imageWidth, imageHeight; -- GLint ret; -- drm_radeon_texture_t tex; -- drm_radeon_tex_image_t tmp; -- const int level = hwlevel + t->base.firstLevel; -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) { -- fprintf(stderr, -- "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", -- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, -- width, height, face); -- } -- -- ASSERT(face < 6); -- -- /* Ensure we have a valid texture to upload */ -- if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { -- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); -- return; -- } -- -- texImage = t->base.tObj->Image[face][level]; -- -- if (!texImage) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: texImage %d is NULL!\n", -- __FUNCTION__, level); -- return; -- } -- if (!texImage->Data) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: image data is NULL!\n", -- __FUNCTION__); -- return; -- } -- -- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { -- assert(level == 0); -- assert(hwlevel == 0); -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: image data is rectangular\n", -- __FUNCTION__); -- r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); -- return; -- } else if (texImage->IsClientData) { -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, -- "%s: image data is in GART client storage\n", -- __FUNCTION__); -- r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, -- width, height); -- return; -- } else if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "%s: image data is in normal memory\n", -- __FUNCTION__); -- -- imageWidth = texImage->Width; -- imageHeight = texImage->Height; -- -- offset = t->bufAddr; -- -- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { -- 
GLint imageX = 0; -- GLint imageY = 0; -- GLint blitX = t->image[face][hwlevel].x; -- GLint blitY = t->image[face][hwlevel].y; -- GLint blitWidth = t->image[face][hwlevel].width; -- GLint blitHeight = t->image[face][hwlevel].height; -- fprintf(stderr, " upload image: %d,%d at %d,%d\n", -- imageWidth, imageHeight, imageX, imageY); -- fprintf(stderr, " upload blit: %d,%d at %d,%d\n", -- blitWidth, blitHeight, blitX, blitY); -- fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", -- (GLuint) offset, hwlevel, level); -- } -- -- t->image[face][hwlevel].data = texImage->Data; -- -- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. -- * NOTE: we're always use a 1KB-wide blit and I8 texture format. -- * We used to use 1, 2 and 4-byte texels and used to use the texture -- * width to dictate the blit width - but that won't work for compressed -- * textures. (Brian) -- * NOTE: can't do that with texture tiling. (sroland) -- */ -- tex.offset = offset; -- tex.image = &tmp; -- /* copy (x,y,width,height,data) */ -- memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); -- -- if (texImage->TexFormat->TexelBytes > 4) { -- const int log2TexelBytes = -- (3 + (texImage->TexFormat->TexelBytes >> 4)); -- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ -- tex.pitch = -- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / -- 64, 1); -- tex.height = imageHeight; -- tex.width = imageWidth << log2TexelBytes; -- tex.offset += (tmp.x << log2TexelBytes) & ~1023; -- tmp.x = tmp.x % (1024 >> log2TexelBytes); -- tmp.width = tmp.width << log2TexelBytes; -- } else if (texImage->TexFormat->TexelBytes) { -- /* use multi-byte upload scheme */ -- tex.height = imageHeight; -- tex.width = imageWidth; -- switch (texImage->TexFormat->TexelBytes) { -- case 1: -- tex.format = RADEON_TXFORMAT_I8; -- break; -- case 2: -- tex.format = RADEON_TXFORMAT_AI88; -- break; -- case 4: -- tex.format = RADEON_TXFORMAT_ARGB8888; -- break; -- } -- tex.pitch = -- MAX2((texImage->Width * 
texImage->TexFormat->TexelBytes) / -- 64, 1); -- tex.offset += tmp.x & ~1023; -- tmp.x = tmp.x % 1024; -- -- if (t->tile_bits & R300_TXO_MICRO_TILE) { -- /* need something like "tiled coordinates" ? */ -- tmp.y = tmp.x / (tex.pitch * 128) * 2; -- tmp.x = -- tmp.x % (tex.pitch * 128) / 2 / -- texImage->TexFormat->TexelBytes; -- tex.pitch |= RADEON_DST_TILE_MICRO >> 22; -- } else { -- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); -- } --#if 1 -- if ((t->tile_bits & R300_TXO_MACRO_TILE) && -- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) -- && ((!(t->tile_bits & R300_TXO_MICRO_TILE) -- && (texImage->Height >= 8)) -- || (texImage->Height >= 16))) { -- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, -- OR if height is smaller than 8 automatically, but if micro tiling is active -- the limit is height 16 instead ? */ -- tex.pitch |= RADEON_DST_TILE_MACRO >> 22; -- } --#endif -- } else { -- /* In case of for instance 8x8 texture (2x2 dxt blocks), -- padding after the first two blocks is needed (only -- with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ -- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) -- has 4 real pixels. Needed so the kernel module reads -- the right amount of data. 
*/ -- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ -- tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); -- tex.height = (imageHeight + 3) / 4; -- tex.width = (imageWidth + 3) / 4; -- if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { -- tex.width *= 8; -- } else { -- tex.width *= 16; -- } -- } -- -- LOCK_HARDWARE(&rmesa->radeon); -- do { -- ret = -- drmCommandWriteRead(rmesa->radeon.dri.fd, -- DRM_RADEON_TEXTURE, &tex, -- sizeof(drm_radeon_texture_t)); -- if (ret) { -- if (RADEON_DEBUG & DEBUG_IOCTL) -- fprintf(stderr, -- "DRM_RADEON_TEXTURE: again!\n"); -- usleep(1); -- } -- } while (ret == -EAGAIN); -- -- UNLOCK_HARDWARE(&rmesa->radeon); -- -- if (ret) { -- fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); -- fprintf(stderr, " offset=0x%08x\n", offset); -- fprintf(stderr, " image width=%d height=%d\n", -- imageWidth, imageHeight); -- fprintf(stderr, " blit width=%d height=%d data=%p\n", -- t->image[face][hwlevel].width, -- t->image[face][hwlevel].height, -- t->image[face][hwlevel].data); -- _mesa_exit(-1); -- } --} - - /** - * Upload the texture images associated with texture \a t. 
This might -@@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, - - int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) - { -- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; -- - if (t->image_override) - return 0; -+ if (!t->mt) -+ return 0; - - if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { -- fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, -- (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, -- t->base.totalSize, t->base.firstLevel, -- t->base.lastLevel); -+ fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__, -+ (void *)rmesa->radeon.glCtx, t, -+ t->mt->firstLevel, t->mt->lastLevel); - } - -- if (t->base.totalSize == 0) -- return 0; -- - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__); - radeonFinish(rmesa->radeon.glCtx); - } - -- LOCK_HARDWARE(&rmesa->radeon); -- -- if (t->base.memBlock == NULL) { -- int heap; -- -- heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, -- (driTextureObject *) t); -- if (heap == -1) { -- UNLOCK_HARDWARE(&rmesa->radeon); -- return -1; -- } -- -- /* Set the base offset of the texture image */ -- t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] -- + t->base.memBlock->ofs; -- t->offset = t->bufAddr; -- -- if (!(t->base.tObj->Image[0][0]->IsClientData)) { -- /* hope it's safe to add that here... */ -- t->offset |= t->tile_bits; -- } -- } -- -- /* Let the world know we've used this memory recently. -- */ -- driUpdateTextureLRU((driTextureObject *) t); -- UNLOCK_HARDWARE(&rmesa->radeon); -- - /* Upload any images that are new */ -- if (t->base.dirty_images[face]) { -- int i; -+ if (t->dirty_images[face]) { -+ int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1; - for (i = 0; i < numLevels; i++) { -- if ((t->base. 
-- dirty_images[face] & (1 << -- (i + t->base.firstLevel))) != -- 0) { -- r300UploadSubImage(rmesa, t, i, 0, 0, -- t->image[face][i].width, -- t->image[face][i].height, -- face); -+ if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) { -+ r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i, -+ t->base.Image[face][t->mt->firstLevel + i]); - } - } -- t->base.dirty_images[face] = 0; -+ t->dirty_images[face] = 0; - } - - if (RADEON_DEBUG & DEBUG_SYNC) { -diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c -index bdd20b1..1b24738 100644 ---- a/src/mesa/drivers/dri/r300/r300_texstate.c -+++ b/src/mesa/drivers/dri/r300/r300_texstate.c -@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_state.h" - #include "r300_ioctl.h" - #include "radeon_ioctl.h" -+#include "r300_mipmap_tree.h" - #include "r300_tex.h" - #include "r300_reg.h" - -@@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) - if (!tObj) - return; - -- t = (r300TexObjPtr) tObj->DriverData; -- -+ t = r300_tex_obj(tObj); - - switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { - case MESA_FORMAT_Z16: -@@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) - } - - --/** -- * Compute sizes and fill in offset and blit information for the given -- * image (determined by \p face and \p level). -- * -- * \param curOffset points to the offset at which the image is to be stored -- * and is updated by this function according to the size of the image. 
-- */ --static void compute_tex_image_offset( -- struct gl_texture_object *tObj, -- GLuint face, -- GLint level, -- GLint* curOffset) -+static void calculate_first_last_level(struct gl_texture_object *tObj, -+ GLuint *pfirstLevel, GLuint *plastLevel) - { -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -- const struct gl_texture_image* texImage; -- GLuint blitWidth = R300_BLIT_WIDTH_BYTES; -- GLuint texelBytes; -- GLuint size; -- -- texImage = tObj->Image[0][level + t->base.firstLevel]; -- if (!texImage) -- return; -- -- texelBytes = texImage->TexFormat->TexelBytes; -- -- /* find image size in bytes */ -- if (texImage->IsCompressed) { -- if ((t->format & R300_TX_FORMAT_DXT1) == -- R300_TX_FORMAT_DXT1) { -- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); -- if ((texImage->Width + 3) < 8) /* width one block */ -- size = texImage->CompressedSize * 4; -- else if ((texImage->Width + 3) < 16) -- size = texImage->CompressedSize * 2; -- else -- size = texImage->CompressedSize; -+ const struct gl_texture_image * const baseImage = -+ tObj->Image[0][tObj->BaseLevel]; -+ -+ /* These must be signed values. MinLod and MaxLod can be negative numbers, -+ * and having firstLevel and lastLevel as signed prevents the need for -+ * extra sign checks. -+ */ -+ int firstLevel; -+ int lastLevel; -+ -+ /* Yes, this looks overly complicated, but it's all needed. -+ */ -+ switch (tObj->Target) { -+ case GL_TEXTURE_1D: -+ case GL_TEXTURE_2D: -+ case GL_TEXTURE_3D: -+ case GL_TEXTURE_CUBE_MAP: -+ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { -+ /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. 
-+ */ -+ firstLevel = lastLevel = tObj->BaseLevel; - } else { -- /* DXT3/5, 16 bytes per block */ -- WARN_ONCE -- ("DXT 3/5 suffers from multitexturing problems!\n"); -- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); -- if ((texImage->Width + 3) < 8) -- size = texImage->CompressedSize * 2; -- else -- size = texImage->CompressedSize; -+ firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); -+ firstLevel = MAX2(firstLevel, tObj->BaseLevel); -+ firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); -+ lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); -+ lastLevel = MAX2(lastLevel, tObj->BaseLevel); -+ lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); -+ lastLevel = MIN2(lastLevel, tObj->MaxLevel); -+ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } -- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { -- size = -- ((texImage->Width * texelBytes + -- 63) & ~63) * texImage->Height; -- blitWidth = 64 / texelBytes; -- } else if (t->tile_bits & R300_TXO_MICRO_TILE) { -- /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, -- though the actual offset may be different (if texture is less than -- 32 bytes width) to the untiled case */ -- int w = (texImage->Width * texelBytes * 2 + 31) & ~31; -- size = -- (w * ((texImage->Height + 1) / 2)) * -- texImage->Depth; -- blitWidth = MAX2(texImage->Width, 64 / texelBytes); -- } else { -- int w = (texImage->Width * texelBytes + 31) & ~31; -- size = w * texImage->Height * texImage->Depth; -- blitWidth = MAX2(texImage->Width, 64 / texelBytes); -- } -- assert(size > 0); -- -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", -- texImage->Width, texImage->Height, -- texImage->Depth, -- texImage->TexFormat->TexelBytes, -- texImage->InternalFormat); -- -- /* All images are aligned to a 32-byte offset */ -- *curOffset = (*curOffset + 0x1f) & ~0x1f; -- -- if (texelBytes) { -- /* fix x and y coords up later together with offset */ -- t->image[face][level].x = *curOffset; -- t->image[face][level].y = 0; -- t->image[face][level].width = -- MIN2(size / texelBytes, blitWidth); -- t->image[face][level].height = -- (size / texelBytes) / t->image[face][level].width; -- } else { -- t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; -- t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; -- t->image[face][level].width = -- MIN2(size, R300_BLIT_WIDTH_BYTES); -- t->image[face][level].height = size / t->image[face][level].width; -+ break; -+ case GL_TEXTURE_RECTANGLE_NV: -+ case GL_TEXTURE_4D_SGIS: -+ firstLevel = lastLevel = 0; -+ break; -+ default: -+ return; - } - -- if (RADEON_DEBUG & DEBUG_TEXTURE) -- fprintf(stderr, -- "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", -- level, face, texImage->Width, texImage->Height, -- t->image[face][level].x, t->image[face][level].y, -- t->image[face][level].width, t->image[face][level].height, -- size, *curOffset); -- -- *curOffset += size; -+ /* save these values */ -+ *pfirstLevel = firstLevel; -+ *plastLevel = 
lastLevel; - } - - -- - /** -- * This function computes the number of bytes of storage needed for -- * the given texture object (all mipmap levels, all cube faces). -- * The \c image[face][level].x/y/width/height parameters for upload/blitting -- * are computed here. \c filter, \c format, etc. will be set here -- * too. -+ * This function ensures a validated miptree is available. -+ * -+ * Additionally, some texture format bits are configured here. - * - * \param rmesa Context pointer - * \param tObj GL texture object whose images are to be posted to -@@ -309,13 +250,13 @@ static void compute_tex_image_offset( - static void r300SetTexImages(r300ContextPtr rmesa, - struct gl_texture_object *tObj) - { -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - const struct gl_texture_image *baseImage = - tObj->Image[0][tObj->BaseLevel]; -- GLint curOffset; -- GLint i, texelBytes; -- GLint numLevels; -- GLint log2Width, log2Height, log2Depth; -+ GLint texelBytes; -+ GLuint firstLevel = 0, lastLevel = 0; -+ -+ calculate_first_last_level(tObj, &firstLevel, &lastLevel); - - /* Set the hardware texture format - */ -@@ -335,107 +276,59 @@ static void r300SetTexImages(r300ContextPtr rmesa, - } - - texelBytes = baseImage->TexFormat->TexelBytes; -- -- /* Compute which mipmap levels we really want to send to the hardware. -- */ -- driCalculateTextureFirstLastLevel((driTextureObject *) t); -- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; -- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; -- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; -- -- numLevels = t->base.lastLevel - t->base.firstLevel + 1; -- -- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); -- -- /* Calculate mipmap offsets and dimensions for blitting (uploading) -- * The idea is that we lay out the mipmap levels within a block of -- * memory organized as a rectangle of width BLIT_WIDTH_BYTES. 
-- */ - t->tile_bits = 0; - -- /* figure out if this texture is suitable for tiling. */ --#if 0 /* Disabled for now */ -- if (texelBytes) { -- if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && -- /* texrect might be able to use micro tiling too in theory? */ -- (baseImage->Height > 1)) { -- -- /* allow 32 (bytes) x 1 mip (which will use two times the space -- the non-tiled version would use) max if base texture is large enough */ -- if ((numLevels == 1) || -- (((baseImage->Width * texelBytes / -- baseImage->Height) <= 32) -- && (baseImage->Width * texelBytes > 64)) -- || -- ((baseImage->Width * texelBytes / -- baseImage->Height) <= 16)) { -- t->tile_bits |= R300_TXO_MICRO_TILE; -- } -- } -+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) -+ t->format |= R300_TX_FORMAT_CUBIC_MAP; - -- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { -- /* we can set macro tiling even for small textures, they will be untiled anyway */ -- t->tile_bits |= R300_TXO_MACRO_TILE; -+ if (!t->image_override) { -+ GLuint compressed = baseImage->IsCompressed ? 
baseImage->TexFormat->MesaFormat : 0; -+ -+ if (t->mt) { -+ if (t->mt->firstLevel != firstLevel || -+ t->mt->lastLevel != lastLevel || -+ t->mt->width0 != baseImage->Width || -+ t->mt->height0 != baseImage->Height || -+ t->mt->depth0 != baseImage->Depth || -+ t->mt->bpp != texelBytes || -+ t->mt->tilebits != t->tile_bits || -+ t->mt->compressed != compressed) { -+ r300_miptree_destroy(t->mt); -+ t->mt = 0; -+ } - } -- } --#endif -- -- curOffset = 0; - -- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { -- ASSERT(log2Width == log2Height); -- t->format |= R300_TX_FORMAT_CUBIC_MAP; -- -- for(i = 0; i < numLevels; i++) { -- GLuint face; -- for(face = 0; face < 6; face++) -- compute_tex_image_offset(tObj, face, i, &curOffset); -+ if (!t->mt) { -+ t->mt = r300_miptree_create(rmesa, t, tObj->Target, -+ firstLevel, lastLevel, -+ baseImage->Width, baseImage->Height, baseImage->Depth, -+ texelBytes, t->tile_bits, compressed); -+ memset(t->dirty_images, 0xff, sizeof(t->dirty_images)); - } -- } else { -- for (i = 0; i < numLevels; i++) -- compute_tex_image_offset(tObj, 0, i, &curOffset); - } - -- /* Align the total size of texture memory block. -- */ -- t->base.totalSize = -- (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; -- -- t->size = -- (((tObj->Image[0][t->base.firstLevel]->Width - -- 1) << R300_TX_WIDTHMASK_SHIFT) -- | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << -- R300_TX_HEIGHTMASK_SHIFT)) -- | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); -- -+ t->size = (((tObj->Image[0][firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT) -+ | ((tObj->Image[0][firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)) -+ | ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT); - t->pitch = 0; - -- /* Only need to round to nearest 32 for textures, but the blitter -- * requires 64-byte aligned pitches, and we may/may not need the -- * blitter. NPOT only! 
-- */ - if (baseImage->IsCompressed) { -- t->pitch |= -- (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); -+ t->pitch |= (tObj->Image[0][firstLevel]->Width + 63) & ~(63); - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = (64 / texelBytes) - 1; -- t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * -+ t->pitch |= ((tObj->Image[0][firstLevel]->Width * - texelBytes) + 63) & ~(63); - t->size |= R300_TX_SIZE_TXPITCH_EN; - if (!t->image_override) -- t->pitch_reg = -- (((tObj->Image[0][t->base.firstLevel]->Width) + -- align) & ~align) - 1; -+ t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1; - } else { -- t->pitch |= -- ((tObj->Image[0][t->base.firstLevel]->Width * -- texelBytes) + 63) & ~(63); -+ t->pitch |= ((tObj->Image[0][firstLevel]->Width * texelBytes) + 63) & ~(63); - } - - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { -- if (tObj->Image[0][t->base.firstLevel]->Width > 2048) -+ if (tObj->Image[0][firstLevel]->Width > 2048) - t->pitch_reg |= R500_TXWIDTH_BIT11; -- if (tObj->Image[0][t->base.firstLevel]->Height > 2048) -+ if (tObj->Image[0][firstLevel]->Height > 2048) - t->pitch_reg |= R500_TXHEIGHT_BIT11; - } - } -@@ -449,17 +342,15 @@ static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit) - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - - ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); - -- if (t->base.dirty_images[0]) { -+ if (!t->mt || t->dirty_images[0]) { - R300_FIREVERTICES(rmesa); - - r300SetTexImages(rmesa, tObj); -- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); -- if (!t->base.memBlock && !t->image_override) -- return GL_FALSE; -+ r300UploadTexImages(rmesa, t, 0); - } - - return GL_TRUE; -@@ -470,7 +361,7 @@ 
static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - - ASSERT(tObj->Target == GL_TEXTURE_3D); - -@@ -479,12 +370,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) - return GL_FALSE; - } - -- if (t->base.dirty_images[0]) { -+ if (!t->mt || t->dirty_images[0]) { - R300_FIREVERTICES(rmesa); - r300SetTexImages(rmesa, tObj); -- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); -- if (!t->base.memBlock) -- return GL_FALSE; -+ r300UploadTexImages(rmesa, t, 0); - } - - return GL_TRUE; -@@ -495,14 +384,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - GLuint face; - - ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); - -- if (t->base.dirty_images[0] || t->base.dirty_images[1] || -- t->base.dirty_images[2] || t->base.dirty_images[3] || -- t->base.dirty_images[4] || t->base.dirty_images[5]) { -+ if (!t->mt || -+ t->dirty_images[0] || t->dirty_images[1] || -+ t->dirty_images[2] || t->dirty_images[3] || -+ t->dirty_images[4] || t->dirty_images[5]) { - /* flush */ - R300_FIREVERTICES(rmesa); - /* layout memory space, once for all faces */ -@@ -511,18 +401,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) - - /* upload (per face) */ - for (face = 0; face < 6; face++) { -- if (t->base.dirty_images[face]) { -- r300UploadTexImages(rmesa, -- (r300TexObjPtr) tObj->DriverData, -- face); -+ if (t->dirty_images[face]) { -+ r300UploadTexImages(rmesa, t, face); - } - } - -- if (!t->base.memBlock) { 
-- /* texmem alloc failed, use s/w fallback */ -- return GL_FALSE; -- } -- - return GL_TRUE; - } - -@@ -531,18 +414,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - - ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); - -- if (t->base.dirty_images[0]) { -+ if (!t->mt || t->dirty_images[0]) { - R300_FIREVERTICES(rmesa); - - r300SetTexImages(rmesa, tObj); -- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); -- if (!t->base.memBlock && !t->image_override && -- !rmesa->prefer_gart_client_texturing) -- return GL_FALSE; -+ r300UploadTexImages(rmesa, t, 0); - } - - return GL_TRUE; -@@ -550,34 +430,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) - - static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) - { -- r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; -- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; -+ r300TexObjPtr t = r300_tex_obj(tObj); - - /* Fallback if there's a texture border */ - if (tObj->Image[0][tObj->BaseLevel]->Border > 0) - return GL_FALSE; - -- /* Update state if this is a different texture object to last -- * time. -- */ -- if (rmesa->state.texture.unit[unit].texobj != t) { -- if (rmesa->state.texture.unit[unit].texobj != NULL) { -- /* The old texture is no longer bound to this texture unit. -- * Mark it as such. -- */ -- -- rmesa->state.texture.unit[unit].texobj->base.bound &= -- ~(1 << unit); -- } -- -- rmesa->state.texture.unit[unit].texobj = t; -- t->base.bound |= (1 << unit); -- driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! 
*/ -- } -+ /* Fallback if memory upload didn't work */ -+ if (!t->mt) -+ return GL_FALSE; - -- return !t->border_fallback; -+ return GL_TRUE; - } - - void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, -@@ -586,20 +451,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, - r300ContextPtr rmesa = pDRICtx->driverPrivate; - struct gl_texture_object *tObj = - _mesa_lookup_texture(rmesa->radeon.glCtx, texname); -- r300TexObjPtr t; -+ r300TexObjPtr t = r300_tex_obj(tObj); - uint32_t pitch_val; - - if (!tObj) - return; - -- t = (r300TexObjPtr) tObj->DriverData; -- - t->image_override = GL_TRUE; - - if (!offset) - return; - -- t->offset = offset; -+ t->override_offset = offset; - t->pitch_reg &= (1 << 13) -1; - pitch_val = pitch; - -diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c -index 3fc724a..a84c8fc 100644 ---- a/src/mesa/drivers/dri/r300/radeon_context.c -+++ b/src/mesa/drivers/dri/r300/radeon_context.c -@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "state.h" - #include "matrix.h" - #include "framebuffer.h" -+#include "drirenderbuffer.h" - - #include "drivers/common/driverfuncs.h" - #include "swrast/swrast.h" -@@ -258,6 +259,52 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, - } - } - -+static void -+radeon_make_renderbuffer_current(radeonContextPtr radeon, -+ GLframebuffer *draw) -+{ -+ int size = radeon->radeonScreen->driScreen->fbSize; -+ void *map = 0; -+ /* if radeon->fake */ -+ struct radeon_renderbuffer *rb; -+ uint32_t offset; -+ if (!radeon->bufmgr) -+ return; -+ -+ if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { -+ -+ offset = radeon->radeonScreen->kernel_mm ? 
radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset; -+ if (!rb->bo) -+ rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "front buffer", -+ radeon->radeonScreen->frontOffset, size, map, -+ DRM_BO_FLAG_MEM_VRAM); -+ fprintf(stderr,"front is %p\n", rb->bo); -+ rb->cpp = radeon->radeonScreen->cpp; -+ rb->pitch = radeon->radeonScreen->frontPitch; -+ } -+ if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { -+ offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset; -+ if (!rb->bo) -+ rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "back buffer", -+ radeon->radeonScreen->backOffset, size, map, -+ DRM_BO_FLAG_MEM_VRAM); -+ fprintf(stderr,"back is %p\n", rb->bo); -+ rb->cpp = radeon->radeonScreen->cpp; -+ rb->pitch = radeon->radeonScreen->backPitch; -+ } -+ if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) { -+ offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset; -+ if (!rb->bo) -+ rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "depth buffer", -+ radeon->radeonScreen->depthOffset, size, map, -+ DRM_BO_FLAG_MEM_VRAM); -+ fprintf(stderr,"depth is %p\n", rb->bo); -+ rb->cpp = radeon->radeonScreen->cpp; -+ rb->pitch = radeon->radeonScreen->depthPitch; -+ } -+} -+ -+ - /* Force the context `c' to be the current context and associate with it - * buffer `b'. 
- */ -@@ -265,51 +312,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) - { -- if (driContextPriv) { -- radeonContextPtr radeon = -- (radeonContextPtr) driContextPriv->driverPrivate; -+ radeonContextPtr radeon; -+ GLframebuffer *dfb, *rfb; - -+ if (!driContextPriv) { - if (RADEON_DEBUG & DEBUG_DRI) -- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, -- radeon->glCtx); -- -- if (radeon->dri.drawable != driDrawPriv) { -- if (driDrawPriv->swap_interval == (unsigned)-1) { -- driDrawPriv->vblFlags = -- (radeon->radeonScreen->irq != 0) -- ? driGetDefaultVBlankFlags(&radeon-> -- optionCache) -- : VBLANK_FLAG_NO_IRQ; -+ fprintf(stderr, "%s ctx is null\n", __FUNCTION__); -+ _mesa_make_current(NULL, NULL, NULL); -+ return GL_TRUE; -+ } - -- driDrawableInitVBlank(driDrawPriv); -- } -- } -+ radeon = (radeonContextPtr) driContextPriv->driverPrivate; -+ dfb = driDrawPriv->driverPrivate; -+ rfb = driReadPriv->driverPrivate; - -- radeon->dri.readable = driReadPriv; -+ if (RADEON_DEBUG & DEBUG_DRI) -+ fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx); - -- if (radeon->dri.drawable != driDrawPriv || -- radeon->lastStamp != driDrawPriv->lastStamp) { -- radeon->dri.drawable = driDrawPriv; -+ driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); -+ if (driReadPriv != driDrawPriv) -+ driUpdateFramebufferSize(radeon->glCtx, driReadPriv); - -- radeonSetCliprects(radeon); -- r300UpdateViewportOffset(radeon->glCtx); -- } -+ radeon_make_renderbuffer_current(radeon, dfb); - -- _mesa_make_current(radeon->glCtx, -- (GLframebuffer *) driDrawPriv-> -- driverPrivate, -- (GLframebuffer *) driReadPriv-> -- driverPrivate); -+ _mesa_make_current(radeon->glCtx, dfb, rfb); - -- _mesa_update_state(radeon->glCtx); -+ if (radeon->dri.drawable != driDrawPriv) { -+ if (driDrawPriv->swap_interval == (unsigned)-1) { -+ driDrawPriv->vblFlags = -+ (radeon->radeonScreen->irq != 0) -+ ? 
driGetDefaultVBlankFlags(&radeon-> -+ optionCache) -+ : VBLANK_FLAG_NO_IRQ; -+ -+ driDrawableInitVBlank(driDrawPriv); -+ } -+ } - -- radeonUpdatePageFlipping(radeon); -- } else { -- if (RADEON_DEBUG & DEBUG_DRI) -- fprintf(stderr, "%s ctx is null\n", __FUNCTION__); -- _mesa_make_current(0, 0, 0); -+ radeon->dri.readable = driReadPriv; -+ -+ if (radeon->dri.drawable != driDrawPriv || -+ radeon->lastStamp != driDrawPriv->lastStamp) { -+ radeon->dri.drawable = driDrawPriv; -+ -+ radeonSetCliprects(radeon); -+ r300UpdateViewportOffset(radeon->glCtx); - } - -+ _mesa_update_state(radeon->glCtx); -+ -+ radeonUpdatePageFlipping(radeon); -+ - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "End %s\n", __FUNCTION__); - return GL_TRUE; -diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h -index 7458d63..828853b 100644 ---- a/src/mesa/drivers/dri/r300/radeon_context.h -+++ b/src/mesa/drivers/dri/r300/radeon_context.h -@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #include "drm.h" - #include "dri_util.h" - #include "colormac.h" -+#include "radeon_buffer.h" - - struct radeon_context; - typedef struct radeon_context radeonContextRec; -@@ -132,12 +133,13 @@ struct radeon_scissor_state { - - struct radeon_colorbuffer_state { - GLuint clear; -- GLint drawOffset, drawPitch; -+ struct radeon_renderbuffer *rrb; - }; - - struct radeon_state { - struct radeon_colorbuffer_state color; - struct radeon_scissor_state scissor; -+ struct radeon_renderbuffer *depth_buffer; - }; - - /** -@@ -185,6 +187,8 @@ struct radeon_context { - /* Configuration cache - */ - driOptionCache optionCache; -+ -+ struct radeon_bufmgr *bufmgr; - }; - - #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) -diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c -index 0c1a195..486ce8e 100644 ---- a/src/mesa/drivers/dri/r300/radeon_ioctl.c -+++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c -@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #include "swrast/swrast.h" - #include "r300_context.h" - #include "radeon_ioctl.h" -+#include "radeon_buffer.h" - #include "r300_ioctl.h" - #include "r300_state.h" - #include "radeon_reg.h" -@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, - assert(dPriv->driContextPriv->driverPrivate); - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; -- -+ - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, - (void *)radeon->glCtx); -@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) - GLint ret; - GLboolean missed_target; - __DRIscreenPrivate *psp = dPriv->driScreenPriv; -+ GLframebuffer *fb = dPriv->driverPrivate; -+ struct radeon_renderbuffer *rrb; - - assert(dPriv); - assert(dPriv->driContextPriv); -@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - -+ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; -+ - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, - radeon->sarea->pfCurrentPage); -@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) - radeon->swap_count++; - (void)(*psp->systemTime->getUST) (&radeon->swap_ust); - -- driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, -+ driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, - radeon->sarea->pfCurrentPage); - -- if (radeon->sarea->pfCurrentPage == 1) { -- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; -- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; -- } else { -- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; -- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; -- } -- -- if (IS_R300_CLASS(radeon->radeonScreen)) { -- r300ContextPtr r300 = (r300ContextPtr)radeon; -- R300_STATECHANGE(r300, cb); -- r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + -- 
r300->radeon.radeonScreen->fbLocation; -- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; -- -- if (r300->radeon.radeonScreen->cpp == 4) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; -- else -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; -- -- if (r300->radeon.sarea->tiling_enabled) -- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; -- } -+ radeon->state.color.rrb = rrb; - } - - void radeonWaitForIdleLocked(radeonContextPtr radeon) -diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c -index d54a821..3529555 100644 ---- a/src/mesa/drivers/dri/r300/radeon_lock.c -+++ b/src/mesa/drivers/dri/r300/radeon_lock.c -@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "radeon_state.h" - #include "r300_context.h" - #include "r300_state.h" -+#include "r300_mem.h" - - #include "framebuffer.h" - -@@ -59,6 +60,8 @@ int prevLockLine = 0; - void radeonUpdatePageFlipping(radeonContextPtr rmesa) - { - int use_back; -+ __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; -+ GLframebuffer *fb = drawable->driverPrivate; - - rmesa->doPageFlip = rmesa->sarea->pfState; - if (rmesa->glCtx->WinSysDrawBuffer) { -@@ -72,16 +75,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa) - BUFFER_BACK_LEFT) : 1; - use_back ^= (rmesa->sarea->pfCurrentPage == 1); - -- if (use_back) { -- rmesa->state.color.drawOffset = -- rmesa->radeonScreen->backOffset; -- rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; -- } else { -- rmesa->state.color.drawOffset = -- rmesa->radeonScreen->frontOffset; -- rmesa->state.color.drawPitch = -- rmesa->radeonScreen->frontPitch; -- } -+ if (use_back) -+ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; -+ else -+ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; -+ -+ rmesa->state.depth_buffer = (void 
*)fb->Attachment[BUFFER_DEPTH].Renderbuffer; - } - - /* Update the hardware state. This is called if another context has -@@ -125,12 +124,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) - } - - if (sarea->ctx_owner != rmesa->dri.hwContext) { -- int i; -- - sarea->ctx_owner = rmesa->dri.hwContext; -- for (i = 0; i < r300->nr_heaps; i++) { -- DRI_AGE_TEXTURES(r300->texture_heaps[i]); -- } -+ radeonBufmgrContendedLockTake(r300->radeon.bufmgr); - } - - rmesa->lost_context = GL_TRUE; -diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c -index f1bc56e..7ea0842 100644 ---- a/src/mesa/drivers/dri/r300/radeon_span.c -+++ b/src/mesa/drivers/dri/r300/radeon_span.c -@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "r300_ioctl.h" - #include "radeon_span.h" - --#include "drirenderbuffer.h" -+#include "radeon_buffer.h" - - #define DBG 0 - -@@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * information. 
- */ - #define LOCAL_VARS \ -- driRenderbuffer *drb = (driRenderbuffer *) rb; \ -- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ -+ struct radeon_renderbuffer *rrb = (void *) rb; \ -+ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ -- GLubyte *buf = (GLubyte *) drb->flippedData \ -- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ -+ GLubyte *buf = (GLubyte *) rrb->bo->virtual \ -+ + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp; \ - GLuint p; \ - (void) p; - - #define LOCAL_DEPTH_VARS \ -- driRenderbuffer *drb = (driRenderbuffer *) rb; \ -- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ -+ struct radeon_renderbuffer *rrb = (void *) rb; \ -+ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLuint xo = dPriv->x; \ - GLuint yo = dPriv->y; \ -- GLubyte *buf = (GLubyte *) drb->Base.Data; -+ GLubyte *buf = (GLubyte *) rrb->base.Data; - - #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -@@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - #define TAG(x) radeon##x##_RGB565 - #define TAG2(x,y) radeon##x##_RGB565##y --#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) -+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2) - #include "spantmp2.h" - - /* 32 bit, ARGB8888 color spanline and pixel functions -@@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - #define TAG(x) radeon##x##_ARGB8888 - #define TAG2(x,y) radeon##x##_ARGB8888##y --#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) -+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4) - #include "spantmp2.h" - - /* ================================================================ -@@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * too... 
- */ - --static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) -+static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb, -+ GLint x, GLint y) - { -- GLuint pitch = drb->pitch; -- if (drb->depthHasSurface) { -+ GLuint pitch = rrb->pitch; -+ if (rrb->depthHasSurface) { - return 4 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0..1] = 0 */ -@@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) - } - - static INLINE GLuint --radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) -+radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y) - { -- GLuint pitch = drb->pitch; -- if (drb->depthHasSurface) { -+ GLuint pitch = rrb->pitch; -+ if (rrb->depthHasSurface) { - return 2 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0] = 0 */ -@@ -173,10 +174,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) - /* 16-bit depth buffer functions - */ - #define WRITE_DEPTH( _x, _y, d ) \ -- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; -+ *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d; - - #define READ_DEPTH( d, _x, _y ) \ -- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); -+ d = *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )); - - #define TAG(x) radeon##x##_z16 - #include "depthtmp.h" -@@ -189,7 +190,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) - #ifdef COMPILE_R300 - #define WRITE_DEPTH( _x, _y, d ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0x000000ff; \ - tmp |= ((d << 8) & 0xffffff00); \ -@@ -198,7 +199,7 @@ do { \ - #else - #define WRITE_DEPTH( _x, _y, d ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = 
*(GLuint *)(buf + offset); \ - tmp &= 0xff000000; \ - tmp |= ((d) & 0x00ffffff); \ -@@ -209,12 +210,12 @@ do { \ - #ifdef COMPILE_R300 - #define READ_DEPTH( d, _x, _y ) \ - do { \ -- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ -+ d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ - _y + yo )) & 0xffffff00) >> 8; \ - }while(0) - #else - #define READ_DEPTH( d, _x, _y ) \ -- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ -+ d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ - _y + yo )) & 0x00ffffff; - #endif - -@@ -230,7 +231,7 @@ do { \ - #ifdef COMPILE_R300 - #define WRITE_STENCIL( _x, _y, d ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0xffffff00; \ - tmp |= (d) & 0xff; \ -@@ -239,7 +240,7 @@ do { \ - #else - #define WRITE_STENCIL( _x, _y, d ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0x00ffffff; \ - tmp |= (((d) & 0xff) << 24); \ -@@ -250,14 +251,14 @@ do { \ - #ifdef COMPILE_R300 - #define READ_STENCIL( d, _x, _y ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - d = tmp & 0x000000ff; \ - } while (0) - #else - #define READ_STENCIL( d, _x, _y ) \ - do { \ -- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ -+ GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - d = (tmp & 0xff000000) >> 24; \ - } while (0) -@@ -300,10 +301,10 @@ static void radeonSpanRenderStart(GLcontext * ctx) - */ - { - int p; -- driRenderbuffer *drb = -- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; -+ struct radeon_renderbuffer *rrb = -+ (void *) 
ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; - volatile int *buf = -- (volatile int *)(rmesa->dri.screen->pFB + drb->offset); -+ (volatile int *)(rmesa->dri.screen->pFB + rrb->bo->offset); - p = *buf; - } - } -@@ -326,20 +327,17 @@ void radeonInitSpanFuncs(GLcontext * ctx) - /** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ --void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) -+void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) - { -- if (drb->Base.InternalFormat == GL_RGBA) { -- if (vis->redBits == 5 && vis->greenBits == 6 -- && vis->blueBits == 5) { -- radeonInitPointers_RGB565(&drb->Base); -- } else { -- radeonInitPointers_ARGB8888(&drb->Base); -- } -- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { -- radeonInitDepthPointers_z16(&drb->Base); -- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { -- radeonInitDepthPointers_z24_s8(&drb->Base); -- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { -- radeonInitStencilPointers_z24_s8(&drb->Base); -- } -+ if (rrb->base.InternalFormat == GL_RGB5) { -+ radeonInitPointers_RGB565(&rrb->base); -+ } else if (rrb->base.InternalFormat == GL_RGBA8) { -+ radeonInitPointers_ARGB8888(&rrb->base); -+ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) { -+ radeonInitDepthPointers_z16(&rrb->base); -+ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) { -+ radeonInitDepthPointers_z24_s8(&rrb->base); -+ } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) { -+ radeonInitStencilPointers_z24_s8(&rrb->base); -+ } - } -diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c -index d81318c..a7720da 100644 ---- a/src/mesa/drivers/dri/r300/radeon_state.c -+++ b/src/mesa/drivers/dri/r300/radeon_state.c -@@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) - void radeonInitState(radeonContextPtr radeon) - { - radeon->Fallback = 0; -- -- if 
(radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { -- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; -- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; -- } else { -- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; -- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; -- } - } - - -diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h -new file mode 100644 -index 0000000..730c40b ---- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h -@@ -0,0 +1,50 @@ -+/* -+ * Copyright 2008 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software") -+ * to deal in the software without restriction, including without limitation -+ * on the rights to use, copy, modify, merge, publish, distribute, sub -+ * license, and/or sell copies of the Software, and to permit persons to whom -+ * them Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER -+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ * -+ * Authors: -+ * Adam Jackson -+ */ -+ -+#ifndef RADEON_BUFFER_H -+#define RADEON_BUFFER_H -+ -+#include "dri_bufmgr.h" -+ -+struct radeon_renderbuffer -+{ -+ struct gl_renderbuffer base; -+ dri_bo *bo; -+ unsigned int cpp; -+ /* unsigned int offset; */ -+ unsigned int pitch; -+ unsigned int height; -+ -+ /* boo Xorg 6.8.2 compat */ -+ int depthHasSurface; -+ -+ __DRIdrawablePrivate *dPriv; -+}; -+ -+struct radeon_bufmgr { -+ dri_bufmgr base; -+}; -+ -+#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c -index 84b5c46..10a49d2 100644 ---- a/src/mesa/drivers/dri/radeon/radeon_screen.c -+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c -@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #include "radeon_chipset.h" - #include "radeon_macros.h" - #include "radeon_screen.h" -+#include "radeon_buffer.h" - #if !RADEON_COMMON - #include "radeon_context.h" - #include "radeon_span.h" -@@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - #include "GL/internal/dri_interface.h" - -+#include -+#include -+ - /* Radeon configuration - */ - #include "xmlpool.h" -@@ -350,6 +354,79 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { - }; - #endif - -+ -+static void -+radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, -+ struct radeon_gem_object *gem_obj) -+{ -+ struct drm_gem_close close_args; -+ struct drm_gem_open args; -+ struct drm_radeon_gem_mmap mmap_args; -+ struct drm_radeon_gem_pin pin_args; -+ int ret; -+ -+ if (gem_obj->gem_handle) { -+ close_args.handle = gem_obj->gem_handle; -+ -+ ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args); -+ gem_obj->gem_handle = 0; -+ } -+ -+ /* do open */ -+ args.name = gem_obj->gem_name; -+ ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args); -+ if (ret) -+ return; -+ -+ gem_obj->gem_handle = args.handle; -+ gem_obj->size = args.size; -+ -+ mmap_args.handle = gem_obj->gem_handle; -+ mmap_args.size = gem_obj->size; -+ mmap_args.offset = 0; -+ -+ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args, -+ sizeof(mmap_args)); -+ -+ if (ret) -+ return; -+ -+ gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr); -+ -+ pin_args.handle = gem_obj->gem_handle; -+ pin_args.alignment = 0; -+ -+ ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args, -+ sizeof(pin_args)); -+ -+ if (ret) -+ return; -+ -+ gem_obj->offset = pin_args.offset; -+ -+ fprintf(stderr,"handle %d, size %llx, ptr %p, offset %llx\n", gem_obj->gem_handle, -+ gem_obj->size, gem_obj->map, gem_obj->offset); -+} -+ -+static int -+radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, -+ RADEONDRIPtr dri_priv) -+{ -+ /* STOP GAP HERE */ -+ -+ screen->front.gem_name = dri_priv->frontOffset; -+ radeon_gem_update_handle(screen, sPriv, &screen->front); -+ screen->back.gem_name = dri_priv->backOffset; -+ radeon_gem_update_handle(screen, sPriv, &screen->back); -+ screen->depth.gem_name = dri_priv->depthOffset; -+ 
radeon_gem_update_handle(screen, sPriv, &screen->depth); -+ -+ screen->vram_texture.gem_name = dri_priv->textureOffset; -+ radeon_gem_update_handle(screen, sPriv, &screen->vram_texture); -+ screen->vram_texture.gem_name = dri_priv->gartTexHandle; -+ radeon_gem_update_handle(screen, sPriv, &screen->gart_texture); -+} -+ - /* Create the device specific screen private data struct. - */ - static radeonScreenPtr -@@ -389,6 +466,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); - { - int ret; -+ -+#ifdef RADEON_PARAM_KERNEL_MM -+ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM, -+ &screen->kernel_mm); -+ -+ if (ret && ret != -EINVAL) { -+ FREE( screen ); -+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret); -+ return NULL; -+ } -+ -+ if (ret == -EINVAL) -+ screen->kernel_mm = 0; -+#endif -+ - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, - &screen->gart_buffer_offset); - -@@ -422,32 +514,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); - } - -- screen->mmio.handle = dri_priv->registerHandle; -- screen->mmio.size = dri_priv->registerSize; -- if ( drmMap( sPriv->fd, -- screen->mmio.handle, -- screen->mmio.size, -- &screen->mmio.map ) ) { -- FREE( screen ); -- __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); -- return NULL; -- } -+ if (!screen->kernel_mm) { -+ screen->mmio.handle = dri_priv->registerHandle; -+ screen->mmio.size = dri_priv->registerSize; -+ if ( drmMap( sPriv->fd, -+ screen->mmio.handle, -+ screen->mmio.size, -+ &screen->mmio.map ) ) { -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); -+ return NULL; -+ } - -- RADEONMMIO = screen->mmio.map; -+ RADEONMMIO = screen->mmio.map; - -- screen->status.handle = dri_priv->statusHandle; -- screen->status.size = dri_priv->statusSize; -- if ( drmMap( sPriv->fd, -- screen->status.handle, 
-- screen->status.size, -- &screen->status.map ) ) { -- drmUnmap( screen->mmio.map, screen->mmio.size ); -- FREE( screen ); -- __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); -- return NULL; -+ screen->status.handle = dri_priv->statusHandle; -+ screen->status.size = dri_priv->statusSize; -+ if ( drmMap( sPriv->fd, -+ screen->status.handle, -+ screen->status.size, -+ &screen->status.map ) ) { -+ drmUnmap( screen->mmio.map, screen->mmio.size ); -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); -+ return NULL; -+ } -+ screen->scratch = (__volatile__ u_int32_t *) -+ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); - } -- screen->scratch = (__volatile__ u_int32_t *) -- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); - - screen->buffers = drmMapBufs( sPriv->fd ); - if ( !screen->buffers ) { -@@ -458,22 +552,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - return NULL; - } - -- if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { -- screen->gartTextures.handle = dri_priv->gartTexHandle; -- screen->gartTextures.size = dri_priv->gartTexMapSize; -- if ( drmMap( sPriv->fd, -- screen->gartTextures.handle, -- screen->gartTextures.size, -- (drmAddressPtr)&screen->gartTextures.map ) ) { -- drmUnmapBufs( screen->buffers ); -- drmUnmap( screen->status.map, screen->status.size ); -- drmUnmap( screen->mmio.map, screen->mmio.size ); -- FREE( screen ); -- __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); -- return NULL; -+ if (!screen->kernel_mm) { -+ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { -+ screen->gartTextures.handle = dri_priv->gartTexHandle; -+ screen->gartTextures.size = dri_priv->gartTexMapSize; -+ if ( drmMap( sPriv->fd, -+ screen->gartTextures.handle, -+ screen->gartTextures.size, -+ (drmAddressPtr)&screen->gartTextures.map ) ) { -+ drmUnmapBufs( screen->buffers ); -+ drmUnmap( screen->status.map, screen->status.size ); -+ drmUnmap( 
screen->mmio.map, screen->mmio.size ); -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); -+ return NULL; -+ } -+ -+ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; - } -- -- screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; - } - - screen->chip_flags = 0; -@@ -840,7 +936,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, - &temp); - if (ret) { -- if (screen->chip_family < CHIP_FAMILY_RS690) -+ if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm) - screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; - else { - FREE( screen ); -@@ -881,55 +977,58 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - } - } - -- if ( sPriv->drm_version.minor >= 10 ) { -- drm_radeon_setparam_t sp; -+ if (!screen->kernel_mm) { -+ if ( sPriv->drm_version.minor >= 10 ) { -+ drm_radeon_setparam_t sp; - -- sp.param = RADEON_SETPARAM_FB_LOCATION; -- sp.value = screen->fbLocation; -+ sp.param = RADEON_SETPARAM_FB_LOCATION; -+ sp.value = screen->fbLocation; - -- drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, -- &sp, sizeof( sp ) ); -- } -- -- screen->frontOffset = dri_priv->frontOffset; -- screen->frontPitch = dri_priv->frontPitch; -- screen->backOffset = dri_priv->backOffset; -- screen->backPitch = dri_priv->backPitch; -- screen->depthOffset = dri_priv->depthOffset; -- screen->depthPitch = dri_priv->depthPitch; -- -- /* Check if ddx has set up a surface reg to cover depth buffer */ -- screen->depthHasSurface = (sPriv->ddx_version.major > 4) || -- /* these chips don't use tiled z without hyperz. 
So always pretend -- we have set up a surface which will cause linear reads/writes */ -- ((screen->chip_family & RADEON_CLASS_R100) && -- !(screen->chip_flags & RADEON_CHIPSET_TCL)); -- -- if ( dri_priv->textureSize == 0 ) { -- screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; -- screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; -- screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = -- dri_priv->log2GARTTexGran; -- } else { -- screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset -- + screen->fbLocation; -- screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; -- screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = -- dri_priv->log2TexGran; -- } -+ drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, -+ &sp, sizeof( sp ) ); -+ } - -- if ( !screen->gartTextures.map || dri_priv->textureSize == 0 -- || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { -- screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; -- screen->texOffset[RADEON_GART_TEX_HEAP] = 0; -- screen->texSize[RADEON_GART_TEX_HEAP] = 0; -- screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; -+ screen->frontOffset = dri_priv->frontOffset; -+ screen->frontPitch = dri_priv->frontPitch; -+ screen->backOffset = dri_priv->backOffset; -+ screen->backPitch = dri_priv->backPitch; -+ screen->depthOffset = dri_priv->depthOffset; -+ screen->depthPitch = dri_priv->depthPitch; -+ -+ /* Check if ddx has set up a surface reg to cover depth buffer */ -+ screen->depthHasSurface = (sPriv->ddx_version.major > 4) || -+ /* these chips don't use tiled z without hyperz. 
So always pretend -+ we have set up a surface which will cause linear reads/writes */ -+ ((screen->chip_family & RADEON_CLASS_R100) && -+ !(screen->chip_flags & RADEON_CHIPSET_TCL)); -+ -+ if ( dri_priv->textureSize == 0 ) { -+ screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; -+ screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; -+ screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = -+ dri_priv->log2GARTTexGran; -+ } else { -+ screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset -+ + screen->fbLocation; -+ screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; -+ screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = -+ dri_priv->log2TexGran; -+ } -+ -+ if ( !screen->gartTextures.map || dri_priv->textureSize == 0 -+ || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { -+ screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; -+ screen->texOffset[RADEON_GART_TEX_HEAP] = 0; -+ screen->texSize[RADEON_GART_TEX_HEAP] = 0; -+ screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; -+ } else { -+ screen->numTexHeaps = RADEON_NR_TEX_HEAPS; -+ screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; -+ screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; -+ screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran; -+ } - } else { -- screen->numTexHeaps = RADEON_NR_TEX_HEAPS; -- screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; -- screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; -- screen->logTexGranularity[RADEON_GART_TEX_HEAP] = -- dri_priv->log2GARTTexGran; -+ radeon_init_mm_buffers(screen, sPriv, dri_priv); - } - - i = 0; -@@ -975,12 +1074,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) - if (!screen) - return; - -- if ( screen->gartTextures.map ) { -- drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); -- } - drmUnmapBufs( screen->buffers ); -- drmUnmap( screen->status.map, screen->status.size ); -- drmUnmap( 
screen->mmio.map, screen->mmio.size ); -+ if (!screen->kernel_mm) { -+ if ( screen->gartTextures.map ) { -+ drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); -+ } -+ drmUnmap( screen->status.map, screen->status.size ); -+ drmUnmap( screen->mmio.map, screen->mmio.size ); -+ } - - /* free all option information */ - driDestroyOptionInfo (&screen->optionCache); -@@ -1004,6 +1105,158 @@ radeonInitDriver( __DRIscreenPrivate *sPriv ) - return GL_TRUE; - } - -+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) -+static GLboolean -+radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb, -+ GLenum intFormat, GLuint w, GLuint h) -+{ -+ rb->Width = w; -+ rb->Height = h; -+ rb->_ActualFormat = intFormat; -+ -+ return GL_TRUE; -+} -+ -+ -+static struct radeon_renderbuffer * -+radeon_create_renderbuffer(GLenum format) -+{ -+ struct radeon_renderbuffer *ret; -+ -+ ret = CALLOC_STRUCT(radeon_renderbuffer); -+ if (!ret) -+ return NULL; -+ -+ _mesa_init_renderbuffer(&ret->base, 0); -+ -+ /* XXX format junk */ -+ switch (format) { -+ case GL_RGB5: -+ ret->base._ActualFormat = GL_RGB5; -+ ret->base._BaseFormat = GL_RGBA; -+ ret->base.RedBits = 5; -+ ret->base.GreenBits = 6; -+ ret->base.BlueBits = 5; -+ ret->base.DataType = GL_UNSIGNED_BYTE; -+ break; -+ case GL_RGBA8: -+ ret->base._ActualFormat = GL_RGBA8; -+ ret->base._BaseFormat = GL_RGBA; -+ ret->base.RedBits = 8; -+ ret->base.GreenBits = 8; -+ ret->base.BlueBits = 8; -+ ret->base.AlphaBits = 8; -+ ret->base.DataType = GL_UNSIGNED_BYTE; -+ break; -+ case GL_STENCIL_INDEX8_EXT: -+ ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT; -+ ret->base._BaseFormat = GL_STENCIL_INDEX; -+ ret->base.StencilBits = 8; -+ ret->base.DataType = GL_UNSIGNED_BYTE; -+ break; -+ case GL_DEPTH_COMPONENT16: -+ ret->base._ActualFormat = GL_DEPTH_COMPONENT16; -+ ret->base._BaseFormat = GL_DEPTH_COMPONENT; -+ ret->base.DepthBits = 16; -+ ret->base.DataType = GL_UNSIGNED_SHORT; -+ break; -+ case GL_DEPTH_COMPONENT24: -+ 
ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; -+ ret->base._BaseFormat = GL_DEPTH_COMPONENT; -+ ret->base.DepthBits = 24; -+ ret->base.DataType = GL_UNSIGNED_INT; -+ break; -+ case GL_DEPTH24_STENCIL8_EXT: -+ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; -+ ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT; -+ ret->base.DepthBits = 24; -+ ret->base.StencilBits = 8; -+ ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT; -+ break; -+ default: -+ /* whoops */ -+ break; -+ } -+ -+ ret->base.InternalFormat = format; -+ -+ ret->base.AllocStorage = radeon_alloc_window_storage; -+ -+ radeonSetSpanFunctions(ret); -+ -+ return ret; -+} -+ -+/** -+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable. -+ * -+ * \todo This function (and its interface) will need to be updated to support -+ * pbuffers. -+ */ -+static GLboolean -+radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, -+ __DRIdrawablePrivate *driDrawPriv, -+ const __GLcontextModes *mesaVis, -+ GLboolean isPixmap ) -+{ -+ radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; -+ -+ const GLboolean swDepth = GL_FALSE; -+ const GLboolean swAlpha = GL_FALSE; -+ const GLboolean swAccum = mesaVis->accumRedBits > 0; -+ const GLboolean swStencil = mesaVis->stencilBits > 0 && -+ mesaVis->depthBits != 24; -+ GLenum rgbFormat = (mesaVis->redBits == 5 ? 
GL_RGB5 : GL_RGBA8); -+ GLenum depthFormat = GL_NONE; -+ struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); -+ -+ if (mesaVis->depthBits == 16) -+ depthFormat = GL_DEPTH_COMPONENT16; -+ else if (mesaVis->depthBits == 24) -+ depthFormat = GL_DEPTH_COMPONENT24; -+ -+ /* front color renderbuffer */ -+ { -+ struct radeon_renderbuffer *front = -+ radeon_create_renderbuffer(rgbFormat); -+ _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base); -+ } -+ -+ /* back color renderbuffer */ -+ if (mesaVis->doubleBufferMode) { -+ struct radeon_renderbuffer *back = -+ radeon_create_renderbuffer(GL_RGBA); -+ _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base); -+ } -+ -+ /* depth renderbuffer */ -+ if (depthFormat != GL_NONE) { -+ struct radeon_renderbuffer *depth = -+ radeon_create_renderbuffer(depthFormat); -+ _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base); -+ depth->depthHasSurface = screen->depthHasSurface; -+ } -+ -+ /* stencil renderbuffer */ -+ if (mesaVis->stencilBits > 0 && !swStencil) { -+ struct radeon_renderbuffer *stencil = -+ radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT); -+ _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base); -+ stencil->depthHasSurface = screen->depthHasSurface; -+ } -+ -+ _mesa_add_soft_renderbuffers(fb, -+ GL_FALSE, /* color */ -+ swDepth, -+ swStencil, -+ swAccum, -+ swAlpha, -+ GL_FALSE /* aux */); -+ driDrawPriv->driverPrivate = (void *) fb; -+ -+ return (driDrawPriv->driverPrivate != NULL); -+} -+#else - - /** - * Create the Mesa framebuffer and renderbuffers for a given window/drawable. -@@ -1105,6 +1358,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, - } - - -+ -+ -+#endif -+ -+ - static void - radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) - { -@@ -1199,11 +1457,11 @@ radeonInitScreen(__DRIscreenPrivate *psp) - if (!radeonInitDriver(psp)) - return NULL; - -+ /* for now fill in all modes */ - return radeonFillInModes( psp, - dri_priv->bpp, - (dri_priv->bpp == 16) ? 
16 : 24, -- (dri_priv->bpp == 16) ? 0 : 8, -- (dri_priv->backOffset != dri_priv->depthOffset) ); -+ (dri_priv->bpp == 16) ? 0 : 8, 1); - } - - -diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h -index ab859d5..82eb7d8 100644 ---- a/src/mesa/drivers/dri/radeon/radeon_screen.h -+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h -@@ -55,6 +55,14 @@ typedef struct { - drmAddress map; /* Mapping of the DRM region */ - } radeonRegionRec, *radeonRegionPtr; - -+struct radeon_gem_object { -+ uint32_t gem_name; -+ uint32_t gem_handle; -+ uint64_t size; -+ void *map; -+ uint64_t offset; -+}; -+ - typedef struct { - int chip_family; - int chip_flags; -@@ -107,6 +115,13 @@ typedef struct { - const __DRIextension *extensions[8]; - - int num_gb_pipes; -+ -+ int kernel_mm; -+ struct radeon_gem_object front; -+ struct radeon_gem_object back; -+ struct radeon_gem_object depth; -+ struct radeon_gem_object vram_texture; -+ struct radeon_gem_object gart_texture; - } radeonScreenRec, *radeonScreenPtr; - - #define IS_R100_CLASS(screen) \ -diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h -index 9abe086..1650a9b 100644 ---- a/src/mesa/drivers/dri/radeon/radeon_span.h -+++ b/src/mesa/drivers/dri/radeon/radeon_span.h -@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - #include "drirenderbuffer.h" - -+#include "radeon_buffer.h" -+ - extern void radeonInitSpanFuncs(GLcontext * ctx); --extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); - -+#if COMPILE_R300 -+extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); -+#else -+extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); -+#endif - #endif diff --git a/sources b/sources index d1667ae..8c9b49d 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ 6ae05158e678f4594343f32c2ca50515 gl-manpages-1.0.1.tar.bz2 -937234d8b7b8528295b7080fbcf0a532 mesa-20080814.tar.bz2 +d5e2a6d63b4611ec38aaab19b8f68117 mesa-20080905.tar.bz2