From 7cf755a92d9e2b7631f9fcd37e86f3e59281d9b8 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@fedoraproject.org>
Date: Sep 05 2008 05:48:02 +0000
Subject: - latest snapshot - r300 bufmgr code

- stop building mach64, patch around some intel issues

---

diff --git a/.cvsignore b/.cvsignore
index ed91d79..3a4010b 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -1,2 +1,2 @@
 gl-manpages-1.0.1.tar.bz2
-mesa-20080814.tar.bz2
+mesa-20080905.tar.bz2
diff --git a/intel-mmio-fix.patch b/intel-mmio-fix.patch
new file mode 100644
index 0000000..4d93c7a
--- /dev/null
+++ b/intel-mmio-fix.patch
@@ -0,0 +1,57 @@
+diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
+index c2ad5a0..18e7348 100644
+--- a/src/mesa/drivers/dri/intel/intel_context.c
++++ b/src/mesa/drivers/dri/intel/intel_context.c
+@@ -409,10 +409,12 @@ static const struct dri_extension brw_extensions[] = {
+    { NULL,                                NULL }
+ };
+ 
++#ifdef I915_MMIO_READ
+ static const struct dri_extension arb_oc_extensions[] = {
+    {"GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions},
+    {NULL, NULL}
+ };
++#endif
+ 
+ static const struct dri_extension ttm_extensions[] = {
+    {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
+@@ -437,10 +439,12 @@ void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
+    if (intel == NULL || intel->ttm)
+       driInitExtensions(ctx, ttm_extensions, GL_FALSE);
+ 
++#ifdef I915_MMIO_READ
+    if (intel == NULL || 
+        (IS_965(intel->intelScreen->deviceID) && 
+ 	intel->intelScreen->drmMinor >= 8))
+       driInitExtensions(ctx, arb_oc_extensions, GL_FALSE);
++#endif
+ 
+    if (intel == NULL || IS_965(intel->intelScreen->deviceID))
+       driInitExtensions(ctx, brw_extensions, GL_FALSE);
+@@ -538,6 +542,7 @@ intelFinish(GLcontext * ctx)
+    }
+ }
+ 
++#ifdef I915_MMIO_READ
+ static void
+ intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
+ {
+@@ -568,6 +573,7 @@ intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
+ 	q->Ready = GL_TRUE;
+ 	intel->stats_wm--;
+ }
++#endif
+ 
+ /** Driver-specific fence emit implementation for the fake memory manager. */
+ static unsigned int
+@@ -684,8 +690,10 @@ intelInitDriverFunctions(struct dd_function_table *functions)
+    functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
+    functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+ 
++#ifdef I915_MMIO_READ
+    functions->BeginQuery = intelBeginQuery;
+    functions->EndQuery = intelEndQuery;
++#endif
+ 
+    intelInitTextureFuncs(functions);
+    intelInitStateFuncs(functions);
diff --git a/mesa-7.1-nukeglthread-debug.patch b/mesa-7.1-nukeglthread-debug.patch
new file mode 100644
index 0000000..dc8ad6f
--- /dev/null
+++ b/mesa-7.1-nukeglthread-debug.patch
@@ -0,0 +1,24 @@
+diff -up Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c.intel-glthread Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c
+--- Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c.intel-glthread	2008-08-25 10:49:40.000000000 -0400
++++ Mesa-7.1/src/mesa/drivers/dri/intel/intel_fbo.c	2008-08-28 14:26:17.000000000 -0400
+@@ -633,11 +633,6 @@ intel_render_texture(GLcontext * ctx,
+        return;
+    }
+ 
+-   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
+-       _glthread_GetID(),
+-       att->Texture->Name, newImage->Width, newImage->Height,
+-       irb->Base.RefCount);
+-
+    /* point the renderbufer's region to the texture image region */
+    intel_image = intel_texture_image(newImage);
+    if (irb->region != intel_image->mt->region) {
+@@ -674,8 +669,6 @@ intel_finish_render_texture(GLcontext * 
+ {
+    struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
+ 
+-   DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name);
+-
+    if (irb) {
+       /* just release the region */
+       intel_region_release(&irb->region);
diff --git a/mesa-7.1-osmesa-version.patch b/mesa-7.1-osmesa-version.patch
new file mode 100644
index 0000000..cd41ad2
--- /dev/null
+++ b/mesa-7.1-osmesa-version.patch
@@ -0,0 +1,21 @@
+diff -up Mesa-7.1/src/mesa/drivers/osmesa/Makefile.jx Mesa-7.1/src/mesa/drivers/osmesa/Makefile
+--- Mesa-7.1/src/mesa/drivers/osmesa/Makefile.jx	2008-08-28 14:05:47.000000000 -0400
++++ Mesa-7.1/src/mesa/drivers/osmesa/Makefile	2008-08-28 14:07:13.000000000 -0400
+@@ -46,7 +46,7 @@ osmesa8: $(TOP)/lib/$(OSMESA_LIB_NAME)
+ 
+ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS)
+ 	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+-		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
++		-major 6 -minor 5 -patch 3 \
+ 		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+ 		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
+ 		$(OSMESA_LIB_DEPS) $(OBJECTS)
+@@ -58,7 +58,7 @@ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS
+ # with all the other Mesa sources (compiled with -DCHAN_BITS=16/32
+ osmesa16: $(OBJECTS) $(CORE_MESA)
+ 	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+-		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
++		-major 6 -minor 5 -patch 3 \
+ 		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+ 		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
+ 		$(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
diff --git a/mesa-7.1pre-nukeglthread-debug.patch b/mesa-7.1pre-nukeglthread-debug.patch
deleted file mode 100644
index 0ce6298..0000000
--- a/mesa-7.1pre-nukeglthread-debug.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
-index 94d499f..2ba596e 100644
---- a/src/mesa/drivers/dri/intel/intel_fbo.c
-+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
-@@ -615,11 +615,6 @@ intel_render_texture(GLcontext * ctx,
-       }
-    }
- 
--   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
--       _glthread_GetID(),
--       att->Texture->Name, newImage->Width, newImage->Height,
--       irb->Base.RefCount);
--
-    /* point the renderbufer's region to the texture image region */
-    intel_image = intel_texture_image(newImage);
-    if (irb->region != intel_image->mt->region) {
-@@ -656,8 +651,6 @@ intel_finish_render_texture(GLcontext * ctx,
- {
-    struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
- 
--   DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name);
--
-    if (irb) {
-       /* just release the region */
-       intel_region_release(&irb->region);
diff --git a/mesa-7.1pre-osmesa-version.patch b/mesa-7.1pre-osmesa-version.patch
deleted file mode 100644
index b958f89..0000000
--- a/mesa-7.1pre-osmesa-version.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-diff -up mesa-20080814/src/mesa/drivers/osmesa/Makefile.osmesa mesa-20080814/src/mesa/drivers/osmesa/Makefile
---- mesa-20080814/src/mesa/drivers/osmesa/Makefile.osmesa	2008-08-28 22:33:46.000000000 +1000
-+++ mesa-20080814/src/mesa/drivers/osmesa/Makefile	2008-08-28 22:34:06.000000000 +1000
-@@ -46,7 +46,7 @@ osmesa8: $(TOP)/lib/$(OSMESA_LIB_NAME)
- 
- $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS)
- 	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
--		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
-+		-major 6 -minor 5 -patch 3 \
- 		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
- 		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
- 		$(OSMESA_LIB_DEPS) $(OBJECTS)
-@@ -58,7 +58,7 @@ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS
- # with all the other Mesa sources (compiled with -DCHAN_BITS=16/32
- osmesa16: $(OBJECTS) $(CORE_MESA)
- 	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
--		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
-+		-major 6 -minor 5 -patch 3 \
- 		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
- 		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
- 		$(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
diff --git a/mesa-fixes.patch b/mesa-fixes.patch
deleted file mode 100644
index 2abf8e5..0000000
--- a/mesa-fixes.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
-index 91b835d..ddfdce3 100644
---- a/src/mesa/drivers/dri/intel/intel_regions.c
-+++ b/src/mesa/drivers/dri/intel/intel_regions.c
-@@ -455,8 +455,7 @@ static struct intel_region *
- intel_recreate_static(struct intel_context *intel,
- 		      const char *name,
- 		      struct intel_region *region,
--		      intelRegion *region_desc,
--		      GLuint mem_type)
-+		      intelRegion *region_desc)
- {
-    intelScreenPrivate *intelScreen = intel->intelScreen;
-    int ret;
-@@ -537,22 +536,19 @@ intel_recreate_static_regions(struct intel_context *intel)
-    intel->front_region =
-       intel_recreate_static(intel, "front",
- 			    intel->front_region,
--			    &intelScreen->front,
--			    DRM_BO_FLAG_MEM_TT);
-+			    &intelScreen->front);
- 
-    intel->back_region =
-       intel_recreate_static(intel, "back",
- 			    intel->back_region,
--			    &intelScreen->back,
--			    DRM_BO_FLAG_MEM_TT);
-+			    &intelScreen->back);
- 
- #ifdef I915
-    if (intelScreen->third.handle) {
-       intel->third_region =
- 	 intel_recreate_static(intel, "third",
- 			       intel->third_region,
--			       &intelScreen->third,
--			       DRM_BO_FLAG_MEM_TT);
-+			       &intelScreen->third);
-    }
- #endif /* I915 */
- 
-@@ -562,6 +558,5 @@ intel_recreate_static_regions(struct intel_context *intel)
-    intel->depth_region =
-       intel_recreate_static(intel, "depth",
- 			    intel->depth_region,
--			    &intelScreen->depth,
--			    DRM_BO_FLAG_MEM_TT);
-+			    &intelScreen->depth);
- }
diff --git a/mesa-no-mach64.patch b/mesa-no-mach64.patch
new file mode 100644
index 0000000..397531e
--- /dev/null
+++ b/mesa-no-mach64.patch
@@ -0,0 +1,45 @@
+--- configure.ac.mach64	2008-09-05 13:53:24.000000000 +1000
++++ configure.ac	2008-09-05 13:53:39.000000000 +1000
+@@ -656,7 +656,7 @@
+             # because there is no x86-64 system where they could *ever*
+             # be used.
+             if test "x$DRI_DIRS" = "xyes"; then
+-                DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 radeon \
++                DRI_DIRS="i915 i965 mga r128 r200 r300 radeon \
+                     savage tdfx unichrome swrast"
+             fi
+             ;;
+@@ -664,13 +664,13 @@
+             # Build only the drivers for cards that exist on PowerPC.
+             # At some point MGA will be added, but not yet.
+             if test "x$DRI_DIRS" = "xyes"; then
+-                DRI_DIRS="mach64 r128 r200 r300 radeon tdfx swrast"
++                DRI_DIRS="r128 r200 r300 radeon tdfx swrast"
+             fi
+             ;;
+         sparc*)
+             # Build only the drivers for cards that exist on sparc`
+             if test "x$DRI_DIRS" = "xyes"; then
+-                DRI_DIRS="mach64 r128 r200 r300 radeon ffb swrast"
++                DRI_DIRS="r128 r200 r300 radeon ffb swrast"
+             fi
+             ;;
+         esac
+@@ -689,7 +689,7 @@
+         # ffb and gamma are missing because they have not been converted
+         # to use the new interface.
+         if test "x$DRI_DIRS" = "xyes"; then
+-            DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \
++            DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon tdfx \
+                 unichrome savage sis swrast"
+         fi
+         ;;
+@@ -704,7 +704,7 @@
+ 
+     # default drivers
+     if test "x$DRI_DIRS" = "xyes"; then
+-        DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon s3v \
++        DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon s3v \
+             savage sis tdfx trident unichrome ffb swrast"
+     fi
+ 
diff --git a/mesa.spec b/mesa.spec
index b8a850e..ccd5cd5 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -12,12 +12,12 @@
 %define _default_patch_fuzz 2
 
 %define manpages gl-manpages-1.0.1
-%define gitdate 20080814
+%define gitdate 20080905
 
 Summary: Mesa graphics libraries
 Name: mesa
-Version: 7.1
-Release: 0.38%{?dist}
+Version: 7.2
+Release: 0.1%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
@@ -29,13 +29,14 @@ Source0: %{name}-%{gitdate}.tar.bz2
 Source2: %{manpages}.tar.bz2
 Source3: make-git-snapshot.sh
 
-Patch0: mesa-7.1pre-osmesa-version.patch
-Patch1: mesa-fixes.patch
-Patch2: mesa-7.1pre-nukeglthread-debug.patch
+Patch0: mesa-7.1-osmesa-version.patch
+Patch2: mesa-7.1-nukeglthread-debug.patch
+Patch3: mesa-no-mach64.patch
 
-Patch5: r300-cmdbuf.patch
+Patch5: r300-bufmgr.patch
 
 Patch7: mesa-7.1-link-shared.patch
+Patch8: intel-mmio-fix.patch
 
 Patch12: mesa-7.1-disable-intel-classic-warn.patch
 
@@ -164,10 +165,11 @@ This package provides some demo applications for testing Mesa.
 #%setup -q -n Mesa-%{version}pre -b1 -b2
 %setup -q -n mesa-%{gitdate} -b2
 %patch0 -p1 -b .osmesa
-%patch1 -p1 -b .fixes
 %patch2 -p1 -b .intel-glthread
-%patch5 -p1 -b .r300cmdbuf
+%patch3 -p0 -b .no-mach64
+%patch5 -p1 -b .r300-bufmgr
 %patch7 -p1 -b .dricore
+%patch8 -p1 -b .intel-mmio
 %patch12 -p1 -b .intel-nowarn
 
 # WARNING: The following files are copyright "Mark J. Kilgard" under the GLUT
@@ -240,7 +242,7 @@ make install DESTDIR=$RPM_BUILD_ROOT DRI_DIRS=
 %if %{with_dri}
 install -d $RPM_BUILD_ROOT%{_libdir}/dri
 install -m 0755 -t $RPM_BUILD_ROOT%{_libdir}/dri %{_lib}/libdricore.so >& /dev/null
-for f in i810 i915 i965 mach64 mga r128 r200 r300 radeon savage sis swrast tdfx unichrome; do
+for f in i810 i915 i965 mga r128 r200 r300 radeon savage sis swrast tdfx unichrome; do
     so=%{_lib}/${f}_dri.so
     test -e $so && echo $so
 done | xargs install -m 0755 -t $RPM_BUILD_ROOT%{_libdir}/dri >& /dev/null || :
@@ -308,8 +310,8 @@ rm -rf $RPM_BUILD_ROOT
 %{_includedir}/GL/xmesa_xf86.h
 %dir %{_includedir}/GL/internal
 %{_includedir}/GL/internal/dri_interface.h
-%{_includedir}/GL/internal/dri_sarea.h
 %{_libdir}/libGL.so
+%{_libdir}/pkgconfig/dri.pc
 %{_libdir}/pkgconfig/gl.pc
 %{_datadir}/man/man3/gl[^uX]*.3gl*
 %{_datadir}/man/man3/glX*.3gl*
@@ -356,6 +358,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_bindir}/cubemap
 %{_bindir}/drawpix
 %{_bindir}/engine
+%{_bindir}/fbo_firecube
 %{_bindir}/fire
 %{_bindir}/fogcoord
 %{_bindir}/fplight
@@ -402,6 +405,10 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/mesa-demos-data
 
 %changelog
+* Fri Sep 05 2008 Dave Airlie <airlied@redhat.com> 7.2-0.1
+- latest snapshot - r300 bufmgr code
+- stop building mach64, patch around some intel issues
+
 * Thu Aug 28 2008 Dave Airlie <airlied@redhat.com> 7.1-0.38
 - latest Mesa snapshot - re-enable tex offset
 - add r300 command buffer support on top of snapshot
diff --git a/r300-bufmgr.patch b/r300-bufmgr.patch
new file mode 100644
index 0000000..09c2e95
--- /dev/null
+++ b/r300-bufmgr.patch
@@ -0,0 +1,7844 @@
+diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
+index e9144ac..b6ed58b 100644
+--- a/src/mesa/drivers/dri/r200/Makefile
++++ b/src/mesa/drivers/dri/r200/Makefile
+@@ -48,7 +48,8 @@ SYMLINKS = \
+ COMMON_SYMLINKS = \
+ 	radeon_chipset.h \
+ 	radeon_screen.c \
+-	radeon_screen.h
++	radeon_screen.h \
++	radeon_buffer.h
+ 
+ ##### TARGETS #####
+ 
+diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
+index 6ca9342..3bb1ff4 100644
+--- a/src/mesa/drivers/dri/r300/Makefile
++++ b/src/mesa/drivers/dri/r300/Makefile
+@@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1)
+ EGL_SOURCES = server/radeon_egl.c
+ endif
+ 
+-COMMON_SOURCES = \
+-	../../common/driverfuncs.c \
+-	../common/mm.c \
+-	../common/utils.c \
+-	../common/texmem.c \
+-	../common/vblank.c \
+-	../common/xmlconfig.c \
+-	../common/dri_util.c
+-
+ DRIVER_SOURCES = \
+ 		 radeon_screen.c \
+ 		 radeon_context.c \
+@@ -36,6 +27,7 @@ DRIVER_SOURCES = \
+ 		 r300_texmem.c \
+ 		 r300_tex.c \
+ 		 r300_texstate.c \
++		 r300_mipmap_tree.c \
+ 		 radeon_program.c \
+ 		 radeon_program_alu.c \
+ 		 radeon_program_pair.c \
+@@ -51,7 +43,7 @@ DRIVER_SOURCES = \
+ 		 r300_swtcl.c \
+ 		 $(EGL_SOURCES)
+ 
+-C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
++C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES)
+ 
+ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
+ 	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
+@@ -68,7 +60,8 @@ COMMON_SYMLINKS = \
+ 	radeon_chipset.h \
+ 	radeon_screen.c \
+ 	radeon_screen.h \
+-	radeon_span.h
++	radeon_span.h \
++	radeon_buffer.h
+ 
+ ##### TARGETS #####
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+index c069660..dd42bf8 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+@@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_reg.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_emit.h"
++#include "r300_mem.h"
++#include "r300_mipmap_tree.h"
+ #include "r300_state.h"
+ 
+ // Set this to 1 for extremely verbose debugging of command buffers
+ #define DEBUG_CMDBUF		0
+ 
++/** # of dwords reserved for additional instructions that may need to be written
++ * during flushing.
++ */
++#define SPACE_FOR_FLUSHING	4
++
+ /**
+  * Send the current command buffer via ioctl to the hardware.
+  */
+@@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
+ 	drm_radeon_cmd_buffer_t cmd;
+ 	int start;
+ 
++	if (r300->cmdbuf.flushing) {
++		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
++		exit(-1);
++	}
++	r300->cmdbuf.flushing = 1;
++
+ 	if (r300->radeon.lost_context) {
+ 		start = 0;
+ 		r300->radeon.lost_context = GL_FALSE;
+ 	} else
+-		start = r300->cmdbuf.count_reemit;
++		start = r300->cmdbuf.reemit;
+ 
+ 	if (RADEON_DEBUG & DEBUG_IOCTL) {
+ 		fprintf(stderr, "%s from %s - %i cliprects\n",
+ 			__FUNCTION__, caller, r300->radeon.numClipRects);
+ 
+-		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
+-			for (i = start; i < r300->cmdbuf.count_used; ++i)
++		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) {
++			fprintf(stderr, "written: %d  committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed);
++			for (i = start; i < r300->cmdbuf.written; ++i)
+ 				fprintf(stderr, "%d: %08x\n", i,
+-					r300->cmdbuf.cmd_buf[i]);
++					((uint32_t*)r300->cmdbuf.buf->virtual)[i]);
++		}
+ 	}
+ 
+-	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
+-	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
++	if (r300->cmdbuf.written != r300->cmdbuf.committed) {
++		_mesa_problem(r300->radeon.glCtx,
++			"Command buffer contains %d uncommitted dwords\n"
++			"in r300FlushCmdBufLocked called from %s.\n",
++			r300->cmdbuf.written - r300->cmdbuf.committed, caller);
++	}
++
++	dri_bo_unmap(r300->cmdbuf.buf);
++	dri_process_relocs(r300->cmdbuf.buf);
++
++	cmd.buf = (char *)r300->cmdbuf.buf->virtual + 4*start;
++	cmd.bufsz = (r300->cmdbuf.committed - start) * 4;
+ 
+ 	if (r300->radeon.state.scissor.enabled) {
+ 		cmd.nbox = r300->radeon.state.scissor.numClipRects;
+@@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
+ 		radeonWaitForIdleLocked(&r300->radeon);
+ 	}
+ 
++	dri_post_submit(r300->cmdbuf.buf);
++	dri_bo_unreference(r300->cmdbuf.buf);
++
+ 	r300->dma.nr_released_bufs = 0;
+-	r300->cmdbuf.count_used = 0;
+-	r300->cmdbuf.count_reemit = 0;
++	r300->cmdbuf.buf = radeon_bufmgr_classic_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
++							r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF);
++	r300->cmdbuf.written = 0;
++	r300->cmdbuf.reserved = 0;
++	r300->cmdbuf.committed = 0;
++	r300->cmdbuf.reemit = 0;
++	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
++
++	r300->cmdbuf.flushing = 0;
+ 
+ 	return ret;
+ }
+@@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
+ 	int ret;
+ 
+ 	LOCK_HARDWARE(&r300->radeon);
+-
+ 	ret = r300FlushCmdBufLocked(r300, caller);
+-
+ 	UNLOCK_HARDWARE(&r300->radeon);
+ 
+ 	if (ret) {
+@@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
+ 	return ret;
+ }
+ 
++/**
++ * Make sure that enough space is available in the command buffer
++ * by flushing if necessary.
++ *
++ * \param dwords The number of dwords we need to be free on the command buffer
++ */
++void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller)
++{
++	assert(dwords < r300->cmdbuf.size);
++
++	if (!r300->cmdbuf.flushing)
++		dwords += SPACE_FOR_FLUSHING;
++
++	if (r300->cmdbuf.written + dwords > r300->cmdbuf.size)
++		r300FlushCmdBuf(r300, caller);
++}
++
++void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line)
++{
++	assert(r300->cmdbuf.written == r300->cmdbuf.reserved);
++
++	r300EnsureCmdBufSpace(r300, n, function);
++
++	if (autostate && !r300->cmdbuf.written) {
++		if (RADEON_DEBUG & DEBUG_IOCTL)
++			fprintf(stderr,
++				"Reemit state after flush (from %s)\n", function);
++		r300EmitState(r300);
++	}
++
++	r300->cmdbuf.reserved += n;
++	assert(r300->cmdbuf.reserved < r300->cmdbuf.size);
++
++	if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL)
++		fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
++			n, r300->cmdbuf.written, function, line);
++}
++
+ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
+ {
+ 	int i;
+@@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
+  */
+ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+ {
++	BATCH_LOCALS(r300);
+ 	struct r300_state_atom *atom;
+-	uint32_t *dest;
+ 	int dwords;
+ 
+-	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
+-
+-	/* Emit WAIT */
+-	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	/* Emit cache flush */
+-	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	*dest = R300_TX_FLUSH;
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	/* Emit END3D */
+-	*dest = cmdpacify();
+-	dest++;
+-	r300->cmdbuf.count_used++;
++	BEGIN_BATCH_NO_AUTOSTATE(4);
++	OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN));
++	OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1));
++	OUT_BATCH(R300_TX_FLUSH);
++	OUT_BATCH(cmdpacify());
++	END_BATCH();
+ 
+ 	/* Emit actual atoms */
+-
+ 	foreach(atom, &r300->hw.atomlist) {
+ 		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
+ 			dwords = (*atom->check) (r300, atom);
+@@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+ 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+ 					r300PrintStateAtom(r300, atom);
+ 				}
+-				memcpy(dest, atom->cmd, dwords * 4);
+-				dest += dwords;
+-				r300->cmdbuf.count_used += dwords;
++				if (atom->emit) {
++					(*atom->emit)(r300);
++				} else {
++					BEGIN_BATCH_NO_AUTOSTATE(dwords);
++					OUT_BATCH_TABLE(atom->cmd, dwords);
++					END_BATCH();
++				}
+ 				atom->dirty = GL_FALSE;
+ 			} else {
+ 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+@@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
+ 			}
+ 		}
+ 	}
++
++	COMMIT_BATCH();
+ }
+ 
+ /**
+@@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300)
+ 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
+ 		fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
++	if (r300->cmdbuf.written && !r300->hw.is_dirty
+ 	    && !r300->hw.all_dirty)
+ 		return;
+ 
+ 	/* To avoid going across the entire set of states multiple times, just check
+-	 * for enough space for the case of emitting all state, and inline the
+-	 * r300AllocCmdBuf code here without all the checks.
++	 * for enough space for the case of emitting all state.
+ 	 */
+ 	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
+ 
+-	if (!r300->cmdbuf.count_used) {
++	if (!r300->cmdbuf.written) {
+ 		if (RADEON_DEBUG & DEBUG_STATE)
+ 			fprintf(stderr, "Begin reemit state\n");
+ 
+ 		r300EmitAtoms(r300, GL_FALSE);
+-		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
++		r300->cmdbuf.reemit = r300->cmdbuf.committed;
+ 	}
+ 
+ 	if (RADEON_DEBUG & DEBUG_STATE)
+@@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300)
+ 
+ 	r300EmitAtoms(r300, GL_TRUE);
+ 
+-	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
++	assert(r300->cmdbuf.written < r300->cmdbuf.size);
+ 
+ 	r300->hw.is_dirty = GL_FALSE;
+ 	r300->hw.all_dirty = GL_FALSE;
+@@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300)
+ #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+ #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+ 
++static void emit_tex_offsets(r300ContextPtr r300)
++{
++	BATCH_LOCALS(r300);
++	int numtmus = packet0_count(r300->hw.tex.offset.cmd);
++
++	if (numtmus) {
++		int i;
++
++		BEGIN_BATCH(numtmus + 1);
++		OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus);
++		for(i = 0; i < numtmus; ++i) {
++			r300TexObj *t = r300->hw.textures[i];
++			if (t && !t->image_override) {
++				OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET);
++			} else if (!t) {
++				OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
++			} else {
++				OUT_BATCH(t->override_offset);
++			}
++		}
++		END_BATCH();
++	}
++}
++
++static void emit_cb_offset(r300ContextPtr r300)
++{
++	BATCH_LOCALS(r300);
++	struct radeon_renderbuffer *rrb;
++	uint32_t cbpitch;
++
++	rrb = r300->radeon.state.color.rrb;
++	if (!rrb) {
++		fprintf(stderr, "no rrb\n");
++		return;
++	}
++
++	cbpitch = rrb->pitch;
++	if (rrb->cpp == 4)
++		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++	else
++		cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++	if (r300->radeon.sarea->tiling_enabled)
++		cbpitch |= R300_COLOR_TILE_ENABLE;
++
++	BEGIN_BATCH(4);
++	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
++	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
++	OUT_BATCH(cbpitch);
++	END_BATCH();
++}
++
++static void emit_zb_offset(r300ContextPtr r300)
++{
++	BATCH_LOCALS(r300);
++	struct radeon_renderbuffer *rrb;
++	uint32_t zbpitch;
++
++	rrb = r300->radeon.state.depth_buffer;
++	if (!rrb)
++		return;
++
++	zbpitch = rrb->pitch;
++
++	BEGIN_BATCH(3);
++	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2);
++	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
++	OUT_BATCH(zbpitch);
++	END_BATCH();
++
++}
++
+ static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
+ {
+ 	return atom->cmd_size;
+@@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	ALLOC_STATE(rop, always, 2, 0);
+ 	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
+ 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
+-	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
+-	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
++	r300->hw.cb.emit = &emit_cb_offset;
+ 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+ 	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
+ 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+@@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	r300->hw.zstencil_format.cmd[0] =
+ 	    cmdpacket0(R300_ZB_FORMAT, 4);
+ 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
+-	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
++	r300->hw.zb.emit = emit_zb_offset;
+ 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+ 	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
+ 	ALLOC_STATE(unk4F30, always, 3, 0);
+@@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+ 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
+ 
+-	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
++	ALLOC_STATE(tex.offset, variable, 1, 0);
+ 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+ 	    cmdpacket0(R300_TX_OFFSET_0, 0);
++	r300->hw.tex.offset.emit = &emit_tex_offsets;
+ 
+ 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+ 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+@@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 			size * 4, r300->hw.max_state_size * 4);
+ 	}
+ 
++	r300->cmdbuf.buf = radeon_bufmgr_classic_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
++		size*4, 16, DRM_BO_MEM_CMDBUF);
+ 	r300->cmdbuf.size = size;
+-	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
+-	r300->cmdbuf.count_used = 0;
+-	r300->cmdbuf.count_reemit = 0;
++	r300->cmdbuf.written = 0;
++	r300->cmdbuf.reserved = 0;
++	r300->cmdbuf.committed = 0;
++	r300->cmdbuf.reemit = 0;
++	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
+ }
+ 
+ /**
+@@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300)
+ {
+ 	struct r300_state_atom *atom;
+ 
+-	FREE(r300->cmdbuf.cmd_buf);
++	dri_bo_unmap(r300->cmdbuf.buf);
++	dri_bo_unreference(r300->cmdbuf.buf);
+ 
+ 	foreach(atom, &r300->hw.atomlist) {
+ 		FREE(atom->cmd);
+ 	}
+ }
+-
+-void r300EmitBlit(r300ContextPtr rmesa,
+-		  GLuint color_fmt,
+-		  GLuint src_pitch,
+-		  GLuint src_offset,
+-		  GLuint dst_pitch,
+-		  GLuint dst_offset,
+-		  GLint srcx, GLint srcy,
+-		  GLint dstx, GLint dsty, GLuint w, GLuint h)
+-{
+-	drm_r300_cmd_header_t *cmd;
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr,
+-			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+-			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
+-			dst_pitch, dst_offset, dstx, dsty, w, h);
+-
+-	assert((src_pitch & 63) == 0);
+-	assert((dst_pitch & 63) == 0);
+-	assert((src_offset & 1023) == 0);
+-	assert((dst_offset & 1023) == 0);
+-	assert(w < (1 << 16));
+-	assert(h < (1 << 16));
+-
+-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
+-
+-	cmd[0].header.cmd_type = R300_CMD_PACKET3;
+-	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
+-	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
+-	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+-		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+-		    RADEON_GMC_BRUSH_NONE |
+-		    (color_fmt << 8) |
+-		    RADEON_GMC_SRC_DATATYPE_COLOR |
+-		    RADEON_ROP3_S |
+-		    RADEON_DP_SRC_SOURCE_MEMORY |
+-		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+-
+-	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
+-	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
+-	cmd[5].u = (srcx << 16) | srcy;
+-	cmd[6].u = (dstx << 16) | dsty;	/* dst */
+-	cmd[7].u = (w << 16) | h;
+-}
+-
+-void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
+-{
+-	drm_r300_cmd_header_t *cmd;
+-
+-	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
+-
+-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].u = 0;
+-	cmd[0].wait.cmd_type = R300_CMD_WAIT;
+-	cmd[0].wait.flags = flags;
+-}
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+index a8eaa58..5c84b67 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+@@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300);
+ 
+ extern void r300InitCmdBuf(r300ContextPtr r300);
+ extern void r300DestroyCmdBuf(r300ContextPtr r300);
++extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller);
++
++extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line);
+ 
+ /**
+- * Make sure that enough space is available in the command buffer
+- * by flushing if necessary.
+- *
+- * \param dwords The number of dwords we need to be free on the command buffer
++ * Every function writing to the command buffer needs to declare this
++ * to get the necessary local variables.
+  */
+-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
+-					     int dwords, const char *caller)
+-{
+-	assert(dwords < r300->cmdbuf.size);
++#define BATCH_LOCALS(r300) \
++	const r300ContextPtr b_l_r300 = r300
+ 
+-	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
+-		r300FlushCmdBuf(r300, caller);
+-}
++/**
++ * Prepare writing n dwords to the command buffer,
++ * including producing any necessary state emits on buffer wraparound.
++ */
++#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__)
++
++/**
++ * Same as BEGIN_BATCH, but do not cause automatic state emits.
++ */
++#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__)
++
++/**
++ * Write one dword to the command buffer.
++ */
++#define OUT_BATCH(data) \
++	do { \
++		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
++			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \
++		} else { \
++			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \
++		} \
++	} while(0)
+ 
+ /**
+- * Allocate the given number of dwords in the command buffer and return
+- * a pointer to the allocated area.
+- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
+- * causes state reemission after a flush. This is necessary to ensure
+- * correct hardware state after an unlock.
++ * Write a relocated dword to the command buffer.
+  */
++#define OUT_BATCH_RELOC(data, bo, offset, flags) \
++	do { \
++		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
++			radeon_bufmgr_classic_emit_reloc(b_l_r300->cmdbuf.buf, (flags), (offset), 4*b_l_r300->cmdbuf.written, (bo)); \
++			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = (data); \
++		} else { /* no reserved dword left: report which macro overran */ \
++			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_RELOC mismatch", __FUNCTION__, __LINE__); \
++		} \
++	} while(0)
++
++/**
++ * Write n dwords from ptr to the command buffer.
++ */
++#define OUT_BATCH_TABLE(ptr,n) \
++	do { \
++		int _n = n; \
++		if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \
++			memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \
++			b_l_r300->cmdbuf.written += _n; \
++		} else { \
++			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \
++		} \
++	} while(0)
++
++/**
++ * Finish writing dwords to the command buffer.
++ * The number of (direct or indirect) OUT_BATCH calls between the previous
++ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
++ */
++#define END_BATCH() \
++	do { \
++		if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \
++			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \
++	} while(0)
++
++/**
++ * After the last END_BATCH() of rendering, this indicates that flushing
++ * the command buffer now is okay.
++ */
++#define COMMIT_BATCH() \
++	do { \
++		assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \
++		b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \
++	} while(0)
++
+ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+ 					       int dwords, const char *caller)
+ {
+@@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+ 
+ 	r300EnsureCmdBufSpace(r300, dwords, caller);
+ 
+-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+-	r300->cmdbuf.count_used += dwords;
++	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
++	r300->cmdbuf.written += dwords;
++	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
+ 	return ptr;
+ }
+ 
+@@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
+ 
+ 	r300EnsureCmdBufSpace(r300, dwords, caller);
+ 
+-	if (!r300->cmdbuf.count_used) {
++	if (!r300->cmdbuf.written) {
+ 		if (RADEON_DEBUG & DEBUG_IOCTL)
+ 			fprintf(stderr,
+ 				"Reemit state after flush (from %s)\n", caller);
+ 		r300EmitState(r300);
+ 	}
+ 
+-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+-	r300->cmdbuf.count_used += dwords;
++	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
++	r300->cmdbuf.written += dwords;
++	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
+ 	return ptr;
+ }
+ 
+-extern void r300EmitBlit(r300ContextPtr rmesa,
+-			 GLuint color_fmt,
+-			 GLuint src_pitch,
+-			 GLuint src_offset,
+-			 GLuint dst_pitch,
+-			 GLuint dst_offset,
+-			 GLint srcx, GLint srcy,
+-			 GLint dstx, GLint dsty, GLuint w, GLuint h);
+-
+-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
+-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
+-extern void r300EmitVertexShader(r300ContextPtr rmesa);
+-extern void r300EmitPixelShader(r300ContextPtr rmesa);
+-
+ #endif				/* __R300_CMDBUF_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
+index fcf571d..6c6b5ba 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.c
++++ b/src/mesa/drivers/dri/r300/r300_context.c
+@@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_span.h"
+ #include "r300_context.h"
+ #include "r300_cmdbuf.h"
++#include "r300_mipmap_tree.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "r300_tex.h"
+ #include "r300_emit.h"
+ #include "r300_swtcl.h"
+ 
+-#ifdef USER_BUFFERS
+ #include "r300_mem.h"
+-#endif
+ 
+ #include "vblank.h"
+ #include "utils.h"
+@@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	struct dd_function_table functions;
+ 	r300ContextPtr r300;
+ 	GLcontext *ctx;
+-	int tcl_mode, i;
++	int tcl_mode;
+ 
+ 	assert(glVisual);
+ 	assert(driContextPriv);
+@@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	r300InitTextureFuncs(&functions);
+ 	r300InitShaderFuncs(&functions);
+ 
+-#ifdef USER_BUFFERS
+-	r300_mem_init(r300);
+-#endif
+-
+ 	if (!radeonInitContext(&r300->radeon, &functions,
+ 			       glVisual, driContextPriv,
+ 			       sharedContextPrivate)) {
+@@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 		return GL_FALSE;
+ 	}
+ 
++	r300->radeon.bufmgr = radeonBufmgrClassicInit(r300);
++
+ 	/* Init r300 context data */
+-	r300->dma.buf0_address =
+-	    r300->radeon.radeonScreen->buffers->list[0].address;
+-
+-	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
+-	make_empty_list(&r300->swapped);
+-
+-	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
+-	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
+-	for (i = 0; i < r300->nr_heaps; i++) {
+-		/* *INDENT-OFF* */
+-		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
+-							       screen->
+-							       texSize[i], 12,
+-							       RADEON_NR_TEX_REGIONS,
+-							       (drmTextureRegionPtr)
+-							       r300->radeon.sarea->
+-							       tex_list[i],
+-							       &r300->radeon.sarea->
+-							       tex_age[i],
+-							       &r300->swapped,
+-							       sizeof
+-							       (r300TexObj),
+-							       (destroy_texture_object_t
+-								*)
+-							       r300DestroyTexObj);
+-		/* *INDENT-ON* */
+-	}
+ 	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
+ 					      "texture_depth");
+ 	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+@@ -299,13 +269,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+ 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+ 
+-#ifdef USER_BUFFERS
+ 	/* Needs further modifications */
+ #if 0
+ 	ctx->Const.MaxArrayLockSize =
+ 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
+ #endif
+-#endif
+ 
+ 	/* Initialize the software rasterizer and helper modules.
+ 	 */
+@@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	return GL_TRUE;
+ }
+ 
+-static void r300FreeGartAllocations(r300ContextPtr r300)
+-{
+-	int i, ret, tries = 0, done_age, in_use = 0;
+-	drm_radeon_mem_free_t memfree;
+-
+-	memfree.region = RADEON_MEM_REGION_GART;
+-
+-#ifdef USER_BUFFERS
+-	for (i = r300->rmm->u_last; i > 0; i--) {
+-		if (r300->rmm->u_list[i].ptr == NULL) {
+-			continue;
+-		}
+-
+-		/* check whether this buffer is still in use */
+-		if (r300->rmm->u_list[i].pending) {
+-			in_use++;
+-		}
+-	}
+-	/* Cannot flush/lock if no context exists. */
+-	if (in_use)
+-		r300FlushCmdBuf(r300, __FUNCTION__);
+-
+-	done_age = radeonGetAge((radeonContextPtr) r300);
+-
+-	for (i = r300->rmm->u_last; i > 0; i--) {
+-		if (r300->rmm->u_list[i].ptr == NULL) {
+-			continue;
+-		}
+-
+-		/* check whether this buffer is still in use */
+-		if (!r300->rmm->u_list[i].pending) {
+-			continue;
+-		}
+-
+-		assert(r300->rmm->u_list[i].h_pending == 0);
+-
+-		tries = 0;
+-		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
+-			usleep(10);
+-			done_age = radeonGetAge((radeonContextPtr) r300);
+-		}
+-		if (tries >= 1000) {
+-			WARN_ONCE("Failed to idle region!");
+-		}
+-
+-		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
+-		    (char *)r300->radeon.radeonScreen->gartTextures.map;
+-
+-		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
+-				      DRM_RADEON_FREE, &memfree,
+-				      sizeof(memfree));
+-		if (ret) {
+-			fprintf(stderr, "Failed to free at %p\nret = %s\n",
+-				r300->rmm->u_list[i].ptr, strerror(-ret));
+-		} else {
+-			if (i == r300->rmm->u_last)
+-				r300->rmm->u_last--;
+-
+-			r300->rmm->u_list[i].pending = 0;
+-			r300->rmm->u_list[i].ptr = NULL;
+-		}
+-	}
+-	r300->rmm->u_head = i;
+-#endif				/* USER_BUFFERS */
+-}
+-
+ /* Destroy the device specific context.
+  */
+ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+@@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+ 	assert(r300);		/* should never be null */
+ 
+ 	if (r300) {
+-		GLboolean release_texture_heaps;
+-
+-		release_texture_heaps =
+-		    (r300->radeon.glCtx->Shared->RefCount == 1);
+ 		_swsetup_DestroyContext(r300->radeon.glCtx);
+ 		_tnl_ProgramCacheDestroy(r300->radeon.glCtx);
+ 		_tnl_DestroyContext(r300->radeon.glCtx);
+ 		_vbo_DestroyContext(r300->radeon.glCtx);
+ 		_swrast_DestroyContext(r300->radeon.glCtx);
+ 
+-		if (r300->dma.current.buf) {
+-			r300ReleaseDmaRegion(r300, &r300->dma.current,
+-					     __FUNCTION__);
+-#ifndef USER_BUFFERS
+-			r300FlushCmdBuf(r300, __FUNCTION__);
+-#endif
++		if (r300->dma.current) {
++			dri_bo_unreference(r300->dma.current);
++			r300->dma.current = 0;
+ 		}
+-		r300FreeGartAllocations(r300);
++		r300FlushCmdBuf(r300, __FUNCTION__);
+ 		r300DestroyCmdBuf(r300);
+ 
+ 		if (radeon->state.scissor.pClipRects) {
+@@ -521,29 +416,14 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+ 			radeon->state.scissor.pClipRects = NULL;
+ 		}
+ 
+-		if (release_texture_heaps) {
+-			/* This share group is about to go away, free our private
+-			 * texture object data.
+-			 */
+-			int i;
+-
+-			for (i = 0; i < r300->nr_heaps; i++) {
+-				driDestroyTextureHeap(r300->texture_heaps[i]);
+-				r300->texture_heaps[i] = NULL;
+-			}
+-
+-			assert(is_empty_list(&r300->swapped));
+-		}
+-
+ 		radeonCleanupContext(&r300->radeon);
+ 
+-#ifdef USER_BUFFERS
+-		/* the memory manager might be accessed when Mesa frees the shared
+-		 * state, so don't destroy it earlier
+-		 */
+-		r300_mem_destroy(r300);
+-#endif
+ 
++   		/* the memory manager might be accessed when Mesa frees the shared
++    		 * state, so don't destroy it earlier
++    		 */
++   		dri_bufmgr_destroy(&r300->radeon.bufmgr->base);
++		r300->radeon.bufmgr = 0;
+ 		/* free the option cache */
+ 		driDestroyOptionCache(&r300->radeon.optionCache);
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
+index d2017f8..047caae 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.h
++++ b/src/mesa/drivers/dri/r300/r300_context.h
+@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_vertex.h"
+ #include "drm.h"
+ #include "radeon_drm.h"
++#include "dri_bufmgr.h"
+ #include "dri_util.h"
+ #include "texmem.h"
+ 
+@@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "mtypes.h"
+ #include "colormac.h"
+ 
+-#define USER_BUFFERS
+-
+ struct r300_context;
+ typedef struct r300_context r300ContextRec;
+ typedef struct r300_context *r300ContextPtr;
++typedef struct radeon_bufmgr radeon_bufmgr;
+ 
+ #include "radeon_lock.h"
+ #include "mm.h"
+@@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f)
+ 
+ /************ DMA BUFFERS **************/
+ 
+-/* Need refcounting on dma buffers:
+- */
+-struct r300_dma_buffer {
+-	int refcount;		/**< the number of retained regions in buf */
+-	drmBufPtr buf;
+-	int id;
+-};
+-#undef GET_START
+-#ifdef USER_BUFFERS
+-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
+-#else
+-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
+-			(rvb)->address - rmesa->dma.buf0_address +	\
+-			(rvb)->start)
+-#endif
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct r300_dma_region {
+-	struct r300_dma_buffer *buf;
+-	char *address;		/* == buf->address */
+-	int start, end, ptr;	/* offsets from start of buf */
+-
+-	int aos_offset;		/* address in GART memory */
+-	int aos_stride;		/* distance between elements, in dwords */
+-	int aos_size;		/* number of components (1-4) */
+-};
+-
+ struct r300_dma {
+ 	/* Active dma region.  Allocations for vertices and retained
+ 	 * regions come from here.  Also used for emitting random vertices,
+ 	 * these may be flushed by calling flush_current();
+ 	 */
+-	struct r300_dma_region current;
++	dri_bo *current; /** Buffer that DMA memory is allocated from */
++	int current_used; /** Number of bytes allocated and forgotten about */
++	int current_vertexptr; /** End of active vertex region */
+ 
++	/**
++	 * If current_vertexptr != current_used then flush must be non-zero.
++	 * flush must be called before non-active vertex allocations can be
++	 * performed.
++	 */
+ 	void (*flush) (r300ContextPtr);
+ 
+-	char *buf0_address;	/* start of buf[0], for index calcs */
+-
+ 	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
+ 	 * for which a DISCARD command is currently queued in the command buffer.
+ 	 */
+@@ -173,17 +151,13 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
+ /* Texture object in locally shared texture space.
+  */
+ struct r300_tex_obj {
+-	driTextureObject base;
+-
+-	GLuint bufAddr;		/* Offset to start of locally
+-				   shared texture block */
+-
+-	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+-	/* Six, for the cube faces */
++	struct gl_texture_object base;
++	struct _r300_mipmap_tree *mt;
++	GLuint dirty_images[6];
+ 
+ 	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
++	GLuint override_offset;
+ 
+-	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
+ 	/* hardware register values */
+ 	/* Note that R200 has 8 registers per texture and R300 only 7 */
+ 	GLuint filter;
+@@ -191,30 +165,16 @@ struct r300_tex_obj {
+ 	GLuint pitch_reg;
+ 	GLuint size;		/* npot only */
+ 	GLuint format;
+-	GLuint offset;		/* Image location in the card's address space.
+-				   All cube faces follow. */
+-	GLuint unknown4;
+-	GLuint unknown5;
+-	/* end hardware registers */
+-
+-	/* registers computed by r200 code - keep them here to
+-	   compare against what is actually written.
+-
+-	   to be removed later.. */
+ 	GLuint pp_border_color;
+-	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
+-	GLuint format_x;
+-
+-	GLboolean border_fallback;
++	/* end hardware registers */
+ 
+ 	GLuint tile_bits;	/* hw texture tile bits used on this texture */
+ };
+ 
+-struct r300_texture_env_state {
+-	r300TexObjPtr texobj;
+-	GLenum format;
+-	GLenum envMode;
+-};
++static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj)
++{
++	return (r300TexObj*)texObj;
++}
+ 
+ /* The blit width for texture uploads
+  */
+@@ -222,7 +182,6 @@ struct r300_texture_env_state {
+ #define R300_MAX_TEXTURE_UNITS 8
+ 
+ struct r300_texture_state {
+-	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
+ 	int tc_count;		/* number of incoming texture coordinates from VAP */
+ };
+ 
+@@ -242,6 +201,7 @@ struct r300_state_atom {
+ 	GLboolean dirty;
+ 
+ 	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
++	void (*emit) (r300ContextPtr);
+ };
+ 
+ #define R300_VPT_CMD_0		0
+@@ -549,6 +509,8 @@ struct r300_hw_state {
+ 		struct r300_state_atom border_color;
+ 	} tex;
+ 	struct r300_state_atom txe;	/* tex enable (4104) */
++
++	r300TexObj *textures[R300_MAX_TEXTURE_UNITS];
+ };
+ 
+ /**
+@@ -559,10 +521,14 @@ struct r300_hw_state {
+  * otherwise.
+  */
+ struct r300_cmdbuf {
+-	int size;		/* DWORDs allocated for buffer */
+-	uint32_t *cmd_buf;
+-	int count_used;		/* DWORDs filled so far */
+-	int count_reemit;	/* size of re-emission batch */
++	dri_bo *buf;
++	int reemit; /** # of dwords in reemit sequence (is always <= committed) */
++	int size; /** # of dwords total */
++
++	int committed; /** # of dwords that we have committed to */
++	int written; /** # of dwords written (is always >= committed) */
++	int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */
++	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
+ };
+ 
+ /**
+@@ -811,18 +777,25 @@ struct r500_fragment_program {
+ #define REG_COLOR0	1
+ #define REG_TEX0	2
+ 
++struct r300_aos {
++	dri_bo *bo; /**< Buffer object where vertex data is stored */
++	int offset; /**< Offset into buffer object, in bytes */
++	int components; /**< Number of components per vertex */
++	int stride; /**< Stride in dwords (may be 0 for repeating) */
++	int count; /**< Number of vertices */
++};
++
+ struct r300_state {
+ 	struct r300_depthbuffer_state depth;
+ 	struct r300_texture_state texture;
+ 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
+ 	struct r300_vertex_shader_state vertex_shader;
+-	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
++	struct r300_aos aos[R300_MAX_AOS_ARRAYS];
+ 	int aos_count;
+ 
+-	GLuint *Elts;
+-	struct r300_dma_region elt_dma;
++	dri_bo *elt_dma_bo; /** Buffer object that contains element indices */
++	int elt_dma_offset; /** Offset into this buffer object, in bytes */
+ 
+-	struct r300_dma_region swtcl_dma;
+ 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
+ 							   They are the same as tnl->render_inputs for fixed pipeline */
+ 
+@@ -880,13 +853,6 @@ struct r300_swtcl_info {
+     * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+     */
+    GLuint specoffset;
+-
+-   /**
+-    * Should Mesa project vertex data or will the hardware do it?
+-    */
+-   GLboolean needproj;
+-
+-   struct r300_dma_region indexed_verts;
+ };
+ 
+ 
+@@ -905,25 +871,11 @@ struct r300_context {
+ 	/* Vertex buffers
+ 	 */
+ 	struct r300_dma dma;
+-	GLboolean save_on_next_unlock;
+ 	GLuint NewGLState;
+ 
+-	/* Texture object bookkeeping
+-	 */
+-	unsigned nr_heaps;
+-	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+-	driTextureObject swapped;
+ 	int texture_depth;
+ 	float initialMaxAnisotropy;
+ 
+-	/* Clientdata textures;
+-	 */
+-	GLuint prefer_gart_client_texturing;
+-
+-#ifdef USER_BUFFERS
+-	struct r300_memory_manager *rmm;
+-#endif
+-
+ 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
+index 2ea17ad..5e2afd5 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.c
++++ b/src/mesa/drivers/dri/r300/r300_emit.c
+@@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_emit.h"
+ #include "r300_ioctl.h"
+ 
+-#ifdef USER_BUFFERS
+ #include "r300_mem.h"
+-#endif
+ 
+ #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+@@ -86,11 +84,9 @@ do {						\
+ } while (0)
+ #endif
+ 
+-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
+-			 GLvoid * data, int stride, int count)
++static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
+ {
+ 	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+ 
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+@@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
+ 		}
+ }
+ 
+-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
+-			 GLvoid * data, int stride, int count)
++static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
+ {
+ 	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+ 
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+@@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
+ 		}
+ }
+ 
+-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
+-			  GLvoid * data, int stride, int count)
++static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
+ {
+ 	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+ 
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
+ 		}
+ }
+ 
+-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
+-			  GLvoid * data, int stride, int count)
++static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
+ {
+ 	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+ 
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+@@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
+ 		}
+ }
+ 
+-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
++
++static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos,
+ 			GLvoid * data, int size, int stride, int count)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
++	uint32_t *out;
+ 
+ 	if (stride == 0) {
+-		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
++		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+ 		count = 1;
+-		rvb->aos_offset = GET_START(rvb);
+-		rvb->aos_stride = 0;
++		aos->stride = 0;
+ 	} else {
+-		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
+-		rvb->aos_offset = GET_START(rvb);
+-		rvb->aos_stride = size;
++		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
++		aos->stride = size;
+ 	}
+ 
++	aos->components = size;
++	aos->count = count;
++
++	out = (uint32_t*)((char*)aos->bo->virtual + aos->offset);
+ 	switch (size) {
+-	case 1:
+-		r300EmitVec4(ctx, rvb, data, stride, count);
+-		break;
+-	case 2:
+-		r300EmitVec8(ctx, rvb, data, stride, count);
+-		break;
+-	case 3:
+-		r300EmitVec12(ctx, rvb, data, stride, count);
+-		break;
+-	case 4:
+-		r300EmitVec16(ctx, rvb, data, stride, count);
+-		break;
++	case 1: r300EmitVec4(out, data, stride, count); break;
++	case 2: r300EmitVec8(out, data, stride, count); break;
++	case 3: r300EmitVec12(out, data, stride, count); break;
++	case 4: r300EmitVec16(out, data, stride, count); break;
+ 	default:
+ 		assert(0);
+ 		break;
+@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx)
+ 	}
+ 
+ 	for (i = 0; i < nr; i++) {
+-		int ci, fix, found = 0;
++		int ci;
+ 
+ 		swizzle[i][0] = SWIZZLE_ZERO;
+ 		swizzle[i][1] = SWIZZLE_ZERO;
+@@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx)
+ 			swizzle[i][ci] = ci;
+ 		}
+ 
+-		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
+-			if (vb->AttribPtr[tab[i]]->stride % 4) {
+-				return R300_FALLBACK_TCL;
+-			}
+-			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
+-			rmesa->state.aos[i].start = 0;
+-			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
+-			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
+-			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+-		} else {
+-			r300EmitVec(ctx, &rmesa->state.aos[i],
+-				    vb->AttribPtr[tab[i]]->data,
+-				    vb->AttribPtr[tab[i]]->size,
+-				    vb->AttribPtr[tab[i]]->stride, count);
+-		}
+-
+-		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+-
+-		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
+-			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
+-				continue;
+-			}
+-			found = 1;
+-			break;
+-		}
+-
+-		if (found) {
+-			if (fix > 0) {
+-				WARN_ONCE("Feeling lucky?\n");
+-			}
+-			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
+-			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
+-				swizzle[i][ci] += fix;
+-			}
+-		} else {
+-			WARN_ONCE
+-			    ("Cannot handle offset %x with stride %d, comp %d\n",
+-			     rmesa->state.aos[i].aos_offset,
+-			     rmesa->state.aos[i].aos_stride,
+-			     vb->AttribPtr[tab[i]]->size);
+-			return R300_FALLBACK_TCL;
+-		}
++		r300EmitVec(ctx, &rmesa->state.aos[i],
++				vb->AttribPtr[tab[i]]->data,
++				vb->AttribPtr[tab[i]]->size,
++				vb->AttribPtr[tab[i]]->stride, count);
+ 	}
+ 
+ 	/* Setup INPUT_ROUTE. */
+@@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx)
+ 	return R300_FALLBACK_NONE;
+ }
+ 
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	int i;
+-
+-	if (rmesa->state.elt_dma.buf)
+-		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
+-
+-	for (i = 0; i < rmesa->state.aos_count; i++) {
+-		if (rmesa->state.aos[i].buf)
+-			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
+-	}
+-}
+-#endif
+-
+ void r300ReleaseArrays(GLcontext * ctx)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	int i;
+ 
+-	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
++	if (rmesa->state.elt_dma_bo) {
++		dri_bo_unreference(rmesa->state.elt_dma_bo);
++		rmesa->state.elt_dma_bo = 0;
++	}
+ 	for (i = 0; i < rmesa->state.aos_count; i++) {
+-		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
++		if (rmesa->state.aos[i].bo) {
++			dri_bo_unreference(rmesa->state.aos[i].bo);
++			rmesa->state.aos[i].bo = 0;
++		}
+ 	}
+ }
+ 
+ void r300EmitCacheFlush(r300ContextPtr rmesa)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-
+-	drm_radeon_cmd_header_t *cmd = NULL;
+-
+-	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+-	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+-	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++	BATCH_LOCALS(rmesa);
++
++	BEGIN_BATCH(4);
++	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
++		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
++		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
++		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
++		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++	END_BATCH();
++	COMMIT_BATCH();
++}
+ 
+-	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
+-	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+-	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++void r300EmitBlit(r300ContextPtr rmesa,
++		  GLuint color_fmt,
++		  GLuint src_pitch,
++		  dri_bo *src_bo, int src_offset,
++		  GLuint dst_pitch,
++		  GLuint dst_offset,
++		  GLint srcx, GLint srcy,
++		  GLint dstx, GLint dsty, GLuint w, GLuint h)
++{
++	BATCH_LOCALS(rmesa);
++
++	if (RADEON_DEBUG & DEBUG_IOCTL)
++		fprintf(stderr,
++			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
++			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
++			dst_pitch, dst_offset, dstx, dsty, w, h);
++
++	assert((src_pitch & 63) == 0);
++	assert((dst_pitch & 63) == 0);
++	assert((src_offset & 1023) == 0);
++	assert((dst_offset & 1023) == 0);
++	assert(w < (1 << 16));
++	assert(h < (1 << 16));
++
++	BEGIN_BATCH(8);
++	OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5);
++	OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
++		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
++		  RADEON_GMC_BRUSH_NONE |
++		  (color_fmt << 8) |
++		  RADEON_GMC_SRC_DATATYPE_COLOR |
++		  RADEON_ROP3_S |
++		  RADEON_DP_SRC_SOURCE_MEMORY |
++		  RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
++	OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER);
++	OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10));
++	OUT_BATCH((srcx << 16) | srcy);
++	OUT_BATCH((dstx << 16) | dsty);
++	OUT_BATCH((w << 16) | h);
++	END_BATCH();
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
+index 5950539..179983d 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.h
++++ b/src/mesa/drivers/dri/r300/r300_emit.h
+@@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void)
+ 	return cmd.u;
+ }
+ 
+-/**
+- * Prepare to write a register value to register at address reg.
+- * If num_extra > 0 then the following extra values are written
+- * to registers with address +4, +8 and so on..
+- */
+-#define reg_start(reg, num_extra)					\
+-	do {								\
+-		int _n;							\
+-		_n=(num_extra);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+2),				\
+-					__FUNCTION__);			\
+-		cmd_reserved=_n+2;					\
+-		cmd_written=1;						\
+-		cmd[0].i=cmdpacket0((reg), _n+1);			\
+-	} while (0);
++
++/** Single register write; requires 2 dwords. One statement: safe under an unbraced if. */
++#define OUT_BATCH_REGVAL(reg, val) do { \
++	OUT_BATCH(cmdpacket0((reg), 1)); \
++	OUT_BATCH((val)); } while(0)
++
++/** Continuous register range write to command buffer; requires 1 dword,
++ * expects count dwords afterwards for register contents. */
++#define OUT_BATCH_REGSEQ(reg, count) \
++	OUT_BATCH(cmdpacket0((reg), (count)));
++
++/** Write a 32 bit float to the ring; requires 1 dword. */
++#define OUT_BATCH_FLOAT32(f) \
++	OUT_BATCH(r300PackFloat32((f)));
+ 
+ /**
+- * Emit GLuint freestyle
++ * Write the header of a packet3 to the command buffer.
++ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
+  */
+-#define e32(dword)							\
+-	do {								\
+-		if(cmd_written<cmd_reserved) {				\
+-			cmd[cmd_written].i=(dword);			\
+-			cmd_written++;					\
+-		} else {						\
+-			fprintf(stderr,					\
+-				"e32 but no previous packet "		\
+-				"declaration.\n"			\
+-				"Aborting! in %s::%s at line %d, "	\
+-				"cmd_written=%d cmd_reserved=%d\n",	\
+-				__FILE__, __FUNCTION__, __LINE__,	\
+-				cmd_written, cmd_reserved);		\
+-			_mesa_exit(-1);					\
+-		}							\
++#define OUT_BATCH_PACKET3(packet, num_extra) do {\
++	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_RAW)); \
++	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ 	} while(0)
+ 
+-#define	efloat(f) e32(r300PackFloat32(f))
+-
+-#define vsf_start_fragment(dest, length)				\
+-	do {								\
+-		int _n;							\
+-		_n = (length);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+1),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+2;					\
+-		cmd_written =1;						\
+-		cmd[0].i = cmdvpu((dest), _n/4);			\
+-	} while (0);
+-
+-#define r500fp_start_fragment(dest, length)				\
+-	do {								\
+-		int _n;							\
+-		_n = (length);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+1),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+1;					\
+-		cmd_written =1;						\
+-		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
+-	} while (0);
+-
+-#define start_packet3(packet, count)					\
+-	{								\
+-		int _n;							\
+-		GLuint _p;						\
+-		_n = (count);						\
+-		_p = (packet);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+3),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+3;					\
+-		cmd_written = 2;					\
+-		if(_n > 0x3fff) {					\
+-			fprintf(stderr,"Too big packet3 %08x: cannot "	\
+-				"store %d dwords\n",			\
+-				_p, _n);				\
+-			_mesa_exit(-1);					\
+-		}							\
+-		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
+-		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
+-	}
+-
+ /**
+  * Must be sent to switch to 2d commands
+  */
+ void static INLINE end_3d(r300ContextPtr rmesa)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(rmesa);
+ 
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].header.cmd_type = R300_CMD_END3D;
++	BEGIN_BATCH(1);
++	OUT_BATCH(cmdpacify());
++	END_BATCH();
+ }
+ 
+ void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(rmesa);
+ 
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].i = cmdcpdelay(count);
++	BEGIN_BATCH(1);
++	OUT_BATCH(cmdcpdelay(count));
++	END_BATCH();
+ }
+ 
+ void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(rmesa);
+ 
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].i = cmdwait(flags);
++	BEGIN_BATCH(1);
++	OUT_BATCH(cmdwait(flags));
++	END_BATCH();
+ }
+ 
+ extern int r300EmitArrays(GLcontext * ctx);
+ 
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx);
+-#endif
+-
+ extern void r300ReleaseArrays(GLcontext * ctx);
+ extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+ extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+@@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
+ extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
+ extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);
+ 
++extern void r300EmitBlit(r300ContextPtr rmesa,
++			 GLuint color_fmt,
++			 GLuint src_pitch,
++			 dri_bo *src_bo, int src_offset,
++			 GLuint dst_pitch,
++			 GLuint dst_offset,
++			 GLint srcx, GLint srcy,
++			 GLint dstx, GLint dsty, GLuint w, GLuint h);
++
+ #endif
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
+index bd7f060..2b8b266 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
+@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_reg.h"
+ #include "r300_emit.h"
+ #include "r300_fragprog.h"
++#include "r300_mem.h"
+ 
+ #include "vblank.h"
+ 
+@@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define CLEARBUFFER_DEPTH	0x2
+ #define CLEARBUFFER_STENCIL	0x4
+ 
+-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
++static void r300ClearBuffer(r300ContextPtr r300, int flags,
++			    struct radeon_renderbuffer *rrb)
+ {
++	BATCH_LOCALS(r300);
+ 	GLcontext *ctx = r300->radeon.glCtx;
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+-	GLuint cboffset, cbpitch;
+-	drm_r300_cmd_header_t *cmd2;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	GLuint cbpitch = 0;
+ 	r300ContextPtr rmesa = r300;
+ 
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
+-			__FUNCTION__, buffer ? "back" : "front",
+-			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
+-
+-	if (buffer) {
+-		cboffset = r300->radeon.radeonScreen->backOffset;
+-		cbpitch = r300->radeon.radeonScreen->backPitch;
+-	} else {
+-		cboffset = r300->radeon.radeonScreen->frontOffset;
+-		cbpitch = r300->radeon.radeonScreen->frontPitch;
++		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
++			__FUNCTION__, rrb, dPriv->x, dPriv->y,
++			dPriv->w, dPriv->h);
++
++	if (rrb) {
++		cbpitch = rrb->pitch;
++		if (rrb->cpp == 4)
++			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++		else
++			cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++		if (r300->radeon.sarea->tiling_enabled)
++			cbpitch |= R300_COLOR_TILE_ENABLE;
+ 	}
+ 
+-	cboffset += r300->radeon.radeonScreen->fbLocation;
+-
++	/* TODO in bufmgr */
+ 	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ 	end_3d(rmesa);
+ 
+-	R300_STATECHANGE(r300, cb);
+-	reg_start(R300_RB3D_COLOROFFSET0, 0);
+-	e32(cboffset);
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		cbpitch |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		cbpitch |= R300_COLOR_TILE_ENABLE;
+-
+-	reg_start(R300_RB3D_COLORPITCH0, 0);
+-	e32(cbpitch);
+-
+-	R300_STATECHANGE(r300, cmk);
+-	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
++	BEGIN_BATCH(19);
++	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); /* NOTE(review): rrb is dereferenced unconditionally here, but the "if (rrb)" guard above and r300Clear's r300ClearBuffer(r300, bits, NULL) call show it can be NULL - confirm and guard */
++	OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
+ 
++	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
+ 	if (flags & CLEARBUFFER_COLOR) {
+-		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
++		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
++			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
++			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
++			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
+ 	} else {
+-		e32(0x0);
++		OUT_BATCH(0);
+ 	}
+ 
+-	R300_STATECHANGE(r300, zs);
+-	reg_start(R300_ZB_CNTL, 2);
++	OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
+ 
+ 	{
+ 		uint32_t t1, t2;
+@@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
+ 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
+ 		}
+ 
+-		e32(t1);
+-		e32(t2);
+-		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
+-		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
++		OUT_BATCH(t1);
++		OUT_BATCH(t2);
++		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
++			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
+ 	}
+ 
+-	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
+-	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
+-	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
+-	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
+-	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
+-	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
+-	cmd2[4].u = r300PackFloat32(1.0);
+-	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
+-	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
+-	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
+-	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
++	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR));
++	OUT_BATCH_FLOAT32(dPriv->w / 2.0);
++	OUT_BATCH_FLOAT32(dPriv->h / 2.0);
++	OUT_BATCH_FLOAT32(ctx->Depth.Clear);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
++	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
++	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
++	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
++	END_BATCH();
+ 
+ 	r300EmitCacheFlush(rmesa);
+ 	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++
++	R300_STATECHANGE(r300, cb);
++	R300_STATECHANGE(r300, cmk);
++	R300_STATECHANGE(r300, zs);
+ }
+ 
+ static void r300EmitClearState(GLcontext * ctx)
+ {
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+-	r300ContextPtr rmesa = r300;
++	BATCH_LOCALS(r300);
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ 	int i;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 	int has_tcl = 1;
+ 	int is_r500 = 0;
+ 	GLuint vap_cntl;
+@@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx)
+ 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ 		has_tcl = 0;
+ 
+-        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+-                is_r500 = 1;
+-
++	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++		is_r500 = 1;
+ 
+-	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
+-	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
+-	 * quite complex; see the functions in r300_emit.c.
++	/* State atom dirty tracking is a little subtle here.
++	 *
++	 * On the one hand, we need to make sure base state is emitted
++	 * here if we start with an empty batch buffer, otherwise clear
++	 * works incorrectly with multiple processes. Therefore, the first
++	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
+ 	 *
+-	 * I believe it would be a good idea to extend the functions in
+-	 * r300_emit.c so that they can be used to setup the default values for
+-	 * these registers, as well as the actual values used for rendering.
++	 * On the other hand, implicit state emission clears the state atom
++	 * dirty bits, so we have to call R300_STATECHANGE later than the
++	 * first BEGIN_BATCH.
++	 *
++	 * The final trickiness is that, because we change state, we need
++	 * to ensure that any stored swtcl primitives are flushed properly
++	 * before we start changing state. See the R300_NEWPRIM in r300Clear
++	 * for this.
+ 	 */
+-	R300_STATECHANGE(r300, vir[0]);
+-	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
++	BEGIN_BATCH(31);
++	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
+ 	if (!has_tcl)
+-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+ 	else
+-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+ 
+-	/* disable fog */
+-	R300_STATECHANGE(r300, fogs);
+-	reg_start(R300_FG_FOG_BLEND, 0);
+-	e32(0x0);
+-
+-	R300_STATECHANGE(r300, vir[1]);
+-	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
+-	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
++	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
++	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
++	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
+@@ -226,238 +216,246 @@ static void r300EmitClearState(GLcontext * ctx)
+ 	      << R300_SWIZZLE1_SHIFT)));
+ 
+ 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
+-	R300_STATECHANGE(r300, vic);
+-	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
+-	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
+-	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
++	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
++	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
++	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+ 
+-	R300_STATECHANGE(r300, vte);
+ 	/* comes from fglrx startup of clear */
+-	reg_start(R300_SE_VTE_CNTL, 1);
+-	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+-	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+-	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+-	    R300_VPORT_Z_OFFSET_ENA);
+-	e32(0x8);
++	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
++	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
++		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
++		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
++		  R300_VPORT_Z_OFFSET_ENA);
++	OUT_BATCH(0x8);
+ 
+-	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+-	e32(0xaaaaaaaa);
++	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
+ 
+-	R300_STATECHANGE(r300, vof);
+-	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
+-	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+-	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+-	e32(0x0);		/* no textures */
++	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
++	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
++		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
++	OUT_BATCH(0); /* no textures */
+ 
+-	R300_STATECHANGE(r300, txe);
+-	reg_start(R300_TX_ENABLE, 0);
+-	e32(0x0);
++	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
+ 
+-	R300_STATECHANGE(r300, vpt);
+-	reg_start(R300_SE_VPORT_XSCALE, 5);
+-	efloat(1.0);
+-	efloat(dPriv->x);
+-	efloat(1.0);
+-	efloat(dPriv->y);
+-	efloat(1.0);
+-	efloat(0.0);
++	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(dPriv->x);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(dPriv->y);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(0.0);
+ 
+-	R300_STATECHANGE(r300, at);
+-	reg_start(R300_FG_ALPHA_FUNC, 0);
+-	e32(0x0);
++	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
++
++	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
++	OUT_BATCH(0x0);
++	OUT_BATCH(0x0);
++	END_BATCH();
+ 
++	R300_STATECHANGE(r300, vir[0]);
++	R300_STATECHANGE(r300, fogs);
++	R300_STATECHANGE(r300, vir[1]);
++	R300_STATECHANGE(r300, vic);
++	R300_STATECHANGE(r300, vte);
++	R300_STATECHANGE(r300, vof);
++	R300_STATECHANGE(r300, txe);
++	R300_STATECHANGE(r300, vpt);
++	R300_STATECHANGE(r300, at);
+ 	R300_STATECHANGE(r300, bld);
+-	reg_start(R300_RB3D_CBLEND, 1);
+-	e32(0x0);
+-	e32(0x0);
++	R300_STATECHANGE(r300, ps);
+ 
+ 	if (has_tcl) {
+-	    R300_STATECHANGE(r300, vap_clip_cntl);
+-	    reg_start(R300_VAP_CLIP_CNTL, 0);
+-	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++		R300_STATECHANGE(r300, vap_clip_cntl);
++
++		BEGIN_BATCH_NO_AUTOSTATE(2);
++		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++		END_BATCH();
+         }
+ 
+-	R300_STATECHANGE(r300, ps);
+-	reg_start(R300_GA_POINT_SIZE, 0);
+-	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+-	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++	BEGIN_BATCH_NO_AUTOSTATE(2);
++	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
++		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
++		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++	END_BATCH();
+ 
+ 	if (!is_r500) {
+ 		R300_STATECHANGE(r300, ri);
+-		reg_start(R300_RS_IP_0, 7);
+-		for (i = 0; i < 8; ++i) {
+-			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+-		}
+-
+ 		R300_STATECHANGE(r300, rc);
+-		/* The second constant is needed to get glxgears display anything .. */
+-		reg_start(R300_RS_COUNT, 1);
+-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+-		e32(0x0);
+-
+ 		R300_STATECHANGE(r300, rr);
+-		reg_start(R300_RS_INST_0, 0);
+-		e32(R300_RS_INST_COL_CN_WRITE);
++
++		BEGIN_BATCH(14);
++		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
++		for (i = 0; i < 8; ++i)
++			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
++
++		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++		OUT_BATCH(0x0);
++
++		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
++		END_BATCH();
+ 	} else {
+ 		R300_STATECHANGE(r300, ri);
+-		reg_start(R500_RS_IP_0, 7);
++		R300_STATECHANGE(r300, rc);
++		R300_STATECHANGE(r300, rr);
++
++		BEGIN_BATCH(14);
++		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
+ 		for (i = 0; i < 8; ++i) {
+-			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+-			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
++			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
++				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
++				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
++				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ 		}
+ 
+-		R300_STATECHANGE(r300, rc);
+-		/* The second constant is needed to get glxgears display anything .. */
+-		reg_start(R300_RS_COUNT, 1);
+-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+-		e32(0x0);
+-
+-		R300_STATECHANGE(r300, rr);
+-		reg_start(R500_RS_INST_0, 0);
+-		e32(R500_RS_INST_COL_CN_WRITE);
++		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++		OUT_BATCH(0x0);
+ 
++		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
++		END_BATCH();
+ 	}
+ 
+ 	if (!is_r500) {
+ 		R300_STATECHANGE(r300, fp);
+-		reg_start(R300_US_CONFIG, 2);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(0x0);
+-		reg_start(R300_US_CODE_ADDR_0, 3);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(R300_RGBA_OUT);
+-
+ 		R300_STATECHANGE(r300, fpi[0]);
+ 		R300_STATECHANGE(r300, fpi[1]);
+ 		R300_STATECHANGE(r300, fpi[2]);
+ 		R300_STATECHANGE(r300, fpi[3]);
+ 
+-		reg_start(R300_US_ALU_RGB_INST_0, 0);
+-		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+-
+-		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
+-		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+-
+-		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
+-		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+-
+-		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
+-		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++		BEGIN_BATCH(17);
++		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(R300_RGBA_OUT);
++
++		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
++			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
++		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
++			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
++		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
++			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
++		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
++			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++		END_BATCH();
+ 	} else {
+- 		R300_STATECHANGE(r300, fp);
+- 		reg_start(R500_US_CONFIG, 1);
+- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+- 		e32(0x0);
+- 		reg_start(R500_US_CODE_ADDR, 2);
+- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+- 		e32(R500_US_CODE_OFFSET_ADDR(0));
+-
++		R300_STATECHANGE(r300, fp);
+ 		R300_STATECHANGE(r300, r500fp);
+-		r500fp_start_fragment(0, 6);
+-
+-		e32(R500_INST_TYPE_OUT |
+-		    R500_INST_TEX_SEM_WAIT |
+-		    R500_INST_LAST |
+-		    R500_INST_RGB_OMASK_R |
+-		    R500_INST_RGB_OMASK_G |
+-		    R500_INST_RGB_OMASK_B |
+-		    R500_INST_ALPHA_OMASK |
+-		    R500_INST_RGB_CLAMP |
+-		    R500_INST_ALPHA_CLAMP);
+-
+-		e32(R500_RGB_ADDR0(0) |
+-		    R500_RGB_ADDR1(0) |
+-		    R500_RGB_ADDR1_CONST |
+-		    R500_RGB_ADDR2(0) |
+-		    R500_RGB_ADDR2_CONST);
+-
+-		e32(R500_ALPHA_ADDR0(0) |
+-		    R500_ALPHA_ADDR1(0) |
+-		    R500_ALPHA_ADDR1_CONST |
+-		    R500_ALPHA_ADDR2(0) |
+-		    R500_ALPHA_ADDR2_CONST);
+-
+-		e32(R500_ALU_RGB_SEL_A_SRC0 |
+-		    R500_ALU_RGB_R_SWIZ_A_R |
+-		    R500_ALU_RGB_G_SWIZ_A_G |
+-		    R500_ALU_RGB_B_SWIZ_A_B |
+-		    R500_ALU_RGB_SEL_B_SRC0 |
+-		    R500_ALU_RGB_R_SWIZ_B_R |
+-		    R500_ALU_RGB_B_SWIZ_B_G |
+-		    R500_ALU_RGB_G_SWIZ_B_B);
+-
+-		e32(R500_ALPHA_OP_CMP |
+-		    R500_ALPHA_SWIZ_A_A |
+-		    R500_ALPHA_SWIZ_B_A);
+-
+-		e32(R500_ALU_RGBA_OP_CMP |
+-		    R500_ALU_RGBA_R_SWIZ_0 |
+-		    R500_ALU_RGBA_G_SWIZ_0 |
+-		    R500_ALU_RGBA_B_SWIZ_0 |
+-		    R500_ALU_RGBA_A_SWIZ_0);
++
++		BEGIN_BATCH(14);
++		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
++		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
++		OUT_BATCH(0x0);
++		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
++		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
++		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
++		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
++
++		OUT_BATCH(cmdr500fp(0, 1, 0, 0));
++		OUT_BATCH(R500_INST_TYPE_OUT |
++			  R500_INST_TEX_SEM_WAIT |
++			  R500_INST_LAST |
++			  R500_INST_RGB_OMASK_R |
++			  R500_INST_RGB_OMASK_G |
++			  R500_INST_RGB_OMASK_B |
++			  R500_INST_ALPHA_OMASK |
++			  R500_INST_RGB_CLAMP |
++			  R500_INST_ALPHA_CLAMP);
++		OUT_BATCH(R500_RGB_ADDR0(0) |
++			  R500_RGB_ADDR1(0) |
++			  R500_RGB_ADDR1_CONST |
++			  R500_RGB_ADDR2(0) |
++			  R500_RGB_ADDR2_CONST);
++		OUT_BATCH(R500_ALPHA_ADDR0(0) |
++			  R500_ALPHA_ADDR1(0) |
++			  R500_ALPHA_ADDR1_CONST |
++			  R500_ALPHA_ADDR2(0) |
++			  R500_ALPHA_ADDR2_CONST);
++		OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 |
++			  R500_ALU_RGB_R_SWIZ_A_R |
++			  R500_ALU_RGB_G_SWIZ_A_G |
++			  R500_ALU_RGB_B_SWIZ_A_B |
++			  R500_ALU_RGB_SEL_B_SRC0 |
++			  R500_ALU_RGB_R_SWIZ_B_R |
++			  R500_ALU_RGB_B_SWIZ_B_G |
++			  R500_ALU_RGB_G_SWIZ_B_B);
++		OUT_BATCH(R500_ALPHA_OP_CMP |
++			  R500_ALPHA_SWIZ_A_A |
++			  R500_ALPHA_SWIZ_B_A);
++		OUT_BATCH(R500_ALU_RGBA_OP_CMP |
++			  R500_ALU_RGBA_R_SWIZ_0 |
++			  R500_ALU_RGBA_G_SWIZ_0 |
++			  R500_ALU_RGBA_B_SWIZ_0 |
++			  R500_ALU_RGBA_A_SWIZ_0);
++		END_BATCH();
+ 	}
+ 
+-	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+-	e32(0x00000000);
++	BEGIN_BATCH(2);
++	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++	END_BATCH();
++
+ 	if (has_tcl) {
+-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
+-	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+-		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+-	} else
+-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
++	} else {
++		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
++	}
+ 
+ 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+-	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+-	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+-	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+-	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else
+-	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++
++	R300_STATECHANGE(r300, vap_cntl);
+ 
+-	R300_STATECHANGE(rmesa, vap_cntl);
+-	reg_start(R300_VAP_CNTL, 0);
+-	e32(vap_cntl);
++	BEGIN_BATCH(2);
++	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
++	END_BATCH();
+ 
+ 	if (has_tcl) {
+ 		R300_STATECHANGE(r300, pvs);
+-		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
+-
+-		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
+-		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+-		    (1 << R300_PVS_LAST_INST_SHIFT));
+-		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+-		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+-		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+-
+ 		R300_STATECHANGE(r300, vpi);
+-		vsf_start_fragment(0x0, 8);
+-
+-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
+-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(0x0);
+ 
+-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
+-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(0x0);
++		BEGIN_BATCH(13);
++		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
++		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
++			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
++			  (1 << R300_PVS_LAST_INST_SHIFT));
++		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
++			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
++		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
++
++		OUT_BATCH(cmdvpu(0, 2));
++		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
++		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
++		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
++		OUT_BATCH(0x0);
++
++		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
++		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
++		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
++		OUT_BATCH(0x0);
++		END_BATCH();
+ 	}
+ }
+ 
+@@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx)
+ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ {
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(r300);
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
++	GLframebuffer *fb = dPriv->driverPrivate;
++	struct radeon_renderbuffer *rrb;
+ 	int flags = 0;
+ 	int bits = 0;
+ 	int swapped;
+@@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ 			return;
+ 	}
+ 
++	/* Flush swtcl vertices if necessary, because we will change hardware
++	 * state during clear. See also the state-related comment in
++	 * r300EmitClearState.
++	 */
++	R300_NEWPRIM(r300);
++
+ 	if (mask & BUFFER_BIT_FRONT_LEFT) {
+ 		flags |= BUFFER_BIT_FRONT_LEFT;
+ 		mask &= ~BUFFER_BIT_FRONT_LEFT;
+@@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ 		_swrast_Clear(ctx, mask);
+ 	}
+ 
+-	swapped = r300->radeon.sarea->pfCurrentPage == 1;
+-
+ 	/* Make sure it fits there. */
+ 	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
+ 	if (flags || bits)
+ 		r300EmitClearState(ctx);
+ 
+ 	if (flags & BUFFER_BIT_FRONT_LEFT) {
+-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
++		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
+ 		bits = 0;
+ 	}
+ 
+ 	if (flags & BUFFER_BIT_BACK_LEFT) {
+-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
++		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
+ 		bits = 0;
+ 	}
+ 
+ 	if (bits)
+-		r300ClearBuffer(r300, bits, 0);
++		r300ClearBuffer(r300, bits, NULL);
+ 
++	COMMIT_BATCH();
+ }
+ 
+ void r300Flush(GLcontext * ctx)
+@@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx)
+ 	if (rmesa->dma.flush)
+ 		rmesa->dma.flush( rmesa );
+ 
+-	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
++	if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit)
+ 		r300FlushCmdBuf(rmesa, __FUNCTION__);
+ }
+ 
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-
+ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+ {
+-	struct r300_dma_buffer *dmabuf;
+ 	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
+ 
+ 	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+@@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+ 		rmesa->dma.flush(rmesa);
+ 	}
+ 
+-	if (rmesa->dma.current.buf) {
+-#ifdef USER_BUFFERS
+-		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
+-#endif
+-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
++	if (rmesa->dma.current) {
++		dri_bo_unreference(rmesa->dma.current);
++		rmesa->dma.current = 0;
+ 	}
+ 	if (rmesa->dma.nr_released_bufs > 4)
+ 		r300FlushCmdBuf(rmesa, __FUNCTION__);
+ 
+-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+-	dmabuf->buf = (void *)1;	/* hack */
+-	dmabuf->refcount = 1;
+-
+-	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+-	if (dmabuf->id == 0) {
+-		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
+-
+-		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+-		radeonWaitForIdleLocked(&rmesa->radeon);
+-
+-		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+-
+-		UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-		if (dmabuf->id == 0) {
+-			fprintf(stderr,
+-				"Error: Could not get dma buffer... exiting\n");
+-			_mesa_exit(-1);
+-		}
+-	}
+-
+-	rmesa->dma.current.buf = dmabuf;
+-	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
+-	rmesa->dma.current.end = size;
+-	rmesa->dma.current.start = 0;
+-	rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-			  struct r300_dma_region *region, const char *caller)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+-	if (!region->buf)
+-		return;
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (--region->buf->refcount == 0) {
+-		r300_mem_free(rmesa, region->buf->id);
+-		FREE(region->buf);
+-		rmesa->dma.nr_released_bufs++;
+-	}
+-
+-	region->buf = 0;
+-	region->start = 0;
++	rmesa->dma.current = radeon_bufmgr_classic_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions",
++		size, 4, DRM_BO_MEM_DMA);
++	rmesa->dma.current_used = 0;
++	rmesa->dma.current_vertexptr = 0;
+ }
+ 
+ /* Allocates a region from rmesa->dma.current.  If there isn't enough
+  * space in current, grab a new buffer (and discard what was left of current)
+  */
+ void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			struct r300_dma_region *region,
++			dri_bo **pbo, int *poffset,
+ 			int bytes, int alignment)
+ {
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+@@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa,
+ 	if (rmesa->dma.flush)
+ 		rmesa->dma.flush(rmesa);
+ 
+-	if (region->buf)
+-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
++	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
+ 
+ 	alignment--;
+-	rmesa->dma.current.start = rmesa->dma.current.ptr =
+-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+-		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
+-
+-	region->start = rmesa->dma.current.start;
+-	region->ptr = rmesa->dma.current.start;
+-	region->end = rmesa->dma.current.start + bytes;
+-	region->address = rmesa->dma.current.address;
+-	region->buf = rmesa->dma.current.buf;
+-	region->buf->refcount++;
++	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
+ 
+-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
+-	rmesa->dma.current.start =
+-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
++	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
++		r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
+ 
+-#else
+-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
+-{
+-	struct r300_dma_buffer *dmabuf;
+-	int fd = rmesa->radeon.dri.fd;
+-	int index = 0;
+-	int size = 0;
+-	drmDMAReq dma;
+-	int ret;
++	*poffset = rmesa->dma.current_used;
++	*pbo = rmesa->dma.current;
++	dri_bo_reference(*pbo);
+ 
+-	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+-		fprintf(stderr, "%s\n", __FUNCTION__);
++	/* Always align to at least 16 bytes */
++	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
++	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+ 
+-	if (rmesa->dma.flush) {
+-		rmesa->dma.flush(rmesa);
+-	}
+-
+-	if (rmesa->dma.current.buf)
+-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+-
+-	if (rmesa->dma.nr_released_bufs > 4)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-	dma.context = rmesa->radeon.dri.hwContext;
+-	dma.send_count = 0;
+-	dma.send_list = NULL;
+-	dma.send_sizes = NULL;
+-	dma.flags = 0;
+-	dma.request_count = 1;
+-	dma.request_size = RADEON_BUFFER_SIZE;
+-	dma.request_list = &index;
+-	dma.request_sizes = &size;
+-	dma.granted_count = 0;
+-
+-	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
+-
+-	ret = drmDMA(fd, &dma);
+-
+-	if (ret != 0) {
+-		/* Try to release some buffers and wait until we can't get any more */
+-		if (rmesa->dma.nr_released_bufs) {
+-			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+-		}
+-
+-		if (RADEON_DEBUG & DEBUG_DMA)
+-			fprintf(stderr, "Waiting for buffers\n");
+-
+-		radeonWaitForIdleLocked(&rmesa->radeon);
+-		ret = drmDMA(fd, &dma);
+-
+-		if (ret != 0) {
+-			UNLOCK_HARDWARE(&rmesa->radeon);
+-			fprintf(stderr,
+-				"Error: Could not get dma buffer... exiting\n");
+-			_mesa_exit(-1);
+-		}
+-	}
+-
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (RADEON_DEBUG & DEBUG_DMA)
+-		fprintf(stderr, "Allocated buffer %d\n", index);
+-
+-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+-	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
+-	dmabuf->refcount = 1;
+-
+-	rmesa->dma.current.buf = dmabuf;
+-	rmesa->dma.current.address = dmabuf->buf->address;
+-	rmesa->dma.current.end = dmabuf->buf->total;
+-	rmesa->dma.current.start = 0;
+-	rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-			  struct r300_dma_region *region, const char *caller)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+-	if (!region->buf)
+-		return;
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (--region->buf->refcount == 0) {
+-		drm_radeon_cmd_header_t *cmd;
+-
+-		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+-			fprintf(stderr, "%s -- DISCARD BUF %d\n",
+-				__FUNCTION__, region->buf->buf->idx);
+-		cmd =
+-		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
+-								sizeof
+-								(*cmd) / 4,
+-								__FUNCTION__);
+-		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
+-		cmd->dma.buf_idx = region->buf->buf->idx;
+-
+-		FREE(region->buf);
+-		rmesa->dma.nr_released_bufs++;
+-	}
+-
+-	region->buf = 0;
+-	region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current.  If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			struct r300_dma_region *region,
+-			int bytes, int alignment)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (region->buf)
+-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+-
+-	alignment--;
+-	rmesa->dma.current.start = rmesa->dma.current.ptr =
+-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+-		r300RefillCurrentDmaRegion(rmesa);
+-
+-	region->start = rmesa->dma.current.start;
+-	region->ptr = rmesa->dma.current.start;
+-	region->end = rmesa->dma.current.start + bytes;
+-	region->address = rmesa->dma.current.address;
+-	region->buf = rmesa->dma.current.buf;
+-	region->buf->refcount++;
+-
+-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
+-	rmesa->dma.current.start =
+-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
+-
+-#endif
+-
+-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
+-			   GLint size)
+-{
+-	int offset =
+-	    (char *)pointer -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-	int valid = (size >= 0 && offset >= 0
+-		     && offset + size <
+-		     rmesa->radeon.radeonScreen->gartTextures.size);
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
+-			valid);
+-
+-	return valid;
+-}
+-
+-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
+-{
+-	int offset =
+-	    (char *)pointer -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-
+-	//fprintf(stderr, "offset=%08x\n", offset);
+-
+-	if (offset < 0
+-	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+-		return ~0;
+-	else
+-		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
++	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
+ }
+ 
+ void r300InitIoctlFuncs(struct dd_function_table *functions)
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
+index e1143fb..c743478 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.h
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
+@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "radeon_drm.h"
+ 
+-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
+-				  const GLvoid * pointer, GLint size);
+-
+-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
+-					const GLvoid * pointer);
+-
+ extern void r300Flush(GLcontext * ctx);
+ 
+-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-				 struct r300_dma_region *region,
+-				 const char *caller);
+ extern void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			       struct r300_dma_region *region, int bytes,
+-			       int alignment);
++			       dri_bo **pbo, int *poffset,
++			       int bytes, int alignment);
+ 
+ extern void r300InitIoctlFuncs(struct dd_function_table *functions);
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
+index f8f9d4f..1097388 100644
+--- a/src/mesa/drivers/dri/r300/r300_mem.c
++++ b/src/mesa/drivers/dri/r300/r300_mem.c
+@@ -27,359 +27,869 @@
+ 
+ /**
+  * \file
++ * Simulate a real memory manager for R300 in the old-style scheme.
++ *
++ * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr.
+  *
+  * \author Aapo Tahkola <aet@rasterburn.org>
+  */
+ 
++#include "r300_mem.h"
++
++#include <errno.h>
+ #include <unistd.h>
+ 
+-#include "r300_context.h"
+-#include "r300_cmdbuf.h"
+-#include "r300_ioctl.h"
+-#include "r300_mem.h"
++#include "simple_list.h"
++
+ #include "radeon_ioctl.h"
++#include "r300_cmdbuf.h"
+ 
+-#ifdef USER_BUFFERS
++typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic;
++typedef struct _radeon_bo_classic radeon_bo_classic;
++typedef struct _radeon_bo_functions radeon_bo_functions;
++typedef struct _radeon_reloc radeon_reloc;
++typedef struct _radeon_bo_vram radeon_bo_vram;
++
++struct _radeon_bufmgr_classic {
++	radeon_bufmgr base;
++	r300ContextPtr rmesa;
++
++	radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */
++
++	radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */
++	radeon_bo_classic **pending_tail;
++
++	/* Texture heap bookkeeping */
++	driTexHeap *texture_heap;
++	GLuint texture_offset;
++	driTextureObject texture_swapped;
++};
++
++struct _radeon_reloc {
++	uint64_t flags;
++	GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */
++	radeon_bo_classic *target;
++	GLuint delta;
++};
++
++struct _radeon_bo_functions {
++	/**
++	 * Free a buffer object. Caller has verified that the object is not
++	 * referenced or pending.
++	 */
++	void (*free)(radeon_bo_classic*);
++
++	/**
++	 * Validate the given buffer. Must set the validated flag to 1.
++	 *
++	 * May be null for buffer objects that are always valid.
++	 * Always called with lock held.
++	 */
++	void (*validate)(radeon_bo_classic*);
++
++	/**
++	 * Map the buffer for CPU access.
++	 * Only called when the buffer isn't already mapped.
++	 *
++	 * May be null.
++	 */
++	void (*map)(radeon_bo_classic*, GLboolean write);
++
++	/**
++	 * Unmap the buffer.
++	 * Only called on final unmap.
++	 *
++	 * May be null.
++	 */
++	void (*unmap)(radeon_bo_classic*);
++
++	/**
++	 * Indicate that the buffer object is now used by the hardware.
++	 *
++	 * May be null.
++	 */
++	void (*bind)(radeon_bo_classic*);
++
++	/**
++	 * Indicate that the buffer object is no longer used by the hardware.
++	 *
++	 * May be null.
++	 */
++	void (*unbind)(radeon_bo_classic*);
++};
+ 
+-static void resize_u_list(r300ContextPtr rmesa)
+-{
+-	void *temp;
+-	int nsize;
++/**
++ * A buffer object. There are three types of buffer objects:
++ *  1. cmdbuf: Ordinary malloc()ed memory, used for command buffers
++ *  2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl.
++ *  3. vram: Objects with malloc()ed backing store that will be uploaded
++ *     into VRAM on demand; used for textures.
++ * There is a @ref functions table for operations that depend on the
++ * buffer object type.
++ *
++ * Fencing is handled the same way for all buffer objects. During command buffer
++ * submission, the pending flag and corresponding variables are set accordingly.
++ */
++struct _radeon_bo_classic {
++	dri_bo base;
+ 
+-	temp = rmesa->rmm->u_list;
+-	nsize = rmesa->rmm->u_size * 2;
++	const radeon_bo_functions *functions;
+ 
+-	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
+-	_mesa_memset(rmesa->rmm->u_list, 0,
+-		     nsize * sizeof(*rmesa->rmm->u_list));
++	radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */
++	radeon_bo_classic **pprev;
+ 
+-	if (temp) {
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
++	/**
++	 * Number of software references to this buffer.
++	 * A buffer is freed automatically as soon as its reference count reaches 0
++	 * *and* it is no longer pending.
++	 */
++	unsigned int refcount;
++	unsigned int mapcount; /** mmap count; mutually exclusive to being pending */
+ 
+-		_mesa_memcpy(rmesa->rmm->u_list, temp,
+-			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
+-		_mesa_free(temp);
+-	}
++	unsigned int validated:1; /** whether the buffer is validated for hardware use right now */
++	unsigned int used:1; /* only for communication between process_relocs and post_submit */
+ 
+-	rmesa->rmm->u_size = nsize;
++	unsigned int pending:1;
++	radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */
++	radeon_bo_classic **pending_pprev;
++
++	/* The following two variables are intricately linked to the DRM interface,
++	 * and must be in this physical memory order, or else chaos ensues.
++	 * See the DRM's implementation of R300_CMD_SCRATCH for details.
++	 */
++	uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */
++	uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */
++
++	radeon_reloc *relocs; /** Array of relocations in this buffer */
++	GLuint relocs_used; /** # of relocations in relocation array */
++	GLuint relocs_size; /** # of reloc records reserved in relocation array */
++};
++
++typedef struct _radeon_vram_wrapper radeon_vram_wrapper;
++
++/** Wrapper around heap object */
++struct _radeon_vram_wrapper {
++	driTextureObject base;
++	radeon_bo_vram *bo;
++};
++
++struct _radeon_bo_vram {
++	radeon_bo_classic base;
++
++	unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */
++
++	radeon_vram_wrapper *vram; /** Block in VRAM (if any) */
++};
++
++static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx)
++{
++	return (radeon_bufmgr_classic*)bufmgr_ctx;
+ }
+ 
+-void r300_mem_init(r300ContextPtr rmesa)
++static radeon_bo_classic* get_bo_classic(dri_bo *bo_base)
+ {
+-	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
+-	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
++	return (radeon_bo_classic*)bo_base;
++}
+ 
+-	rmesa->rmm->u_size = 128;
+-	resize_u_list(rmesa);
++static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base)
++{
++	return (radeon_bo_vram*)bo_base;
+ }
+ 
+-void r300_mem_destroy(r300ContextPtr rmesa)
++/**
++ * Really free a given buffer object.
++ */
++static void bo_free(radeon_bo_classic *bo)
+ {
+-	_mesa_free(rmesa->rmm->u_list);
+-	rmesa->rmm->u_list = NULL;
++	assert(!bo->refcount);
++	assert(!bo->pending);
++	assert(!bo->mapcount);
++
++	if (bo->relocs) {
++		int i;
++		for(i = 0; i < bo->relocs_used; ++i)
++			dri_bo_unreference(&bo->relocs[i].target->base);
++		free(bo->relocs);
++		bo->relocs = 0;
++	}
++
++	*bo->pprev = bo->next;
++	if (bo->next)
++		bo->next->pprev = bo->pprev;
+ 
+-	_mesa_free(rmesa->rmm);
+-	rmesa->rmm = NULL;
++	bo->functions->free(bo);
+ }
+ 
+-void *r300_mem_ptr(r300ContextPtr rmesa, int id)
++
++/**
++ * Keep track of which buffer objects are still pending, i.e. waiting for
++ * some hardware operation to complete.
++ */
++static void track_pending_buffers(radeon_bufmgr_classic *bufmgr)
+ {
+-	assert(id <= rmesa->rmm->u_last);
+-	return rmesa->rmm->u_list[id].ptr;
++	uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa);
++
++	while(bufmgr->pending) {
++		radeon_bo_classic *bo = bufmgr->pending;
++
++		assert(bo->pending);
++
++		if (bo->pending_count ||
++		    bo->pending_age > currentage) // TODO: Age counter wraparound!
++			break;
++
++		bo->pending = 0;
++		bufmgr->pending = bo->pending_next;
++		if (bufmgr->pending)
++			bufmgr->pending->pending_pprev = &bufmgr->pending;
++		else
++			bufmgr->pending_tail = &bufmgr->pending;
++
++		if (bo->functions->unbind)
++			(*bo->functions->unbind)(bo);
++		if (!bo->refcount)
++			bo_free(bo);
++	}
+ }
+ 
+-int r300_mem_find(r300ContextPtr rmesa, void *ptr)
++/**
++ * Initialize common buffer object data.
++ */
++static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size)
+ {
+-	int i;
++	bo->base.bufmgr = &bufmgr->base.base;
++	bo->base.size = size;
++	bo->refcount = 1;
++
++	bo->pprev = &bufmgr->buffers;
++	bo->next = bufmgr->buffers;
++	if (bo->next)
++		bo->next->pprev = &bo->next;
++	bufmgr->buffers = bo;
++}
+ 
+-	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
+-		if (rmesa->rmm->u_list[i].ptr &&
+-		    ptr >= rmesa->rmm->u_list[i].ptr &&
+-		    ptr <
+-		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
+-			break;
+ 
+-	if (i < rmesa->rmm->u_size + 1)
+-		return i;
++/**
++ * Free a DMA-based buffer.
++ */
++static void dma_free(radeon_bo_classic *bo)
++{
++	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr);
++	drm_radeon_mem_free_t memfree;
++	int ret;
++
++	memfree.region = RADEON_MEM_REGION_GART;
++	memfree.region_offset = bo->base.offset;
++	memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset;
+ 
+-	fprintf(stderr, "%p failed\n", ptr);
+-	return 0;
++	ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd,
++		DRM_RADEON_FREE, &memfree, sizeof(memfree));
++	if (ret) {
++		fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset);
++		fprintf(stderr, "ret = %s\n", strerror(-ret));
++		exit(1);
++	}
++
++	free(bo);
+ }
+ 
+-//#define MM_DEBUG
+-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
++static const radeon_bo_functions dma_bo_functions = {
++	.free = &dma_free
++};
++
++/**
++ * Call the DRM to allocate GART memory for the given (incomplete)
++ * buffer object.
++ */
++static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo,
++		unsigned long size, unsigned int alignment)
+ {
+ 	drm_radeon_mem_alloc_t alloc;
+-	int offset = 0, ret;
+-	int i, free = -1;
+-	int done_age;
+-	drm_radeon_mem_free_t memfree;
+-	int tries = 0;
+-	static int bytes_wasted = 0, allocated = 0;
++	int baseoffset;
++	int ret;
++
++	alloc.region = RADEON_MEM_REGION_GART;
++	alloc.alignment = alignment;
++	alloc.size = size;
++	alloc.region_offset = &baseoffset;
++
++	ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
++			DRM_RADEON_ALLOC, &alloc, sizeof(alloc));
++	if (ret) {
++		if (RADEON_DEBUG & DEBUG_MEMORY)
++			fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret);
++		return 0;
++	}
+ 
+-	if (size < 4096)
+-		bytes_wasted += 4096 - size;
++	bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset;
++	bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset;
+ 
+-	allocated += size;
++	return 1;
++}
+ 
+-#if 0
+-	static int t = 0;
+-	if (t != time(NULL)) {
+-		t = time(NULL);
+-		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
+-			rmesa->rmm->u_last, bytes_wasted / 1024,
+-			allocated / 1024);
++/**
++ * Allocate a DMA buffer.
++ */
++static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
++		unsigned long size, unsigned int alignment)
++{
++	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
++
++	bo->functions = &dma_bo_functions;
++
++	track_pending_buffers(bufmgr);
++	if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
++		if (RADEON_DEBUG & DEBUG_MEMORY)
++			fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size);
++		radeonFinish(bufmgr->rmesa->radeon.glCtx);
++		track_pending_buffers(bufmgr);
++		if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
++			WARN_ONCE(
++				"Ran out of GART memory (for %ld)!\n"
++				"Please consider adjusting GARTSize option.\n",
++				size);
++			free(bo);
++			return 0;
++		}
+ 	}
+-#endif
+ 
+-	memfree.region = RADEON_MEM_REGION_GART;
++	init_buffer(bufmgr, bo, size);
++	bo->validated = 1; /* DMA buffer offsets are always valid */
+ 
+-      again:
++	return &bo->base;
++}
++
++/**
++ * Free a command buffer
++ */
++static void cmdbuf_free(radeon_bo_classic *bo)
++{
++	free(bo->base.virtual);
++	free(bo);
++}
+ 
+-	done_age = radeonGetAge((radeonContextPtr) rmesa);
++static const radeon_bo_functions cmdbuf_bo_functions = {
++	.free = cmdbuf_free
++};
+ 
+-	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
+-		resize_u_list(rmesa);
++/**
++ * Allocate a command buffer.
++ *
++ * Command buffers are really just malloc'ed buffers. They are managed by
++ * the bufmgr to enable relocations.
++ */
++static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
++		unsigned long size)
++{
++	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
+ 
+-	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
+-		if (rmesa->rmm->u_list[i].ptr == NULL) {
+-			free = i;
+-			continue;
+-		}
++	bo->functions = &cmdbuf_bo_functions;
++	bo->base.virtual = malloc(size);
+ 
+-		if (rmesa->rmm->u_list[i].h_pending == 0 &&
+-		    rmesa->rmm->u_list[i].pending
+-		    && rmesa->rmm->u_list[i].age <= done_age) {
+-			memfree.region_offset =
+-			    (char *)rmesa->rmm->u_list[i].ptr -
+-			    (char *)rmesa->radeon.radeonScreen->gartTextures.
+-			    map;
++	init_buffer(bufmgr, bo, size);
++	return &bo->base;
++}
+ 
+-			ret =
+-			    drmCommandWrite(rmesa->radeon.radeonScreen->
+-					    driScreen->fd, DRM_RADEON_FREE,
+-					    &memfree, sizeof(memfree));
++/**
++ * Free a VRAM-based buffer object.
++ */
++static void vram_free(radeon_bo_classic *bo_base)
++{
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
+ 
+-			if (ret) {
+-				fprintf(stderr, "Failed to free at %p\n",
+-					rmesa->rmm->u_list[i].ptr);
+-				fprintf(stderr, "ret = %s\n", strerror(-ret));
+-				exit(1);
+-			} else {
+-#ifdef MM_DEBUG
+-				fprintf(stderr, "really freed %d at age %x\n",
+-					i,
+-					radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-				if (i == rmesa->rmm->u_last)
+-					rmesa->rmm->u_last--;
+-
+-				if (rmesa->rmm->u_list[i].size < 4096)
+-					bytes_wasted -=
+-					    4096 - rmesa->rmm->u_list[i].size;
+-
+-				allocated -= rmesa->rmm->u_list[i].size;
+-				rmesa->rmm->u_list[i].pending = 0;
+-				rmesa->rmm->u_list[i].ptr = NULL;
+-				free = i;
+-			}
+-		}
++	if (bo->vram) {
++		driDestroyTextureObject(&bo->vram->base);
++		bo->vram = 0;
+ 	}
+-	rmesa->rmm->u_head = i;
+-
+-	if (free == -1) {
+-		WARN_ONCE("Ran out of slots!\n");
+-		//usleep(100);
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-		tries++;
+-		if (tries > 100) {
+-			WARN_ONCE("Ran out of slots!\n");
+-			exit(1);
++
++	free(bo->base.base.virtual);
++	free(bo);
++}
++
++/**
++ * Allocate/update the copy in vram.
++ *
++ * Note: Assume we're called with the DRI lock held.
++ */
++static void vram_validate(radeon_bo_classic *bo_base)
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr);
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
++
++	if (!bo->vram) {
++		bo->backing_store_dirty = 1;
++
++		bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper));
++		bo->vram->bo = bo;
++		make_empty_list(&bo->vram->base);
++		bo->vram->base.totalSize = bo->base.base.size;
++		if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) {
++			fprintf(stderr, "Ouch! vram_validate failed\n");
++			free(bo->vram);
++			bo->base.base.offset = 0;
++			bo->vram = 0;
++			return;
+ 		}
+-		goto again;
+ 	}
+ 
+-	alloc.region = RADEON_MEM_REGION_GART;
+-	alloc.alignment = alignment;
+-	alloc.size = size;
+-	alloc.region_offset = &offset;
++	assert(bo->vram->base.memBlock);
+ 
+-	ret =
+-	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
+-				sizeof(alloc));
+-	if (ret) {
+-#if 0
+-		WARN_ONCE("Ran out of mem!\n");
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-		//usleep(100);
+-		tries2++;
+-		tries = 0;
+-		if (tries2 > 100) {
+-			WARN_ONCE("Ran out of GART memory!\n");
+-			exit(1);
++	bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs;
++
++	if (bo->backing_store_dirty) {
++		/* Copy to VRAM using a blit.
++		 * All memory is 4K aligned. We're using 1024 pixels wide blits.
++		 */
++		drm_radeon_texture_t tex;
++		drm_radeon_tex_image_t tmp;
++		int ret;
++
++		tex.offset = bo->base.base.offset;
++		tex.image = &tmp;
++
++		assert(!(tex.offset & 1023));
++
++		tmp.x = 0;
++		tmp.y = 0;
++		if (bo->base.base.size < 4096) {
++			tmp.width = (bo->base.base.size + 3) / 4;
++			tmp.height = 1;
++		} else {
++			tmp.width = 1024;
++			tmp.height = (bo->base.base.size + 4095) / 4096;
+ 		}
+-		goto again;
+-#else
+-		WARN_ONCE
+-		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
+-		     size);
+-		return 0;
+-#endif
++		tmp.data = bo->base.base.virtual;
++
++		tex.format = RADEON_TXFORMAT_ARGB8888;
++		tex.width = tmp.width;
++		tex.height = tmp.height;
++		tex.pitch = MAX2(tmp.width / 16, 1);
++
++		do {
++			ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
++						DRM_RADEON_TEXTURE, &tex,
++						sizeof(drm_radeon_texture_t));
++			if (ret) {
++				if (RADEON_DEBUG & DEBUG_IOCTL)
++					fprintf(stderr,
++						"DRM_RADEON_TEXTURE:  again!\n");
++				usleep(1);
++			}
++		} while (ret == -EAGAIN);
++
++		bo->backing_store_dirty = 0;
+ 	}
+ 
+-	i = free;
++	bo->base.validated = 1;
++}
+ 
+-	if (i > rmesa->rmm->u_last)
+-		rmesa->rmm->u_last = i;
++/* No need for actual mmap actions since we have backing store,
++ * but mark buffer dirty when necessary */
++static void vram_map(radeon_bo_classic *bo_base, GLboolean write)
++{
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
+ 
+-	rmesa->rmm->u_list[i].ptr =
+-	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
+-	rmesa->rmm->u_list[i].size = size;
+-	rmesa->rmm->u_list[i].age = 0;
+-	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
++	if (write) {
++		bo->base.validated = 0;
++		bo->backing_store_dirty = 1;
++	}
++}
+ 
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "allocated %d at age %x\n", i,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
++static void vram_bind(radeon_bo_classic *bo_base)
++{
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
+ 
+-	return i;
++	if (bo->vram) {
++		bo->vram->base.bound = 1;
++		driUpdateTextureLRU(&bo->vram->base);
++	}
+ }
+ 
+-void r300_mem_use(r300ContextPtr rmesa, int id)
++static void vram_unbind(radeon_bo_classic *bo_base)
+ {
+-	uint64_t ull;
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-	drm_r300_cmd_header_t *cmd;
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
+ 
+-	assert(id <= rmesa->rmm->u_last);
++	if (bo->vram)
++		bo->vram->base.bound = 0;
++}
+ 
+-	if (id == 0)
+-		return;
++/** Callback function called by the texture heap when a texture is evicted */
++static void destroy_vram_wrapper(void *data, driTextureObject *t)
++{
++	radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t;
++
++	if (wrapper->bo && wrapper->bo->vram == wrapper) {
++		wrapper->bo->base.validated = 0;
++		wrapper->bo->vram = 0;
++	}
++}
+ 
+-	cmd =
+-	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
+-						      2 + sizeof(ull) / 4,
+-						      __FUNCTION__);
+-	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
+-	cmd[0].scratch.reg = R300_MEM_SCRATCH;
+-	cmd[0].scratch.n_bufs = 1;
+-	cmd[0].scratch.flags = 0;
+-	cmd++;
++static const radeon_bo_functions vram_bo_functions = {
++	.free = vram_free,
++	.validate = vram_validate,
++	.map = vram_map,
++	.bind = vram_bind,
++	.unbind = vram_unbind
++};
+ 
+-	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
+-	_mesa_memcpy(cmd, &ull, sizeof(ull));
+-	cmd += sizeof(ull) / 4;
++/**
++ * Free a static buffer object (only the bookkeeping struct is released).
++ */
++static void static_free(radeon_bo_classic *bo_base)
++{
++	radeon_bo_vram *bo = get_bo_vram(bo_base);
+ 
+-	cmd[0].u = /*id */ 0;
++	free(bo);
++}
+ 
+-	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
+-	rmesa->rmm->u_list[id].h_pending++;
+-	UNLOCK_HARDWARE(&rmesa->radeon);
++static void static_map(radeon_bo_classic *bo_base, GLboolean write)
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr);
++
++	bo_base->base.virtual = bufmgr->rmesa->radeon.dri.screen->pFB +
++		(bo_base->base.offset - bufmgr->rmesa->radeon.radeonScreen->fbLocation);
++
++	/* Read the first pixel in the frame buffer.  This should
++	 * be a noop, right?  In fact without this conform fails as reading
++	 * from the framebuffer sometimes produces old results -- the
++	 * on-card read cache gets mixed up and doesn't notice that the
++	 * framebuffer has been updated.
++	 *
++	 * Note that we should probably be reading some otherwise unused
++	 * region of VRAM, otherwise we might get incorrect results when
++	 * reading pixels from the top left of the screen.
++	 *
++	 * I found this problem on an R420 with glean's texCube test.
++	 * Note that the R200 span code also *writes* the first pixel in the
++	 * framebuffer, but I've found this to be unnecessary.
++	 *  -- Nicolai Hähnle, June 2008
++	 */
++	{
++		int p;
++		volatile int *buf = (int*)bufmgr->rmesa->radeon.dri.screen->pFB;
++		p = *buf;
++	}
+ }
+ 
+-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
++static void static_unmap(radeon_bo_classic *bo_base)
+ {
+-	unsigned long offset;
++	bo_base->base.virtual = 0;
++}
++
++static const radeon_bo_functions static_bo_functions = {
++	.free = static_free,
++	.map = static_map,
++	.unmap = static_unmap
++};
+ 
+-	assert(id <= rmesa->rmm->u_last);
++/**
++ * Allocate a backing store buffer object that is validated into VRAM.
++ */
++static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
++		unsigned long size, unsigned int alignment)
++{
++	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));
+ 
+-	offset = (char *)rmesa->rmm->u_list[id].ptr -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
++	bo->base.functions = &vram_bo_functions;
++	bo->base.base.virtual = malloc(size);
++	init_buffer(bufmgr, &bo->base, size);
++	return &bo->base.base;
++}
+ 
+-	return offset;
++dri_bo *radeon_bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name,
++				       unsigned long size, unsigned int alignment,
++				       uint32_t location_mask)
++{
++	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
++
++	if (location_mask & DRM_BO_MEM_CMDBUF) {
++		return cmdbuf_alloc(bufmgr, name, size);
++	} else if (location_mask & DRM_BO_MEM_DMA) {
++		return dma_alloc(bufmgr, name, size, alignment);
++	} else {
++		return vram_alloc(bufmgr, name, size, alignment);
++	}
+ }
+ 
+-void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
++dri_bo *radeon_bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name,
++					      unsigned long offset, unsigned long size,
++					      void *virtual, uint32_t initial_domain)
+ {
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-	void *ptr;
+-	int tries = 0;
++  	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
++	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));
++
++	bo->base.functions = &static_bo_functions;
++	bo->base.base.virtual = virtual;
++	bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation;
++	bo->base.validated = 1; /* Static buffer offsets are always valid */
++
++	init_buffer(bufmgr, &bo->base, size);
++	return &bo->base.base;
++
++}
+ 
+-	assert(id <= rmesa->rmm->u_last);
++static void bufmgr_classic_bo_reference(dri_bo *bo_base)
++{
++	radeon_bo_classic *bo = get_bo_classic(bo_base);
++	bo->refcount++;
++	assert(bo->refcount > 0);
++}
+ 
+-	if (access == R300_MEM_R) {
++static void bufmgr_classic_bo_unreference(dri_bo *bo_base)
++{
++	radeon_bo_classic *bo = get_bo_classic(bo_base);
+ 
+-		if (rmesa->rmm->u_list[id].mapped == 1)
+-			WARN_ONCE("buffer %d already mapped\n", id);
++	if (!bo_base)
++		return;
+ 
+-		rmesa->rmm->u_list[id].mapped = 1;
+-		ptr = r300_mem_ptr(rmesa, id);
++	assert(bo->refcount > 0);
++	bo->refcount--;
++	if (!bo->refcount) {
++		// Ugly HACK - figure out whether this is really necessary
++		get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++;
+ 
+-		return ptr;
++		assert(!bo->mapcount);
++		if (!bo->pending)
++			bo_free(bo);
+ 	}
++}
+ 
+-	if (rmesa->rmm->u_list[id].h_pending)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-	if (rmesa->rmm->u_list[id].h_pending) {
+-		return NULL;
++static int bufmgr_classic_bo_map(dri_bo *bo_base, int write_enable)
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr);
++	radeon_bo_classic *bo = get_bo_classic(bo_base);
++	assert(bo->refcount > 0);
++
++	if (bo->pending) {
++		track_pending_buffers(bufmgr);
++		if (bo->pending) {
++			// TODO: Better fence waiting
++			if (RADEON_DEBUG & DEBUG_MEMORY)
++				fprintf(stderr, "bo_map: buffer is pending. Flushing...\n");
++			radeonFinish(bufmgr->rmesa->radeon.glCtx);
++			track_pending_buffers(bufmgr);
++			if (bo->pending) {
++				fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n");
++				abort();
++			}
++		}
+ 	}
+ 
+-	while (rmesa->rmm->u_list[id].age >
+-	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
+-		usleep(10);
++	if (!bo->mapcount && bo->functions->map)
++		bo->functions->map(bo, write_enable);
++
++	bo->mapcount++;
++	assert(bo->mapcount > 0);
++	return 0;
++}
++
++static int bufmgr_classic_bo_unmap(dri_bo *buf)
++{
++	radeon_bo_classic *bo = get_bo_classic(buf);
++	assert(bo->refcount > 0);
++	assert(bo->mapcount > 0);
++	bo->mapcount--;
++
++	if (!bo->mapcount && bo->functions->unmap)
++		bo->functions->unmap(bo);
++
++	return 0;
++}
+ 
+-	if (tries >= 1000) {
+-		fprintf(stderr, "Idling failed (%x vs %x)\n",
+-			rmesa->rmm->u_list[id].age,
+-			radeonGetAge((radeonContextPtr) rmesa));
+-		return NULL;
++/**
++ * Mark the given buffer as pending and move it to the tail
++ * of the pending list.
++ * The caller is responsible for setting up pending_count and pending_age.
++ */
++static void move_to_pending_tail(radeon_bo_classic *bo)
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
++
++	if (bo->pending) { /* already queued: unlink it from its current position first */
++		*bo->pending_pprev = bo->pending_next; /* the link that pointed at bo now skips it */
++		if (bo->pending_next)
++			bo->pending_next->pending_pprev = bo->pending_pprev;
++		else
++			bufmgr->pending_tail = bo->pending_pprev; /* bo was last: the tail moves back one link */
+ 	}
+ 
+-	if (rmesa->rmm->u_list[id].mapped == 1)
+-		WARN_ONCE("buffer %d already mapped\n", id);
++	bo->pending = 1;
++	bo->pending_pprev = bufmgr->pending_tail; /* pending_tail addresses the list's final next-link */
++	bo->pending_next = 0;
++	*bufmgr->pending_tail = bo;
++	bufmgr->pending_tail = &bo->pending_next;
++}
+ 
+-	rmesa->rmm->u_list[id].mapped = 1;
+-	ptr = r300_mem_ptr(rmesa, id);
++/**
++ * Emit commands to the batch buffer that cause the given buffer's
++ * pending_count and pending_age to be updated.
++ */
++static void emit_age_for_buffer(radeon_bo_classic* bo)
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
++	BATCH_LOCALS(bufmgr->rmesa);
++	drm_r300_cmd_header_t cmd;
++	uint64_t ull;
+ 
+-	return ptr;
++	cmd.scratch.cmd_type = R300_CMD_SCRATCH;
++	cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */
++	cmd.scratch.n_bufs = 1; /* a single buffer entry follows the header */
++	cmd.scratch.flags = 0;
++	ull = (uint64_t) (intptr_t) &bo->pending_age; /* address of pending_age, widened to 64 bits */
++
++	BEGIN_BATCH(4);
++	OUT_BATCH(cmd.u);
++	OUT_BATCH(ull & 0xffffffff); /* address, low dword */
++	OUT_BATCH(ull >> 32); /* address, high dword */
++	OUT_BATCH(0);
++	END_BATCH();
++	COMMIT_BATCH();
++
++	bo->pending_count++; /* presumably undone once the scratch write has landed - TODO confirm */
+ }
+ 
+-void r300_mem_unmap(r300ContextPtr rmesa, int id)
++int radeon_bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta,
++				     GLuint offset, dri_bo *target) /* append a relocation to batch_buf's list; always returns 0 */
+ {
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
++	radeon_bo_classic *bo = get_bo_classic(batch_buf);
++	radeon_reloc *reloc;
++
++	if (bo->relocs_used >= bo->relocs_size) { /* list is full: grow it */
++		bo->relocs_size *= 2;
++		if (bo->relocs_size < 32) /* capacity never drops below 32 entries */
++			bo->relocs_size = 32;
++
++		bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc)); /* NOTE(review): realloc result is not checked */
++	}
+ 
+-	assert(id <= rmesa->rmm->u_last);
++	reloc = &bo->relocs[bo->relocs_used++];
++	reloc->flags = flags;
++	reloc->offset = offset; /* byte offset of the dword to patch inside the batch buffer */
++	reloc->delta = delta; /* added to the target's offset when the reloc is resolved */
++	reloc->target = get_bo_classic(target);
++	dri_bo_reference(target); /* the reloc entry holds a reference on its target */
++	return 0;
++}
+ 
+-	if (rmesa->rmm->u_list[id].mapped == 0)
+-		WARN_ONCE("buffer %d not mapped\n", id);
++/* process_relocs is called just before the given command buffer
++ * is executed. It ensures that all referenced buffers are in
++ * the right GPU domain and patches their offsets into the batch.
++ */
++static void *bufmgr_classic_process_relocs(dri_bo *batch_buf)
++{
++	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
++	int i;
+ 
+-	rmesa->rmm->u_list[id].mapped = 0;
++	// Warning: At this point, we append something to the batch buffer
++	// during flush.
++	emit_age_for_buffer(batch_bo);
++
++	dri_bo_map(batch_buf, GL_TRUE);
++	for(i = 0; i < batch_bo->relocs_used; ++i) {
++		radeon_reloc *reloc = &batch_bo->relocs[i];
++		uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset); /* the dword this reloc patches */
++		uint32_t offset;
++
++		if (!reloc->target->validated)
++			reloc->target->functions->validate(reloc->target);
++		reloc->target->used = 1; /* read back by post_submit to mark the target pending */
++		offset = reloc->target->base.offset + reloc->delta;
++
++		if (reloc->flags & DRM_RELOC_BLITTER)
++			*dest = (*dest & 0xffc00000) | (offset >> 10); /* keep the top 10 bits; offset goes in 1 KiB units below */
++		else if (reloc->flags & DRM_RELOC_TXOFFSET)
++			*dest = (*dest & 31) | (offset & ~31); /* keep the low 5 bits; the rest comes from the offset */
++		else
++			*dest = offset; /* plain relocation: the dword becomes the offset */
++	}
++	dri_bo_unmap(batch_buf);
++	return 0; /* i.e. a null pointer, unconditionally */
+ }
+ 
+-void r300_mem_free(r300ContextPtr rmesa, int id)
++/* post_submit is called just after the given command buffer
++ * is executed. It ensures that buffers are properly marked as
++ * pending.
++ */
++static void bufmgr_classic_post_submit(dri_bo *batch_buf)
+ {
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
++	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
++	int i;
+ 
+-	assert(id <= rmesa->rmm->u_last);
++	assert(!batch_bo->pending_count);
+ 
+-	if (id == 0)
+-		return;
++	for(i = 0; i < batch_bo->relocs_used; ++i) {
++		radeon_reloc *reloc = &batch_bo->relocs[i];
+ 
+-	if (rmesa->rmm->u_list[id].ptr == NULL) {
+-		WARN_ONCE("Not allocated!\n");
+-		return;
++		if (reloc->target->used) { /* set by process_relocs; cleared below so a target with several relocs is queued once */
++			reloc->target->used = 0;
++			assert(!reloc->target->pending_count);
++			reloc->target->pending_age = batch_bo->pending_age; /* the target takes over the batch's age */
++			move_to_pending_tail(reloc->target);
++			if (reloc->target->functions->bind) /* bind is an optional hook */
++				(*reloc->target->functions->bind)(reloc->target);
++		}
+ 	}
++}
+ 
+-	if (rmesa->rmm->u_list[id].pending) {
+-		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
+-		return;
++static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx) /* release all remaining buffers, the texture heap and the manager itself */
++{
++	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
++
++	track_pending_buffers(bufmgr);
++	if (bufmgr->pending) /* buffers are still queued: presumably radeonFinish waits for the hardware - TODO confirm */
++		radeonFinish(bufmgr->rmesa->radeon.glCtx);
++	track_pending_buffers(bufmgr);
++
++	if (bufmgr->buffers) {
++		//fprintf(stderr, "Warning: Buffer objects have leaked\n");
++		while(bufmgr->buffers) { /* NOTE(review): terminates only if bo_free unlinks the head of the list - confirm */
++			//fprintf(stderr, "  Leak of size %ld\n", bufmgr->buffers->base.size);
++			bufmgr->buffers->refcount = 0; /* counts are zeroed before the forced free */
++			bufmgr->buffers->mapcount = 0;
++			bufmgr->buffers->pending = 0;
++			bo_free(bufmgr->buffers);
++		}
+ 	}
+ 
+-	rmesa->rmm->u_list[id].pending = 1;
++	driDestroyTextureHeap(bufmgr->texture_heap);
++	bufmgr->texture_heap = 0;
++	assert(is_empty_list(&bufmgr->texture_swapped));
++
++	free(bufmgr);
++}
++
++radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa) /* allocate a classic buffer manager for rmesa and return its base */
++{
++	radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic)); /* NOTE(review): result is used unchecked below */
++
++	bufmgr->rmesa = rmesa;
++	//	bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc;
++	bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference;
++	bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference;
++	bufmgr->base.base.bo_map = &bufmgr_classic_bo_map;
++	bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap;
++	bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs;
++	bufmgr->base.base.post_submit = &bufmgr_classic_post_submit;
++	bufmgr->base.base.destroy = &bufmgr_classic_destroy;
++
++	bufmgr->pending_tail = &bufmgr->pending; /* empty pending list: the tail link is the head pointer itself */
++
++	/* Init texture heap */
++	make_empty_list(&bufmgr->texture_swapped);
++	bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr,
++			rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS, /* NOTE(review): 12 is presumably a log2 granularity - confirm */
++			(drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0],
++			&rmesa->radeon.sarea->tex_age[0],
++			&bufmgr->texture_swapped, sizeof(radeon_vram_wrapper),
++			&destroy_vram_wrapper);
++	bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0];
++
++	return &bufmgr->base;
++}
++
++void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx) /* going by the name, run after a contended lock grab - TODO confirm caller */
++{
++	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base);
++
++	DRI_AGE_TEXTURES(bufmgr->texture_heap); /* re-age the manager's texture heap */
+ }
+-#endif
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
+index 625a7f6..4e9be65 100644
+--- a/src/mesa/drivers/dri/r300/r300_mem.h
++++ b/src/mesa/drivers/dri/r300/r300_mem.h
+@@ -1,37 +1,22 @@
+ #ifndef __R300_MEM_H__
+ #define __R300_MEM_H__
+ 
+-//#define R300_MEM_PDL 0
+-#define R300_MEM_UL 1
++#include "glheader.h"
++#include "dri_bufmgr.h"
+ 
+-#define R300_MEM_R 1
+-#define R300_MEM_W 2
+-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
++#include "r300_context.h"
+ 
+-#define R300_MEM_SCRATCH 2
+ 
+-struct r300_memory_manager {
+-	struct {
+-		void *ptr;
+-		uint32_t size;
+-		uint32_t age;
+-		uint32_t h_pending;
+-		int pending;
+-		int mapped;
+-	} *u_list;
+-	int u_head, u_size, u_last;
++/* Note: The following flags should probably be ultimately eliminated,
++ * or replaced by something else.
++ */
++#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) */
++#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */
+ 
+-};
++#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */
++#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */
+ 
+-extern void r300_mem_init(r300ContextPtr rmesa);
+-extern void r300_mem_destroy(r300ContextPtr rmesa);
+-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
+-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
+-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
+-extern void r300_mem_use(r300ContextPtr rmesa, int id);
+-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
+-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
+-extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
+-extern void r300_mem_free(r300ContextPtr rmesa, int id);
++radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa);
++void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx);
+ 
+ #endif
+diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
+new file mode 100644
+index 0000000..75b7d32
+--- /dev/null
++++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
+@@ -0,0 +1,248 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ *
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#include "r300_mipmap_tree.h"
++
++#include <errno.h>
++#include <unistd.h>
++
++#include "simple_list.h"
++#include "texcompress.h"
++#include "texformat.h"
++
++#include "r300_mem.h"
++
++static GLuint r300_compressed_texture_size(GLcontext *ctx,
++		GLsizei width, GLsizei height, GLsizei depth,
++		GLuint mesaFormat) /* returns core Mesa's compressed size, scaled up for narrow images */
++{
++	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); /* start from core Mesa's figure */
++
++	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
++	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
++		if (width + 3 < 8)	/* width one block */
++			size = size * 4; /* NOTE(review): presumably pads rows to a hardware minimum pitch - confirm */
++		else if (width + 3 < 16) /* width two or three blocks */
++			size = size * 2;
++	} else {
++		/* DXT3/5, 16 bytes per block */
++		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
++		if (width + 3 < 8) /* width one block */
++			size = size * 2;
++	}
++
++	return size;
++}
++
++/**
++ * Compute the size in bytes and the offset of the given image (determined
++ * by \p face and \p level, where \p level is an index into mt->levels).
++ *
++ * \param curOffset points to the offset at which the image is to be stored
++ * and is updated by this function according to the size of the image.
++ */
++static void compute_tex_image_offset(r300_mipmap_tree *mt,
++	GLuint face, GLuint level, GLuint* curOffset)
++{
++	r300_mipmap_level *lvl = &mt->levels[level];
++
++	/* Find image size in bytes */
++	if (mt->compressed) {
++		lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx,
++			lvl->width, lvl->height, lvl->depth, mt->compressed);
++	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
++		lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height; /* rows padded to 64 bytes; depth is not factored in */
++	} else if (mt->tilebits & R300_TXO_MICRO_TILE) {
++		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
++		 * though the actual offset may be different (if texture is less than
++		 * 32 bytes width) to the untiled case */
++		int w = (lvl->width * mt->bpp * 2 + 31) & ~31;
++		lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth; /* two rows share one padded line, hence (height + 1) / 2 */
++	} else {
++		int w = (lvl->width * mt->bpp + 31) & ~31; /* rows padded to 32 bytes */
++		lvl->size = w * lvl->height * lvl->depth;
++	}
++	assert(lvl->size > 0);
++
++	/* All images are aligned to a 32-byte offset */
++	*curOffset = (*curOffset + 0x1f) & ~0x1f;
++	lvl->faces[face].offset = *curOffset;
++	*curOffset += lvl->size; /* the next image starts after this one */
++}
++
++static GLuint minify(GLuint size, GLuint levels)
++{
++	/* Halve size once per mip level, but never go below a single texel. */
++	GLuint reduced = size >> levels;
++
++	return reduced >= 1 ? reduced : 1;
++}
++
++static void calculate_miptree_layout(r300_mipmap_tree *mt) /* fill mt->levels[] and mt->totalsize from the tree's dimensions */
++{
++	GLuint curOffset;
++	GLuint numLevels;
++	GLuint i;
++
++	numLevels = mt->lastLevel - mt->firstLevel + 1;
++	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); /* capacity of mt->levels[] */
++
++	curOffset = 0;
++	for(i = 0; i < numLevels; i++) {
++		GLuint face;
++
++		mt->levels[i].width = minify(mt->width0, mt->firstLevel + i); /* levels[i] describes GL level firstLevel + i */
++		mt->levels[i].height = minify(mt->height0, mt->firstLevel + i);
++		mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i);
++
++		for(face = 0; face < mt->faces; face++) /* all faces of a level are placed before the next level */
++			compute_tex_image_offset(mt, face, i, &curOffset);
++	}
++
++	/* Note the required size in memory */
++	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; /* rounded up to a multiple of RADEON_OFFSET_MASK + 1 */
++}
++
++
++/**
++ * Create a new mipmap tree, calculate its layout and allocate memory.
++ */
++r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
++		GLenum target, GLuint firstLevel, GLuint lastLevel,
++		GLuint width0, GLuint height0, GLuint depth0,
++		GLuint bpp, GLuint tilebits, GLuint compressed)
++{
++	r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree); /* NOTE(review): allocation result is used unchecked */
++
++	mt->r300 = rmesa;
++	mt->t = t;
++	mt->target = target;
++	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; /* cube maps carry six images per level */
++	mt->firstLevel = firstLevel;
++	mt->lastLevel = lastLevel;
++	mt->width0 = width0;
++	mt->height0 = height0;
++	mt->depth0 = depth0;
++	mt->bpp = bpp;
++	mt->tilebits = tilebits;
++	mt->compressed = compressed;
++
++	calculate_miptree_layout(mt); /* sets mt->totalsize, which sizes the allocation below */
++
++	mt->bo = radeon_bufmgr_classic_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0); /* one buffer for the whole tree; 1024 is presumably the alignment - confirm */
++
++	return mt;
++}
++
++/**
++ * Destroy the given mipmap tree: drop its buffer object reference and free the tree itself. \p mt must not be NULL.
++ */
++void r300_miptree_destroy(r300_mipmap_tree *mt)
++{
++	dri_bo_unreference(mt->bo);
++	free(mt);
++}
++
++/* Copy a width x height rectangle of cpp-byte texels from src to dst.
++ * XXX Move this into core Mesa?
++ */
++static void
++_mesa_copy_rect(GLubyte * dst,           /* destination image base */
++                GLuint cpp,              /* bytes per texel */
++                GLuint dst_pitch,        /* destination row pitch, in texels */
++                GLuint dst_x,            /* destination column, in texels */
++                GLuint dst_y,            /* destination row */
++                GLuint width,            /* rectangle width, in texels */
++                GLuint height,           /* rectangle height, in rows */
++                const GLubyte * src,     /* source image base */
++                GLuint src_pitch, GLuint src_x, GLuint src_y) /* source pitch, column and row, same units */
++{
++   GLuint i;
++
++   dst_pitch *= cpp;   /* pitches and width are in bytes from here on */
++   src_pitch *= cpp;
++   dst += dst_x * cpp;
++   src += src_x * cpp;
++   dst += dst_y * dst_pitch;
++   src += src_y * src_pitch;   /* source rows are src_pitch apart, not dst_pitch */
++   width *= cpp;
++
++   if (width == dst_pitch && width == src_pitch)
++      memcpy(dst, src, height * width);   /* both sides tightly packed: one contiguous copy */
++   else {
++      for (i = 0; i < height; i++) {
++         memcpy(dst, src, width);
++         dst += dst_pitch;
++         src += src_pitch;
++      }
++   }
++}
++
++/**
++ * Upload the given texture image to the given face/level of the mipmap tree.
++ * \param level of the texture, i.e. \c level==mt->firstLevel is the first hw level
++ */
++void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
++			       struct gl_texture_image *texImage)
++{
++	GLuint hwlevel = level - mt->firstLevel; /* index into mt->levels */
++	r300_mipmap_level *lvl = &mt->levels[hwlevel];
++	void *dest;
++
++	assert(face < mt->faces);
++	assert(level >= mt->firstLevel && level <= mt->lastLevel);
++	assert(texImage && texImage->Data);
++	assert(texImage->Width == lvl->width);
++	assert(texImage->Height == lvl->height);
++	assert(texImage->Depth == lvl->depth);
++
++	dri_bo_map(mt->bo, GL_TRUE);
++
++	dest = mt->bo->virtual + lvl->faces[face].offset; /* start of this image inside the mapped buffer */
++
++	if (mt->tilebits)
++		WARN_ONCE("%s: tiling not supported yet\n", __FUNCTION__); /* newline-terminated like the file's other WARN_ONCE messages */
++
++	if (!mt->compressed) {
++		GLuint dst_align;
++		GLuint dst_pitch = lvl->width; /* pitches are in texels; _mesa_copy_rect scales them by bpp */
++		GLuint src_pitch = lvl->width;
++
++		if (mt->target == GL_TEXTURE_RECTANGLE_NV)
++			dst_align = 64 / mt->bpp; /* 64-byte rows, matching the layout computation */
++		else
++			dst_align = 32 / mt->bpp; /* 32-byte rows */
++		dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1);
++
++		_mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height, /* NOTE(review): copies lvl->height rows only; depth slices are not walked */
++				texImage->Data, src_pitch, 0, 0);
++	} else {
++		memcpy(dest, texImage->Data, lvl->size); /* NOTE(review): lvl->size may be the padded size - confirm Data holds that many bytes */
++	}
++
++	dri_bo_unmap(mt->bo);
++}
+diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
+new file mode 100644
+index 0000000..a888ecf
+--- /dev/null
++++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
+@@ -0,0 +1,91 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ *
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#ifndef __R300_MIPMAP_TREE_H_
++#define __R300_MIPMAP_TREE_H_
++
++#include "r300_context.h"
++
++typedef struct _r300_mipmap_tree r300_mipmap_tree;
++typedef struct _r300_mipmap_level r300_mipmap_level;
++typedef struct _r300_mipmap_image r300_mipmap_image;
++
++struct _r300_mipmap_image {
++	GLuint offset; /**< Offset of this image from the start of mipmap tree, in bytes */
++};
++
++struct _r300_mipmap_level {
++	GLuint width; /**< Width of this level, in texels */
++	GLuint height; /**< Height of this level, in texels */
++	GLuint depth; /**< Depth of this level, in texels */
++	GLuint size; /**< Size of each image, in bytes */
++	r300_mipmap_image faces[6]; /**< Per-face placement; only the first mt->faces entries are filled in */
++};
++
++
++/**
++ * A mipmap tree contains texture images in the layout that the hardware
++ * expects.
++ *
++ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
++ * layout on-the-fly; however, the texture contents (i.e. texels) can be
++ * changed.
++ */
++struct _r300_mipmap_tree {
++	r300ContextPtr r300; /**< Context the tree was created with */
++	r300TexObj *t; /**< Texture object passed to r300_miptree_create() */
++	dri_bo *bo; /**< Buffer object holding every image of the tree */
++
++	GLuint totalsize; /**< total size of the miptree, in bytes */
++
++	GLenum target; /**< GL_TEXTURE_xxx */
++	GLuint faces; /**< # of faces: 6 for cubemaps, 1 otherwise */
++	GLuint firstLevel; /**< First mip level stored in this mipmap tree */
++	GLuint lastLevel; /**< Last mip level stored in this mipmap tree */
++
++	GLuint width0; /**< Width of level 0 image */
++	GLuint height0; /**< Height of level 0 image */
++	GLuint depth0; /**< Depth of level 0 image */
++
++	GLuint bpp; /**< Bytes per texel */
++	GLuint tilebits; /**< R300_TXO_xxx_TILE */
++	GLuint compressed; /**< MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
++
++	r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; /**< Per-level layout; levels[0] is firstLevel */
++};
++
++r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
++		GLenum target, GLuint firstLevel, GLuint lastLevel,
++		GLuint width0, GLuint height0, GLuint depth0,
++		GLuint bpp, GLuint tilebits, GLuint compressed);
++void r300_miptree_destroy(r300_mipmap_tree *mt);
++
++void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
++			       struct gl_texture_image *texImage);
++
++
++#endif /* __R300_MIPMAP_TREE_H_ */
+diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
+index 0a199e6..209fae9 100644
+--- a/src/mesa/drivers/dri/r300/r300_render.c
++++ b/src/mesa/drivers/dri/r300/r300_render.c
+@@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
+ 	void *out;
+ 
+-	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
+-		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
+-		rvb->start = ((char *)elts) - rvb->address;
+-		rvb->aos_offset =
+-		    rmesa->radeon.radeonScreen->gart_texture_offset +
+-		    rvb->start;
+-		return;
+-	} else if (r300IsGartMemory(rmesa, elts, 1)) {
+-		WARN_ONCE("Pointer not within GART memory!\n");
+-		_mesa_exit(-1);
+-	}
+-
+-	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
+-	rvb->aos_offset = GET_START(rvb);
++	r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset,
++			   n_elts * 4, 4);
+ 
+-	out = rvb->address + rvb->start;
++	out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset;
+ 	memcpy(out, elts, n_elts * 4);
+ }
+ 
+-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
+-		       int vertex_count, int type)
++static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(rmesa);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
++	BEGIN_BATCH(8);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
++	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
+-	e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
+-	e32(addr);
+-	e32(vertex_count);
++	OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
++	OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
++	OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0);
++	OUT_BATCH(vertex_count);
++	END_BATCH();
+ }
+ 
+ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+ {
++	BATCH_LOCALS(rmesa);
+ 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ 	int i;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+ 			offset);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
+-	e32(nr);
++	BEGIN_BATCH(sz+2);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++	OUT_BATCH(nr);
+ 
+ 	for (i = 0; i + 1 < nr; i += 2) {
+-		e32((rmesa->state.aos[i].aos_size << 0) |
+-		    (rmesa->state.aos[i].aos_stride << 8) |
+-		    (rmesa->state.aos[i + 1].aos_size << 16) |
+-		    (rmesa->state.aos[i + 1].aos_stride << 24));
+-
+-		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
+-		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
++		OUT_BATCH((rmesa->state.aos[i].components << 0) |
++			  (rmesa->state.aos[i].stride << 8) |
++			  (rmesa->state.aos[i + 1].components << 16) |
++			  (rmesa->state.aos[i + 1].stride << 24));
++
++		OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo,
++			rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0);
++		OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo,
++			rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0);
+ 	}
+ 
+ 	if (nr & 1) {
+-		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
+-		    (rmesa->state.aos[nr - 1].aos_stride << 8));
+-		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
++		OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
++			  (rmesa->state.aos[nr - 1].stride << 8));
++		OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo,
++			rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0);
+ 	}
++	END_BATCH();
+ }
+ 
+ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(rmesa);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++	BEGIN_BATCH(3);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++	END_BATCH();
+ }
+ 
+ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ 				   int start, int end, int prim)
+ {
++	BATCH_LOCALS(rmesa);
+ 	int type, num_verts;
+ 	TNLcontext *tnl = TNL_CONTEXT(ctx);
+ 	struct vertex_buffer *vb = &tnl->vb;
+@@ -268,6 +258,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ 	if (type < 0 || num_verts <= 0)
+ 		return;
+ 
++	/* Make space for at least 64 dwords.
++	 * This is supposed to ensure that we can get all rendering
++	 * commands into a single command buffer.
++	 */
++	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);
++
+ 	if (vb->Elts) {
+ 		if (num_verts > 65535) {
+ 			/* not implemented yet */
+@@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ 		 */
+ 		r300EmitElts(ctx, vb->Elts, num_verts);
+ 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+-		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
++		r300FireEB(rmesa, num_verts, type);
+ 	} else {
+ 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ 		r300FireAOS(rmesa, num_verts, type);
+ 	}
++	COMMIT_BATCH();
+ }
+ 
+ static GLboolean r300RunRender(GLcontext * ctx,
+@@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ 
+ 	r300EmitCacheFlush(rmesa);
+ 
+-#ifdef USER_BUFFERS
+-	r300UseArrays(ctx);
+-#endif
+-
+ 	r300ReleaseArrays(ctx);
+ 
+ 	return GL_FALSE;
+diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
+index 15cd053..589327d 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.c
++++ b/src/mesa/drivers/dri/r300/r300_state.c
+@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "radeon_ioctl.h"
+ #include "radeon_state.h"
++#include "radeon_buffer.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+@@ -1146,39 +1147,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx)
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	r300ContextPtr r300 = rmesa;
+ 	struct gl_framebuffer *fb = ctx->DrawBuffer;
+-	driRenderbuffer *drb;
++	struct radeon_renderbuffer *rrb;
+ 
+ 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ 		/* draw to front */
+-		drb =
+-		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
+-		    Renderbuffer;
++		rrb =
++		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+ 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+ 		/* draw to back */
+-		drb =
+-		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
+-		    Renderbuffer;
++		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ 	} else {
+ 		/* drawing to multiple buffers, or none */
+ 		return;
+ 	}
+ 
+-	assert(drb);
+-	assert(drb->flippedPitch);
++	assert(rrb);
++	assert(rrb->pitch);
+ 
+ 	R300_STATECHANGE(rmesa, cb);
+ 
+-	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+ #if 0
+ 	R200_STATECHANGE(rmesa, ctx);
+ 
+@@ -1497,14 +1484,9 @@ static void r300SetupTextures(GLcontext * ctx)
+ 	/* We cannot let disabled tmu offsets pass DRM */
+ 	for (i = 0; i < mtu; i++) {
+ 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
+-
+-#if 0				/* Enables old behaviour */
+-			hw_tmu = i;
+-#endif
+ 			tmu_mappings[i] = hw_tmu;
+ 
+-			t = r300->state.texture.unit[i].texobj;
+-			/* XXX questionable fix for bug 9170: */
++			t = r300_tex_obj(ctx->Texture.Unit[i]._Current);
+ 			if (!t)
+ 				continue;
+ 
+@@ -1530,21 +1512,20 @@ static void r300SetupTextures(GLcontext * ctx)
+ 			 */
+ 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ 				t->filter_1 |
+-				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
++				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
+ 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ 			    t->size;
+ 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+ 						hw_tmu] = t->format;
+ 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ 			    t->pitch_reg;
+-			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
+-						hw_tmu] = t->offset;
++			r300->hw.textures[hw_tmu] = t;
+ 
+-			if (t->offset & R300_TXO_MACRO_TILE) {
++			if (t->tile_bits & R300_TXO_MACRO_TILE) {
+ 				WARN_ONCE("macro tiling enabled!\n");
+ 			}
+ 
+-			if (t->offset & R300_TXO_MICRO_TILE) {
++			if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ 				WARN_ONCE("micro tiling enabled!\n");
+ 			}
+ 
+@@ -2371,20 +2352,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 
+ 	r300BlendColor(ctx, ctx->Color.BlendColor);
+ 
+-	/* Again, r300ClearBuffer uses this */
+-	r300->hw.cb.cmd[R300_CB_OFFSET] =
+-	    r300->radeon.state.color.drawOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-
+ 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+ 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+ 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+@@ -2400,10 +2367,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
+ 
+-	r300->hw.zb.cmd[R300_ZB_OFFSET] =
+-	    r300->radeon.radeonScreen->depthOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
+ 
+ 	if (r300->radeon.sarea->tiling_enabled) {
+ 		/* XXX: Turn off when clearing buffers ? */
+diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
+index 0589ab7..96177ba 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.h
++++ b/src/mesa/drivers/dri/r300/r300_state.h
+@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R300_FIREVERTICES( r300 )			\
+ do {							\
+     \
+-   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
++   if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) {	\
+       r300Flush( (r300)->radeon.glCtx );		\
+    }							\
+     \
+diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
+index 8aebd9b..f4a0b7f 100644
+--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
++++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
+@@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
+ 
+ 
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset);
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
+ #define EMIT_ATTR( ATTR, STYLE )					\
+ do {									\
+@@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 			inputs[i] = -1;
+ 		}
+ 	}
+-	
++
+ 	/* Fixed, apply to vir0 only */
+ 	if (InputsRead & (1 << VERT_ATTRIB_POS))
+ 		inputs[VERT_ATTRIB_POS] = 0;
+@@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ 		if (InputsRead & (1 << i))
+ 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+-	
++
+ 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ 		if (InputsRead & (1 << i)) {
+ 			tab[nr++] = i;
+ 		}
+ 	}
+-	
++
+ 	for (i = 0; i < nr; i++) {
+ 		int ci;
+-		
++
+ 		swizzle[i][0] = SWIZZLE_ZERO;
+ 		swizzle[i][1] = SWIZZLE_ZERO;
+ 		swizzle[i][2] = SWIZZLE_ZERO;
+@@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+ 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+ 				   nr);
+-   
++
+ 	R300_STATECHANGE(rmesa, vic);
+ 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+-   
++
+ 	R300_STATECHANGE(rmesa, vof);
+ 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+-   
++
+ 	rmesa->swtcl.vertex_size =
+ 		_tnl_install_attrs( ctx,
+-				    rmesa->swtcl.vertex_attrs, 
++				    rmesa->swtcl.vertex_attrs,
+ 				    rmesa->swtcl.vertex_attr_count,
+ 				    NULL, 0 );
+-	
++
+ 	rmesa->swtcl.vertex_size /= 4;
+ 
+ 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+@@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx )
+  */
+ static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
+ {
++	BATCH_LOCALS(rmesa);
++
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+ 		fprintf(stderr, "%s\n", __FUNCTION__);
+-	
++
+ 	rmesa->dma.flush = NULL;
+ 
+-	if (rmesa->dma.current.buf) {
+-		struct r300_dma_region *current = &rmesa->dma.current;
+-		GLuint current_offset = GET_START(current);
++	if (rmesa->dma.current) {
++		GLuint current_offset = rmesa->dma.current_used;
+ 
+-		assert (current->start + 
++		assert (rmesa->dma.current_used +
+ 			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-			current->ptr);
++			rmesa->dma.current_vertexptr);
+ 
+-		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
++		if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) {
++			rmesa->dma.current_used = rmesa->dma.current_vertexptr;
+ 
+ 			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+-			
++
+ 			r300EmitState(rmesa);
+-			
++
+ 			r300EmitVertexAOS( rmesa,
+ 					   rmesa->swtcl.vertex_size,
+-					   current_offset);
+-			
++					   rmesa->dma.current, current_offset);
++
+ 			r300EmitVbufPrim( rmesa,
+ 					  rmesa->swtcl.hw_primitive,
+ 					  rmesa->swtcl.numverts);
+-			
++
+ 			r300EmitCacheFlush(rmesa);
++			COMMIT_BATCH();
+ 		}
+-		
++
+ 		rmesa->swtcl.numverts = 0;
+-		current->start = current->ptr;
+ 	}
+ }
+ 
+@@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+ {
+ 	GLuint bytes = vsize * nverts;
+ 
+-	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
++	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size)
+ 		r300RefillCurrentDmaRegion( rmesa, bytes);
+ 
+ 	if (!rmesa->dma.flush) {
+@@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+ 
+ 	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+ 	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+-	ASSERT( rmesa->dma.current.start + 
++	ASSERT( rmesa->dma.current_used +
+ 		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-		rmesa->dma.current.ptr );
++		rmesa->dma.current_vertexptr );
+ 
+ 	{
+-		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+-		rmesa->dma.current.ptr += bytes;
++		GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr);
++		rmesa->dma.current_vertexptr += bytes;
+ 		rmesa->swtcl.numverts += nverts;
+ 		return head;
+ 	}
+@@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
+    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
+    const char *r300verts = (char *)rmesa->swtcl.verts;
+ #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+-#define VERTEX r300Vertex 
++#define VERTEX r300Vertex
+ #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
+ #define PRINT_VERTEX(x)
+ #undef TAG
+@@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx)
+         r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ 	//	fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-	r300ChooseRenderState(ctx);	
++	r300ChooseRenderState(ctx);
+ 	r300SetVertexFormat(ctx);
+ 
+ 	r300UpdateShaders(rmesa);
+ 	r300UpdateShaderStates(rmesa);
+ 
+ 	r300EmitCacheFlush(rmesa);
+-	
+-	if (rmesa->dma.flush != 0 && 
++
++	if (rmesa->dma.flush != 0 &&
+ 	    rmesa->dma.flush != flush_last_swtcl_prim)
+ 		rmesa->dma.flush( rmesa );
+ 
+@@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx)
+ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	
++
+ 	if (rmesa->swtcl.hw_primitive != hwprim) {
+ 	        R300_NEWPRIM( rmesa );
+ 		rmesa->swtcl.hw_primitive = hwprim;
+@@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+ 
+ 	r300RasterPrimitive( ctx, reduced_prim[prim] );
+ 	//	fprintf(stderr, "%s\n", __FUNCTION__);
+-	
++
+ }
+ 
+ static void r300ResetLineStipple(GLcontext *ctx)
+@@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	TNLcontext *tnl = TNL_CONTEXT(ctx);
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	static int firsttime = 1;
+-	
++
+ 	if (firsttime) {
+ 		init_rast_tab();
+ 		firsttime = 0;
+ 	}
+-	
++
+ 	tnl->Driver.Render.Start = r300RenderStart;
+ 	tnl->Driver.Render.Finish = r300RenderFinish;
+ 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+@@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ 	tnl->Driver.Render.Interp = _tnl_interp;
+-	
++
+ 	/* FIXME: what are these numbers? */
+-	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
++	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 			    48 * sizeof(GLfloat) );
+-	
++
+ 	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ 	rmesa->swtcl.RenderIndex = ~0;
+ 	rmesa->swtcl.render_primitive = GL_TRIANGLES;
+-	rmesa->swtcl.hw_primitive = 0;	
++	rmesa->swtcl.hw_primitive = 0;
+ 
+ 	_tnl_invalidate_vertex_state( ctx, ~0 );
+ 	_tnl_invalidate_vertices( ctx, ~0 );
+@@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	_tnl_need_projected_coords( ctx, GL_FALSE );
+ 	r300ChooseRenderState(ctx);
+ 
+-	_mesa_validate_all_lighting_tables( ctx ); 
++	_mesa_validate_all_lighting_tables( ctx );
+ 
+-	tnl->Driver.NotifyMaterialChange = 
++	tnl->Driver.NotifyMaterialChange =
+ 	  _mesa_validate_all_lighting_tables;
+ }
+ 
+@@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx)
+ {
+ }
+ 
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
++	BATCH_LOCALS(rmesa);
+ 
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+-	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
+-		  __FUNCTION__, vertex_size, offset);
+-
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
+-	e32(1);
+-	e32(vertex_size | (vertex_size << 8));
+-	e32(offset);
++		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
++			__FUNCTION__, vertex_size, offset);
++
++	BEGIN_BATCH(5);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
++	OUT_BATCH(1);
++	OUT_BATCH(vertex_size | (vertex_size << 8));
++	OUT_BATCH_RELOC(0, bo, offset, 0);
++	END_BATCH();
+ }
+ 
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+ {
+-
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
++	BATCH_LOCALS(rmesa);
+ 	int type, num_verts;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 
+ 	type = r300PrimitiveType(rmesa, primitive);
+ 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
+-	
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++
++	BEGIN_BATCH(3);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++	END_BATCH();
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
+index f7f4972..c6ee1b5 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.c
++++ b/src/mesa/drivers/dri/r300/r300_tex.c
+@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
++#include "r300_mipmap_tree.h"
+ #include "r300_tex.h"
+ 
+ #include "xmlpool.h"
+@@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
+  */
+ static void r300UpdateTexWrap(r300TexObjPtr t)
+ {
+-	struct gl_texture_object *tObj = t->base.tObj;
++	struct gl_texture_object *tObj = &t->base;
+ 
+ 	t->filter &=
+ 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
+@@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
+ 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+ }
+ 
+-/**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
+-{
+-	r300TexObjPtr t;
+-
+-	t = CALLOC_STRUCT(r300_tex_obj);
+-	texObj->DriverData = t;
+-	if (t != NULL) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+-				(void *)texObj, (void *)t);
+-		}
+-
+-		/* Initialize non-image-dependent parts of the state:
+-		 */
+-		t->base.tObj = texObj;
+-		t->border_fallback = GL_FALSE;
+-
+-		make_empty_list(&t->base);
+-
+-		r300UpdateTexWrap(t);
+-		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+-		r300SetTexBorderColor(t, texObj->_BorderChan);
+-	}
+-
+-	return t;
+-}
+-
+ /* try to find a format which will only need a memcopy */
+ static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
+ 							       GLenum srcType)
+@@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
+ 	return NULL;		/* never get here */
+ }
+ 
+-static GLboolean
+-r300ValidateClientStorage(GLcontext * ctx, GLenum target,
+-			  GLint internalFormat,
+-			  GLint srcWidth, GLint srcHeight,
+-			  GLenum format, GLenum type, const void *pixels,
+-			  const struct gl_pixelstore_attrib *packing,
+-			  struct gl_texture_object *texObj,
+-			  struct gl_texture_image *texImage)
++/**
++ * Marks the given face/level pair as dirty.
++ * This will cause an appropriate texture reupload the next time this
++ * texture is validated.
++ */
++static void mark_texture_image_dirty(r300TexObj *t, int face, int level)
+ {
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "intformat %s format %s type %s\n",
+-			_mesa_lookup_enum_by_nr(internalFormat),
+-			_mesa_lookup_enum_by_nr(format),
+-			_mesa_lookup_enum_by_nr(type));
+-
+-	if (!ctx->Unpack.ClientStorage)
+-		return 0;
+-
+-	if (ctx->_ImageTransferState ||
+-	    texImage->IsCompressed || texObj->GenerateMipmap)
+-		return 0;
+-
+-	/* This list is incomplete, may be different on ppc???
+-	 */
+-	switch (internalFormat) {
+-	case GL_RGBA:
+-		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+-			texImage->TexFormat = _dri_texformat_argb8888;
+-		} else
+-			return 0;
+-		break;
+-
+-	case GL_RGB:
+-		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+-			texImage->TexFormat = _dri_texformat_rgb565;
+-		} else
+-			return 0;
+-		break;
+-
+-	case GL_YCBCR_MESA:
+-		if (format == GL_YCBCR_MESA &&
+-		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
+-			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+-		} else if (format == GL_YCBCR_MESA &&
+-			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+-			    type == GL_UNSIGNED_BYTE)) {
+-			texImage->TexFormat = &_mesa_texformat_ycbcr;
+-		} else
+-			return 0;
+-		break;
+-
+-	default:
+-		return 0;
+-	}
+-
+-	/* Could deal with these packing issues, but currently don't:
+-	 */
+-	if (packing->SkipPixels ||
+-	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
+-		return 0;
+-	}
+-
+-	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+-						    format, type);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "%s: srcRowStride %d/%x\n",
+-			__FUNCTION__, srcRowStride, srcRowStride);
+-
+-	/* Could check this later in upload, pitch restrictions could be
+-	 * relaxed, but would need to store the image pitch somewhere,
+-	 * as packing details might change before image is uploaded:
+-	 */
+-	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
+-	    || (srcRowStride & 63))
+-		return 0;
+-
+-	/* Have validated that _mesa_transfer_teximage would be a straight
+-	 * memcpy at this point.  NOTE: future calls to TexSubImage will
+-	 * overwrite the client data.  This is explicitly mentioned in the
+-	 * extension spec.
+-	 */
+-	texImage->Data = (void *)pixels;
+-	texImage->IsClientData = GL_TRUE;
+-	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
+-
+-	return 1;
++	t->dirty_images[face] |= 1 << level;
+ }
+ 
+ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+@@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+ 			   struct gl_texture_object *texObj,
+ 			   struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+-			return;
+-		}
+-	}
+-
+-	/* Note, this will call ChooseTextureFormat */
+ 	_mesa_store_teximage1d(ctx, target, level, internalFormat,
+ 			       width, border, format, type, pixels,
+ 			       &ctx->Unpack, texObj, texImage);
+ 
+-	t->dirty_images[0] |= (1 << level);
++	mark_texture_image_dirty(t, 0, level);
+ }
+ 
+ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+@@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+ 			      struct gl_texture_object *texObj,
+ 			      struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+-			return;
+-		}
+-	}
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+ 	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+ 				  format, type, pixels, packing, texObj,
+ 				  texImage);
+ 
+-	t->dirty_images[0] |= (1 << level);
++	mark_texture_image_dirty(t, 0, level);
+ }
+ 
+ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+@@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+ 			   struct gl_texture_object *texObj,
+ 			   struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 	GLuint face;
+ 
+ 	/* which cube face or ordinary 2D image */
+@@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+ 		face = 0;
+ 	}
+ 
+-	if (t != NULL) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+-			return;
+-		}
+-	}
+-
+ 	texImage->IsClientData = GL_FALSE;
+ 
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_teximage2d(ctx, target, level, internalFormat,
+-				       width, height, border, format, type,
+-				       pixels, &ctx->Unpack, texObj, texImage);
++	if (RADEON_DEBUG & DEBUG_TEXTURE)
++		fprintf(stderr, "%s: Using normal storage\n",
++			__FUNCTION__);
++
++	/* Normal path: copy (to cached memory) and eventually upload
++	 * via another copy to GART memory and then a blit...  Could
++	 * eliminate one copy by going straight to (permanent) GART.
++	 *
++	 * Note, this will call r300ChooseTextureFormat.
++	 */
++	_mesa_store_teximage2d(ctx, target, level, internalFormat,
++				width, height, border, format, type,
++				pixels, &ctx->Unpack, texObj, texImage);
+ 
+-		t->dirty_images[face] |= (1 << level);
+-	}
++	mark_texture_image_dirty(t, face, level);
+ }
+ 
+ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+@@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+ 			      struct gl_texture_object *texObj,
+ 			      struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 	GLuint face;
+ 
+ 	/* which cube face or ordinary 2D image */
+@@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+ 		face = 0;
+ 	}
+ 
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+-			return;
+-		}
+-	}
+-
+ 	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+ 				  height, format, type, pixels, packing, texObj,
+ 				  texImage);
+ 
+-	t->dirty_images[face] |= (1 << level);
++	mark_texture_image_dirty(t, face, level);
+ }
+ 
+ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+@@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+ 				     struct gl_texture_object *texObj,
+ 				     struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 	GLuint face;
+ 
+ 	/* which cube face or ordinary 2D image */
+@@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+ 		face = 0;
+ 	}
+ 
+-	if (t != NULL) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
+-				    "glCompressedTexImage2D");
+-			return;
+-		}
+-	}
+-
+ 	texImage->IsClientData = GL_FALSE;
+ 
+-	/* can't call this, different parameters. Would never evaluate to true anyway currently */
+-#if 0
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else
+-#endif
+-	{
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_compressed_teximage2d(ctx, target, level,
+-						  internalFormat, width, height,
+-						  border, imageSize, data,
+-						  texObj, texImage);
++	if (RADEON_DEBUG & DEBUG_TEXTURE)
++		fprintf(stderr, "%s: Using normal storage\n",
++			__FUNCTION__);
++
++	/* Normal path: copy (to cached memory) and eventually upload
++	 * via another copy to GART memory and then a blit...  Could
++	 * eliminate one copy by going straight to (permanent) GART.
++	 *
++	 * Note, this will call r300ChooseTextureFormat.
++	 */
++	_mesa_store_compressed_teximage2d(ctx, target, level,
++						internalFormat, width, height,
++						border, imageSize, data,
++						texObj, texImage);
+ 
+-		t->dirty_images[face] |= (1 << level);
+-	}
++	mark_texture_image_dirty(t, face, level);
+ }
+ 
+ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+@@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+ 					struct gl_texture_object *texObj,
+ 					struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 	GLuint face;
+ 
+ 	/* which cube face or ordinary 2D image */
+@@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+ 		face = 0;
+ 	}
+ 
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
+-				    "glCompressedTexSubImage3D");
+-			return;
+-		}
+-	}
+-
+ 	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
+ 					     yoffset, width, height, format,
+ 					     imageSize, data, texObj, texImage);
+ 
+-	t->dirty_images[face] |= (1 << level);
++	mark_texture_image_dirty(t, face, level);
+ }
+ 
+ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+@@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+ 			   struct gl_texture_object *texObj,
+ 			   struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+-			return;
+-		}
+-	}
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+ 	texImage->IsClientData = GL_FALSE;
+ 
+-#if 0
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else
+-#endif
+-	{
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_teximage3d(ctx, target, level, internalFormat,
+-				       width, height, depth, border,
+-				       format, type, pixels,
+-				       &ctx->Unpack, texObj, texImage);
++	if (RADEON_DEBUG & DEBUG_TEXTURE)
++		fprintf(stderr, "%s: Using normal storage\n",
++			__FUNCTION__);
++
++	/* Normal path: copy (to cached memory) and eventually upload
++	 * via another copy to GART memory and then a blit...  Could
++	 * eliminate one copy by going straight to (permanent) GART.
++	 *
++	 * Note, this will call r300ChooseTextureFormat.
++	 */
++	_mesa_store_teximage3d(ctx, target, level, internalFormat,
++				width, height, depth, border,
++				format, type, pixels,
++				&ctx->Unpack, texObj, texImage);
+ 
+-		t->dirty_images[0] |= (1 << level);
+-	}
++	mark_texture_image_dirty(t, 0, level);
+ }
+ 
+ static void
+@@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+ 		  struct gl_texture_object *texObj,
+ 		  struct gl_texture_image *texImage)
+ {
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+-			return;
+-		}
+-		texObj->DriverData = t;
+-	}
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+ 	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+ 				  width, height, depth,
+ 				  format, type, pixels, packing, texObj,
+ 				  texImage);
+ 
+-	t->dirty_images[0] |= (1 << level);
++	mark_texture_image_dirty(t, 0, level);
+ }
+ 
+ /**
+@@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 			     struct gl_texture_object *texObj,
+ 			     GLenum pname, const GLfloat * params)
+ {
+-	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+ 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+@@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 		 * we just have to rely on loading the right subset of mipmap levels
+ 		 * to simulate a clamped LOD.
+ 		 */
+-		driSwapOutTextureObject((driTextureObject *) t);
++		if (t->mt) {
++			r300_miptree_destroy(t->mt);
++			t->mt = 0;
++		}
+ 		break;
+ 
+ 	case GL_DEPTH_TEXTURE_MODE:
+@@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 	}
+ }
+ 
+-static void r300BindTexture(GLcontext * ctx, GLenum target,
+-			    struct gl_texture_object *texObj)
+-{
+-	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+-		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
+-			(void *)texObj, ctx->Texture.CurrentUnit);
+-	}
+-
+-	if ((target == GL_TEXTURE_1D)
+-	    || (target == GL_TEXTURE_2D)
+-	    || (target == GL_TEXTURE_3D)
+-	    || (target == GL_TEXTURE_CUBE_MAP)
+-	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
+-		assert(texObj->DriverData != NULL);
+-	}
+-}
+-
+ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	r300TexObj* t = r300_tex_obj(texObj);
+ 
+ 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+@@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ 			_mesa_lookup_enum_by_nr(texObj->Target));
+ 	}
+ 
+-	if (t != NULL) {
+-		if (rmesa) {
+-			R300_FIREVERTICES(rmesa);
+-		}
++	if (rmesa) {
++		int i;
++		R300_FIREVERTICES(rmesa);
++
++		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
++			if (rmesa->hw.textures[i] == t)
++				rmesa->hw.textures[i] = 0;
++	}
+ 
+-		driDestroyTextureObject(t);
++	if (t->mt) {
++		r300_miptree_destroy(t->mt);
++		t->mt = 0;
+ 	}
+-	/* Free mipmap images and the texture object itself */
+ 	_mesa_delete_texture_object(ctx, texObj);
+ }
+ 
+@@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+  * Called via ctx->Driver.NewTextureObject.
+  * Note: this function will be called during context creation to
+  * allocate the default texture objects.
+- * Note: we could use containment here to 'derive' the driver-specific
+- * texture object from the core mesa gl_texture_object.  Not done at this time.
+  * Fixup MaxAnisotropy according to user preference.
+  */
+ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+@@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+ 						      GLenum target)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_object *obj;
+-	obj = _mesa_new_texture_object(ctx, name, target);
+-	if (!obj)
+-		return NULL;
+-	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
++	r300TexObj* t = CALLOC_STRUCT(r300_tex_obj);
++	if (!t)
++		return NULL;	/* allocation failed: t->base must not be touched below */
++	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++			(void *)t, _mesa_lookup_enum_by_nr(target));
++	}
++
++	_mesa_initialize_texture_object(&t->base, name, target);
++	t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy;
++
++	/* Initialize hardware state */
++	r300UpdateTexWrap(t);
++	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
++	r300SetTexBorderColor(t, t->base._BorderChan);
+ 
+-	r300AllocTexObj(obj);
+-	return obj;
++	return &t->base;
+ }
+ 
+ void r300InitTextureFuncs(struct dd_function_table *functions)
+@@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
+ 	functions->TexSubImage2D = r300TexSubImage2D;
+ 	functions->TexSubImage3D = r300TexSubImage3D;
+ 	functions->NewTextureObject = r300NewTextureObject;
+-	functions->BindTexture = r300BindTexture;
+ 	functions->DeleteTexture = r300DeleteTexture;
+ 	functions->IsTextureResident = driIsTextureResident;
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
+index b86d45b..5d7f21e 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.h
++++ b/src/mesa/drivers/dri/r300/r300_tex.h
+@@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx);
+ extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
+ 			       GLuint face);
+ 
+-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
+-
+ extern void r300InitTextureFuncs(struct dd_function_table *functions);
+ 
+ #endif				/* __r300_TEX_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
+index 69847a4..b3b501b 100644
+--- a/src/mesa/drivers/dri/r300/r300_texmem.c
++++ b/src/mesa/drivers/dri/r300/r300_texmem.c
+@@ -48,439 +48,15 @@ SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_cmdbuf.h"
++#include "r300_emit.h"
++#include "r300_mipmap_tree.h"
+ #include "radeon_ioctl.h"
+ #include "r300_tex.h"
+ #include "r300_ioctl.h"
+ #include <unistd.h>		/* for usleep() */
+ 
+-#ifdef USER_BUFFERS
+ #include "r300_mem.h"
+-#endif
+ 
+-/**
+- * Destroy any device-dependent state associated with the texture.  This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
+-{
+-	int i;
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+-			(void *)t, (void *)t->base.tObj);
+-	}
+-
+-	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
+-		if (rmesa->state.texture.unit[i].texobj == t) {
+-			rmesa->state.texture.unit[i].texobj = NULL;
+-		}
+-	}
+-}
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
+-					 r300TexObjPtr t,
+-					 struct gl_texture_image *texImage,
+-					 GLint hwlevel,
+-					 GLint x, GLint y,
+-					 GLint width, GLint height)
+-{
+-	const struct gl_texture_format *texFormat = texImage->TexFormat;
+-	GLuint srcPitch, dstPitch;
+-	int blit_format;
+-	int srcOffset;
+-
+-	/*
+-	 * XXX it appears that we always upload the full image, not a subimage.
+-	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
+-	 * changed, the src pitch will have to change.
+-	 */
+-	switch (texFormat->TexelBytes) {
+-	case 1:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 2:
+-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 4:
+-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 8:
+-	case 16:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	default:
+-		return;
+-	}
+-
+-	t->image[0][hwlevel].data = texImage->Data;
+-	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+-
+-	assert(srcOffset != ~0);
+-
+-	/* Don't currently need to cope with small pitches?
+-	 */
+-	width = texImage->Width;
+-	height = texImage->Height;
+-
+-	if (texFormat->TexelBytes > 4) {
+-		width *= texFormat->TexelBytes;
+-	}
+-
+-	r300EmitWait(rmesa, R300_WAIT_3D);
+-
+-	r300EmitBlit(rmesa, blit_format,
+-		     srcPitch,
+-		     srcOffset,
+-		     dstPitch,
+-		     t->bufAddr,
+-		     x,
+-		     y,
+-		     t->image[0][hwlevel].x + x,
+-		     t->image[0][hwlevel].y + y, width, height);
+-
+-	r300EmitWait(rmesa, R300_WAIT_2D);
+-}
+-
+-static void r300UploadRectSubImage(r300ContextPtr rmesa,
+-				   r300TexObjPtr t,
+-				   struct gl_texture_image *texImage,
+-				   GLint x, GLint y, GLint width, GLint height)
+-{
+-	const struct gl_texture_format *texFormat = texImage->TexFormat;
+-	int blit_format, dstPitch, done;
+-
+-	switch (texFormat->TexelBytes) {
+-	case 1:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		break;
+-	case 2:
+-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
+-		break;
+-	case 4:
+-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+-		break;
+-	case 8:
+-	case 16:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		break;
+-	default:
+-		return;
+-	}
+-
+-	t->image[0][0].data = texImage->Data;
+-
+-	/* Currently don't need to cope with small pitches.
+-	 */
+-	width = texImage->Width;
+-	height = texImage->Height;
+-	dstPitch = t->pitch;
+-
+-	if (texFormat->TexelBytes > 4) {
+-		width *= texFormat->TexelBytes;
+-	}
+-
+-	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+-		/* In this case, could also use GART texturing.  This is
+-		 * currently disabled, but has been tested & works.
+-		 */
+-		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+-		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
+-
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr,
+-				"Using GART texturing for rectangular client texture\n");
+-
+-		/* Release FB memory allocated for this image:
+-		 */
+-		/* FIXME This may not be correct as driSwapOutTextureObject sets
+-		 * FIXME dirty_images.  It may be fine, though.
+-		 */
+-		if (t->base.memBlock) {
+-			driSwapOutTextureObject((driTextureObject *) t);
+-		}
+-	} else if (texImage->IsClientData) {
+-		/* Data already in GART memory, with usable pitch.
+-		 */
+-		GLuint srcPitch;
+-		srcPitch = texImage->RowStride * texFormat->TexelBytes;
+-		r300EmitBlit(rmesa,
+-			     blit_format,
+-			     srcPitch,
+-			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
+-			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
+-	} else {
+-		/* Data not in GART memory, or bad pitch.
+-		 */
+-		for (done = 0; done < height;) {
+-			struct r300_dma_region region;
+-			int lines =
+-			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
+-			int src_pitch;
+-			char *tex;
+-
+-			src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+-			tex = (char *)texImage->Data + done * src_pitch;
+-
+-			memset(&region, 0, sizeof(region));
+-			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
+-					   1024);
+-
+-			/* Copy texdata to dma:
+-			 */
+-			if (RADEON_DEBUG & DEBUG_TEXTURE)
+-				fprintf(stderr,
+-					"%s: src_pitch %d dst_pitch %d\n",
+-					__FUNCTION__, src_pitch, dstPitch);
+-
+-			if (src_pitch == dstPitch) {
+-				memcpy(region.address + region.start, tex,
+-				       lines * src_pitch);
+-			} else {
+-				char *buf = region.address + region.start;
+-				int i;
+-				for (i = 0; i < lines; i++) {
+-					memcpy(buf, tex, src_pitch);
+-					buf += dstPitch;
+-					tex += src_pitch;
+-				}
+-			}
+-
+-			r300EmitWait(rmesa, R300_WAIT_3D);
+-
+-			/* Blit to framebuffer
+-			 */
+-			r300EmitBlit(rmesa,
+-				     blit_format,
+-				     dstPitch, GET_START(&region),
+-				     dstPitch | (t->tile_bits >> 16),
+-				     t->bufAddr, 0, 0, 0, done, width, lines);
+-
+-			r300EmitWait(rmesa, R300_WAIT_2D);
+-#ifdef USER_BUFFERS
+-			r300_mem_use(rmesa, region.buf->id);
+-#endif
+-
+-			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
+-			done += lines;
+-		}
+-	}
+-}
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
+-			       GLint hwlevel,
+-			       GLint x, GLint y, GLint width, GLint height,
+-			       GLuint face)
+-{
+-	struct gl_texture_image *texImage = NULL;
+-	GLuint offset;
+-	GLint imageWidth, imageHeight;
+-	GLint ret;
+-	drm_radeon_texture_t tex;
+-	drm_radeon_tex_image_t tmp;
+-	const int level = hwlevel + t->base.firstLevel;
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-		fprintf(stderr,
+-			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+-			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
+-			width, height, face);
+-	}
+-
+-	ASSERT(face < 6);
+-
+-	/* Ensure we have a valid texture to upload */
+-	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
+-		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+-		return;
+-	}
+-
+-	texImage = t->base.tObj->Image[face][level];
+-
+-	if (!texImage) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: texImage %d is NULL!\n",
+-				__FUNCTION__, level);
+-		return;
+-	}
+-	if (!texImage->Data) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: image data is NULL!\n",
+-				__FUNCTION__);
+-		return;
+-	}
+-
+-	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-		assert(level == 0);
+-		assert(hwlevel == 0);
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: image data is rectangular\n",
+-				__FUNCTION__);
+-		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
+-		return;
+-	} else if (texImage->IsClientData) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr,
+-				"%s: image data is in GART client storage\n",
+-				__FUNCTION__);
+-		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
+-					     width, height);
+-		return;
+-	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "%s: image data is in normal memory\n",
+-			__FUNCTION__);
+-
+-	imageWidth = texImage->Width;
+-	imageHeight = texImage->Height;
+-
+-	offset = t->bufAddr;
+-
+-	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+-		GLint imageX = 0;
+-		GLint imageY = 0;
+-		GLint blitX = t->image[face][hwlevel].x;
+-		GLint blitY = t->image[face][hwlevel].y;
+-		GLint blitWidth = t->image[face][hwlevel].width;
+-		GLint blitHeight = t->image[face][hwlevel].height;
+-		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
+-			imageWidth, imageHeight, imageX, imageY);
+-		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
+-			blitWidth, blitHeight, blitX, blitY);
+-		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+-			(GLuint) offset, hwlevel, level);
+-	}
+-
+-	t->image[face][hwlevel].data = texImage->Data;
+-
+-	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+-	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+-	 * We used to use 1, 2 and 4-byte texels and used to use the texture
+-	 * width to dictate the blit width - but that won't work for compressed
+-	 * textures. (Brian)
+-	 * NOTE: can't do that with texture tiling. (sroland)
+-	 */
+-	tex.offset = offset;
+-	tex.image = &tmp;
+-	/* copy (x,y,width,height,data) */
+-	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
+-
+-	if (texImage->TexFormat->TexelBytes > 4) {
+-		const int log2TexelBytes =
+-		    (3 + (texImage->TexFormat->TexelBytes >> 4));
+-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
+-		tex.pitch =
+-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+-			 64, 1);
+-		tex.height = imageHeight;
+-		tex.width = imageWidth << log2TexelBytes;
+-		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
+-		tmp.x = tmp.x % (1024 >> log2TexelBytes);
+-		tmp.width = tmp.width << log2TexelBytes;
+-	} else if (texImage->TexFormat->TexelBytes) {
+-		/* use multi-byte upload scheme */
+-		tex.height = imageHeight;
+-		tex.width = imageWidth;
+-		switch (texImage->TexFormat->TexelBytes) {
+-		case 1:
+-			tex.format = RADEON_TXFORMAT_I8;
+-			break;
+-		case 2:
+-			tex.format = RADEON_TXFORMAT_AI88;
+-			break;
+-		case 4:
+-			tex.format = RADEON_TXFORMAT_ARGB8888;
+-			break;
+-		}
+-		tex.pitch =
+-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+-			 64, 1);
+-		tex.offset += tmp.x & ~1023;
+-		tmp.x = tmp.x % 1024;
+-
+-		if (t->tile_bits & R300_TXO_MICRO_TILE) {
+-			/* need something like "tiled coordinates" ? */
+-			tmp.y = tmp.x / (tex.pitch * 128) * 2;
+-			tmp.x =
+-			    tmp.x % (tex.pitch * 128) / 2 /
+-			    texImage->TexFormat->TexelBytes;
+-			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+-		} else {
+-			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+-		}
+-#if 1
+-		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
+-		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
+-		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
+-			 && (texImage->Height >= 8))
+-			|| (texImage->Height >= 16))) {
+-			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+-			   OR if height is smaller than 8 automatically, but if micro tiling is active
+-			   the limit is height 16 instead ? */
+-			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+-		}
+-#endif
+-	} else {
+-		/* In case of for instance 8x8 texture (2x2 dxt blocks),
+-		   padding after the first two blocks is needed (only
+-		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+-		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
+-		   has 4 real pixels. Needed so the kernel module reads
+-		   the right amount of data. */
+-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
+-		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
+-		tex.height = (imageHeight + 3) / 4;
+-		tex.width = (imageWidth + 3) / 4;
+-		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
+-			tex.width *= 8;
+-		} else {
+-			tex.width *= 16;
+-		}
+-	}
+-
+-	LOCK_HARDWARE(&rmesa->radeon);
+-	do {
+-		ret =
+-		    drmCommandWriteRead(rmesa->radeon.dri.fd,
+-					DRM_RADEON_TEXTURE, &tex,
+-					sizeof(drm_radeon_texture_t));
+-		if (ret) {
+-			if (RADEON_DEBUG & DEBUG_IOCTL)
+-				fprintf(stderr,
+-					"DRM_RADEON_TEXTURE:  again!\n");
+-			usleep(1);
+-		}
+-	} while (ret == -EAGAIN);
+-
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (ret) {
+-		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
+-		fprintf(stderr, "   offset=0x%08x\n", offset);
+-		fprintf(stderr, "   image width=%d height=%d\n",
+-			imageWidth, imageHeight);
+-		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
+-			t->image[face][hwlevel].width,
+-			t->image[face][hwlevel].height,
+-			t->image[face][hwlevel].data);
+-		_mesa_exit(-1);
+-	}
+-}
+ 
+ /**
+  * Upload the texture images associated with texture \a t.  This might
+@@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
+ 
+ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
+ {
+-	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+ 	if (t->image_override)
+ 		return 0;
++	if (!t->mt)
++		return 0;
+ 
+ 	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+-		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+-			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
+-			t->base.totalSize, t->base.firstLevel,
+-			t->base.lastLevel);
++		fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__,
++			(void *)rmesa->radeon.glCtx, (void *)t,
++			t->mt->firstLevel, t->mt->lastLevel);
+ 	}
+ 
+-	if (t->base.totalSize == 0)
+-		return 0;
+-
+ 	if (RADEON_DEBUG & DEBUG_SYNC) {
+ 		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+ 		radeonFinish(rmesa->radeon.glCtx);
+ 	}
+ 
+-	LOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (t->base.memBlock == NULL) {
+-		int heap;
+-
+-		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
+-					  (driTextureObject *) t);
+-		if (heap == -1) {
+-			UNLOCK_HARDWARE(&rmesa->radeon);
+-			return -1;
+-		}
+-
+-		/* Set the base offset of the texture image */
+-		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
+-		    + t->base.memBlock->ofs;
+-		t->offset = t->bufAddr;
+-
+-		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+-			/* hope it's safe to add that here... */
+-			t->offset |= t->tile_bits;
+-		}
+-	}
+-
+-	/* Let the world know we've used this memory recently.
+-	 */
+-	driUpdateTextureLRU((driTextureObject *) t);
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+ 	/* Upload any images that are new */
+-	if (t->base.dirty_images[face]) {
+-		int i;
++	if (t->dirty_images[face]) {
++		int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1;
+ 		for (i = 0; i < numLevels; i++) {
+-			if ((t->base.
+-			     dirty_images[face] & (1 <<
+-						   (i + t->base.firstLevel))) !=
+-			    0) {
+-				r300UploadSubImage(rmesa, t, i, 0, 0,
+-						   t->image[face][i].width,
+-						   t->image[face][i].height,
+-						   face);
++			if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) {
++				r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i,
++					t->base.Image[face][t->mt->firstLevel + i]);
+ 			}
+ 		}
+-		t->base.dirty_images[face] = 0;
++		t->dirty_images[face] = 0;
+ 	}
+ 
+ 	if (RADEON_DEBUG & DEBUG_SYNC) {
+diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
+index d19832f..5cb9010 100644
+--- a/src/mesa/drivers/dri/r300/r300_texstate.c
++++ b/src/mesa/drivers/dri/r300/r300_texstate.c
+@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "radeon_ioctl.h"
++#include "r300_mipmap_tree.h"
+ #include "r300_tex.h"
+ #include "r300_reg.h"
+ 
+@@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ 	if (!tObj)
+ 		return;
+ 
+-	t = (r300TexObjPtr) tObj->DriverData;
+-
++	t = r300_tex_obj(tObj);
+ 
+ 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
+ 	case MESA_FORMAT_Z16:
+@@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ }
+ 
+ 
+-/**
+- * Compute sizes and fill in offset and blit information for the given
+- * image (determined by \p face and \p level).
+- *
+- * \param curOffset points to the offset at which the image is to be stored
+- * and is updated by this function according to the size of the image.
+- */
+-static void compute_tex_image_offset(
+-	struct gl_texture_object *tObj,
+-	GLuint face,
+-	GLint level,
+-	GLint* curOffset)
++static void calculate_first_last_level(struct gl_texture_object *tObj,
++				       GLuint *pfirstLevel, GLuint *plastLevel)
+ {
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-	const struct gl_texture_image* texImage;
+-	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
+-	GLuint texelBytes;
+-	GLuint size;
+-
+-	texImage = tObj->Image[0][level + t->base.firstLevel];
+-	if (!texImage)
+-		return;
+-
+-	texelBytes = texImage->TexFormat->TexelBytes;
+-
+-	/* find image size in bytes */
+-	if (texImage->IsCompressed) {
+-		if ((t->format & R300_TX_FORMAT_DXT1) ==
+-			R300_TX_FORMAT_DXT1) {
+-			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+-			if ((texImage->Width + 3) < 8)	/* width one block */
+-				size = texImage->CompressedSize * 4;
+-			else if ((texImage->Width + 3) < 16)
+-				size = texImage->CompressedSize * 2;
+-			else
+-				size = texImage->CompressedSize;
++	const struct gl_texture_image * const baseImage =
++		tObj->Image[0][tObj->BaseLevel];
++
++	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
++	* and having firstLevel and lastLevel as signed prevents the need for
++	* extra sign checks.
++	*/
++	int   firstLevel;
++	int   lastLevel;
++
++	/* Yes, this looks overly complicated, but it's all needed.
++	*/
++	switch (tObj->Target) {
++	case GL_TEXTURE_1D:
++	case GL_TEXTURE_2D:
++	case GL_TEXTURE_3D:
++	case GL_TEXTURE_CUBE_MAP:
++		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
++			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
++			*/
++			firstLevel = lastLevel = tObj->BaseLevel;
+ 		} else {
+-			/* DXT3/5, 16 bytes per block */
+-			WARN_ONCE
+-				("DXT 3/5 suffers from multitexturing problems!\n");
+-			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+-			if ((texImage->Width + 3) < 8)
+-				size = texImage->CompressedSize * 2;
+-			else
+-				size = texImage->CompressedSize;
++			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
++			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
++			firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
++			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
++			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
++			lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
++			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
++			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+ 		}
+-	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-		size =
+-			((texImage->Width * texelBytes +
+-			63) & ~63) * texImage->Height;
+-		blitWidth = 64 / texelBytes;
+-	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+-		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+-			though the actual offset may be different (if texture is less than
+-			32 bytes width) to the untiled case */
+-		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+-		size =
+-			(w * ((texImage->Height + 1) / 2)) *
+-			texImage->Depth;
+-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-	} else {
+-		int w = (texImage->Width * texelBytes + 31) & ~31;
+-		size = w * texImage->Height * texImage->Depth;
+-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-	}
+-	assert(size > 0);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+-			texImage->Width, texImage->Height,
+-			texImage->Depth,
+-			texImage->TexFormat->TexelBytes,
+-			texImage->InternalFormat);
+-
+-	/* All images are aligned to a 32-byte offset */
+-	*curOffset = (*curOffset + 0x1f) & ~0x1f;
+-
+-	if (texelBytes) {
+-		/* fix x and y coords up later together with offset */
+-		t->image[face][level].x = *curOffset;
+-		t->image[face][level].y = 0;
+-		t->image[face][level].width =
+-			MIN2(size / texelBytes, blitWidth);
+-		t->image[face][level].height =
+-			(size / texelBytes) / t->image[face][level].width;
+-	} else {
+-		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
+-		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
+-		t->image[face][level].width =
+-			MIN2(size, R300_BLIT_WIDTH_BYTES);
+-		t->image[face][level].height = size / t->image[face][level].width;
++		break;
++	case GL_TEXTURE_RECTANGLE_NV:
++	case GL_TEXTURE_4D_SGIS:
++		firstLevel = lastLevel = 0;
++		break;
++	default:
++		return;
+ 	}
+ 
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr,
+-			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+-			level, face, texImage->Width, texImage->Height,
+-			t->image[face][level].x, t->image[face][level].y,
+-			t->image[face][level].width, t->image[face][level].height,
+-			size, *curOffset);
+-
+-	*curOffset += size;
++	/* save these values */
++	*pfirstLevel = firstLevel;
++	*plastLevel = lastLevel;
+ }
+ 
+ 
+-
+ /**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here.  \c filter, \c format, etc. will be set here
+- * too.
++ * This function ensures a validated miptree is available.
++ *
++ * Additionally, some texture format bits are configured here.
+  *
+  * \param rmesa Context pointer
+  * \param tObj GL texture object whose images are to be posted to
+@@ -309,13 +250,13 @@ static void compute_tex_image_offset(
+ static void r300SetTexImages(r300ContextPtr rmesa,
+ 			     struct gl_texture_object *tObj)
+ {
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 	const struct gl_texture_image *baseImage =
+ 	    tObj->Image[0][tObj->BaseLevel];
+-	GLint curOffset;
+-	GLint i, texelBytes;
+-	GLint numLevels;
+-	GLint log2Width, log2Height, log2Depth;
++	GLint texelBytes;
++	GLuint firstLevel = 0, lastLevel = 0;
++
++	calculate_first_last_level(tObj, &firstLevel, &lastLevel);
+ 
+ 	/* Set the hardware texture format
+ 	 */
+@@ -335,112 +276,66 @@ static void r300SetTexImages(r300ContextPtr rmesa,
+ 	}
+ 
+ 	texelBytes = baseImage->TexFormat->TexelBytes;
+-
+-	/* Compute which mipmap levels we really want to send to the hardware.
+-	 */
+-	driCalculateTextureFirstLastLevel((driTextureObject *) t);
+-	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+-	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+-	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+-	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+-
+-	/* Calculate mipmap offsets and dimensions for blitting (uploading)
+-	 * The idea is that we lay out the mipmap levels within a block of
+-	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+-	 */
+ 	t->tile_bits = 0;
+ 
+-	/* figure out if this texture is suitable for tiling. */
+-#if 0				/* Disabled for now */
+-	if (texelBytes) {
+-		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+-		    /* texrect might be able to use micro tiling too in theory? */
+-		    (baseImage->Height > 1)) {
+-
+-			/* allow 32 (bytes) x 1 mip (which will use two times the space
+-			   the non-tiled version would use) max if base texture is large enough */
+-			if ((numLevels == 1) ||
+-			    (((baseImage->Width * texelBytes /
+-			       baseImage->Height) <= 32)
+-			     && (baseImage->Width * texelBytes > 64))
+-			    ||
+-			    ((baseImage->Width * texelBytes /
+-			      baseImage->Height) <= 16)) {
+-				t->tile_bits |= R300_TXO_MICRO_TILE;
+-			}
+-		}
++	if (tObj->Target == GL_TEXTURE_CUBE_MAP) t->format |= R300_TX_FORMAT_CUBIC_MAP;
++	else if (tObj->Target == GL_TEXTURE_3D) t->format |= R300_TX_FORMAT_3D; /* set on every path, not only when image_override is active */
+ 
+-		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+-			/* we can set macro tiling even for small textures, they will be untiled anyway */
+-			t->tile_bits |= R300_TXO_MACRO_TILE;
++	if (!t->image_override) {
++		GLuint compressed = baseImage->IsCompressed ? baseImage->TexFormat->MesaFormat : 0;
++
++		if (t->mt) {
++			if (t->mt->firstLevel != firstLevel ||
++			    t->mt->lastLevel != lastLevel ||
++			    t->mt->width0 != baseImage->Width ||
++			    t->mt->height0 != baseImage->Height ||
++			    t->mt->depth0 != baseImage->Depth ||
++			    t->mt->bpp != texelBytes ||
++			    t->mt->tilebits != t->tile_bits ||
++			    t->mt->compressed != compressed) {
++				r300_miptree_destroy(t->mt);
++				t->mt = 0;
++			}
+ 		}
+-	}
+-#endif
+-
+-	curOffset = 0;
+ 
+-	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-		ASSERT(log2Width == log2Height);
+-		t->format |= R300_TX_FORMAT_CUBIC_MAP;
+-
+-		for(i = 0; i < numLevels; i++) {
+-			GLuint face;
+-			for(face = 0; face < 6; face++)
+-				compute_tex_image_offset(tObj, face, i, &curOffset);
++		if (!t->mt) {
++			t->mt = r300_miptree_create(rmesa, t, tObj->Target,
++				firstLevel, lastLevel,
++				baseImage->Width, baseImage->Height, baseImage->Depth,
++				texelBytes, t->tile_bits, compressed);
++			memset(t->dirty_images, 0xff, sizeof(t->dirty_images));
+ 		}
+ 	} else {
+ 		if (tObj->Target == GL_TEXTURE_3D)
+                 	t->format |= R300_TX_FORMAT_3D;
+-
+-		for (i = 0; i < numLevels; i++)
+-			compute_tex_image_offset(tObj, 0, i, &curOffset);
+ 	}
+ 
+ 	/* Align the total size of texture memory block.
+ 	 */
+-	t->base.totalSize =
+-	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
++	//	dritex->totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+ 
+ 	t->size =
+-	    (((tObj->Image[0][t->base.firstLevel]->Width -
++	    (((tObj->Image[0][firstLevel]->Width -
+ 	       1) << R300_TX_WIDTHMASK_SHIFT)
+-	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
++	     | ((tObj->Image[0][firstLevel]->Height - 1) <<
+ 		R300_TX_HEIGHTMASK_SHIFT)
+-	     | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
++	     | ((tObj->Image[0][firstLevel]->DepthLog2) <<
+ 		R300_TX_DEPTHMASK_SHIFT))
+-	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
++	    | ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT);
+ 
+-	t->pitch = 0;
++	//	t->pitch = 0;
+ 
+-	/* Only need to round to nearest 32 for textures, but the blitter
+-	 * requires 64-byte aligned pitches, and we may/may not need the
+-	 * blitter.   NPOT only!
+-	 */
+-	if (baseImage->IsCompressed) {
+-		t->pitch |=
+-		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+-	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
++	if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+ 		unsigned int align = (64 / texelBytes) - 1;
+-		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
+-			     texelBytes) + 63) & ~(63);
+ 		t->size |= R300_TX_SIZE_TXPITCH_EN;
+ 		if (!t->image_override)
+-			t->pitch_reg =
+-			    (((tObj->Image[0][t->base.firstLevel]->Width) +
+-			      align) & ~align) - 1;
+-	} else {
+-		t->pitch |=
+-		    ((tObj->Image[0][t->base.firstLevel]->Width *
+-		      texelBytes) + 63) & ~(63);
++			t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1;
+ 	}
+ 
+ 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+-	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
++	    if (tObj->Image[0][firstLevel]->Width > 2048)
+ 		t->pitch_reg |= R500_TXWIDTH_BIT11;
+-	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
++	    if (tObj->Image[0][firstLevel]->Height > 2048)
+ 		t->pitch_reg |= R500_TXHEIGHT_BIT11;
+ 	}
+ }
+@@ -454,17 +349,15 @@ static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 
+ 	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+ 
+-	if (t->base.dirty_images[0]) {
++	if (!t->mt || t->dirty_images[0]) {
+ 		R300_FIREVERTICES(rmesa);
+ 
+ 		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock && !t->image_override)
+-			return GL_FALSE;
++		r300UploadTexImages(rmesa, t, 0);
+ 	}
+ 
+ 	return GL_TRUE;
+@@ -475,7 +368,7 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 
+ 	ASSERT(tObj->Target == GL_TEXTURE_3D);
+ 
+@@ -484,12 +377,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
+ 		return GL_FALSE;
+ 	}
+ 
+-	if (t->base.dirty_images[0]) {
++	if (!t->mt || t->dirty_images[0]) {
+ 		R300_FIREVERTICES(rmesa);
+ 		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock)
+-			return GL_FALSE;
++		r300UploadTexImages(rmesa, t, 0);
+ 	}
+ 
+ 	return GL_TRUE;
+@@ -500,14 +391,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 	GLuint face;
+ 
+ 	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+ 
+-	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
+-	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
+-	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
++	if (!t->mt ||
++	    t->dirty_images[0] || t->dirty_images[1] ||
++	    t->dirty_images[2] || t->dirty_images[3] ||
++	    t->dirty_images[4] || t->dirty_images[5]) {
+ 		/* flush */
+ 		R300_FIREVERTICES(rmesa);
+ 		/* layout memory space, once for all faces */
+@@ -516,18 +408,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
+ 
+ 	/* upload (per face) */
+ 	for (face = 0; face < 6; face++) {
+-		if (t->base.dirty_images[face]) {
+-			r300UploadTexImages(rmesa,
+-					    (r300TexObjPtr) tObj->DriverData,
+-					    face);
++		if (t->dirty_images[face]) {
++			r300UploadTexImages(rmesa, t, face);
+ 		}
+ 	}
+ 
+-	if (!t->base.memBlock) {
+-		/* texmem alloc failed, use s/w fallback */
+-		return GL_FALSE;
+-	}
+-
+ 	return GL_TRUE;
+ }
+ 
+@@ -536,18 +421,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 
+ 	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+ 
+-	if (t->base.dirty_images[0]) {
++	if (!t->mt || t->dirty_images[0]) {
+ 		R300_FIREVERTICES(rmesa);
+ 
+ 		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock && !t->image_override &&
+-		    !rmesa->prefer_gart_client_texturing)
+-			return GL_FALSE;
++		r300UploadTexImages(rmesa, t, 0);
+ 	}
+ 
+ 	return GL_TRUE;
+@@ -555,34 +437,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
+ 
+ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
+ {
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 
+ 	/* Fallback if there's a texture border */
+ 	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
+ 		return GL_FALSE;
+ 
+-	/* Update state if this is a different texture object to last
+-	 * time.
+-	 */
+-	if (rmesa->state.texture.unit[unit].texobj != t) {
+-		if (rmesa->state.texture.unit[unit].texobj != NULL) {
+-			/* The old texture is no longer bound to this texture unit.
+-			 * Mark it as such.
+-			 */
+-
+-			rmesa->state.texture.unit[unit].texobj->base.bound &=
+-			    ~(1 << unit);
+-		}
+-
+-		rmesa->state.texture.unit[unit].texobj = t;
+-		t->base.bound |= (1 << unit);
+-		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
+-	}
++	/* Fallback if memory upload didn't work */
++	if (!t->mt)
++		return GL_FALSE;
+ 
+-	return !t->border_fallback;
++	return GL_TRUE;
+ }
+ 
+ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+@@ -591,20 +458,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
+ 	struct gl_texture_object *tObj =
+ 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+-	r300TexObjPtr t;
++	r300TexObjPtr t = r300_tex_obj(tObj);
+ 	uint32_t pitch_val;
+ 
+ 	if (!tObj)
+ 		return;
+ 
+-	t = (r300TexObjPtr) tObj->DriverData;
+-
+ 	t->image_override = GL_TRUE;
+ 
+ 	if (!offset)
+ 		return;
+ 
+-	t->offset = offset;
++	t->override_offset = offset;
+ 	t->pitch_reg &= (1 << 13) -1;
+ 	pitch_val = pitch;
+ 
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
+index 3fc724a..a9d36a2 100644
+--- a/src/mesa/drivers/dri/r300/radeon_context.c
++++ b/src/mesa/drivers/dri/r300/radeon_context.c
+@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "state.h"
+ #include "matrix.h"
+ #include "framebuffer.h"
++#include "drirenderbuffer.h"
+ 
+ #include "drivers/common/driverfuncs.h"
+ #include "swrast/swrast.h"
+@@ -258,6 +259,59 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+     }
+ }
+ 
++/* For each front-left, back-left and depth attachment of draw, wrap the screen
++ * buffer in a static classic-bufmgr BO on first use (rb->bo NULL) and refresh
++ * rb->cpp / rb->pitch from the screen.  No-op when radeon->bufmgr is NULL. */
++static void
++radeon_make_renderbuffer_current(radeonContextPtr radeon,
++				 GLframebuffer *draw)
++{
++	int size = radeon->radeonScreen->driScreen->fbSize;
++	void *map = 0;	/* only replaced when kernel_mm supplies a mapping */
++	/* if radeon->fake */
++	struct radeon_renderbuffer *rb;
++	uint32_t offset;
++
++	if (!radeon->bufmgr)
++		return;
++
++	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
++		if (radeon->radeonScreen->kernel_mm)
++			map = radeon->radeonScreen->front.map;
++
++ 		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset;
++		if (!rb->bo)
++			rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "front buffer",
++								       offset, size, map,
++								       0);
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->frontPitch;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
++		if (radeon->radeonScreen->kernel_mm)
++			map = radeon->radeonScreen->back.map;
++
++ 		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset;
++		if (!rb->bo)
++			rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "back buffer",
++						     offset, size, map, 0);
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->backPitch;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
++		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset;
++		if (radeon->radeonScreen->kernel_mm)
++			map = radeon->radeonScreen->depth.map;
++
++		if (!rb->bo)
++			rb->bo = radeon_bufmgr_classic_bo_alloc_static(&radeon->bufmgr->base, "depth buffer",
++								       offset, size, map, 0);
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->depthPitch;
++	}
++}
++
++
+ /* Force the context `c' to be the current context and associate with it
+  * buffer `b'.
+  */
+@@ -265,51 +319,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ 			    __DRIdrawablePrivate * driDrawPriv,
+ 			    __DRIdrawablePrivate * driReadPriv)
+ {
+-	if (driContextPriv) {
+-		radeonContextPtr radeon =
+-			(radeonContextPtr) driContextPriv->driverPrivate;
++	radeonContextPtr radeon;
++	GLframebuffer *dfb, *rfb;
+ 
++	if (!driContextPriv) {
+ 		if (RADEON_DEBUG & DEBUG_DRI)
+-			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+-				radeon->glCtx);
+-
+-		if (radeon->dri.drawable != driDrawPriv) {
+-			if (driDrawPriv->swap_interval == (unsigned)-1) {
+-				driDrawPriv->vblFlags =
+-					(radeon->radeonScreen->irq != 0)
+-					? driGetDefaultVBlankFlags(&radeon->
+-								   optionCache)
+-					: VBLANK_FLAG_NO_IRQ;
++			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
++		_mesa_make_current(NULL, NULL, NULL);
++		return GL_TRUE;
++	}
+ 
+-				driDrawableInitVBlank(driDrawPriv);
+-			}
+-		}
++	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
++	dfb = driDrawPriv->driverPrivate;
++	rfb = driReadPriv->driverPrivate;
++
++	if (RADEON_DEBUG & DEBUG_DRI)
++		fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx);
++
++	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
++	if (driReadPriv != driDrawPriv)
++		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
++
++	radeon_make_renderbuffer_current(radeon, dfb);
+ 
+-		radeon->dri.readable = driReadPriv;
++	_mesa_make_current(radeon->glCtx, dfb, rfb);
+ 
+-		if (radeon->dri.drawable != driDrawPriv ||
+-		    radeon->lastStamp != driDrawPriv->lastStamp) {
+-			radeon->dri.drawable = driDrawPriv;
++	if (radeon->dri.drawable != driDrawPriv) {
++		if (driDrawPriv->swap_interval == (unsigned)-1) {
++			driDrawPriv->vblFlags =
++				(radeon->radeonScreen->irq != 0)
++				? driGetDefaultVBlankFlags(&radeon->
++							   optionCache)
++					: VBLANK_FLAG_NO_IRQ;
+ 
+-			radeonSetCliprects(radeon);
+-			r300UpdateViewportOffset(radeon->glCtx);
++			driDrawableInitVBlank(driDrawPriv);
+ 		}
++	}
+ 
+-		_mesa_make_current(radeon->glCtx,
+-				    (GLframebuffer *) driDrawPriv->
+-				    driverPrivate,
+-				    (GLframebuffer *) driReadPriv->
+-				    driverPrivate);
++	radeon->dri.readable = driReadPriv;
+ 
+-		_mesa_update_state(radeon->glCtx);		
++	if (radeon->dri.drawable != driDrawPriv ||
++	    radeon->lastStamp != driDrawPriv->lastStamp) {
++		radeon->dri.drawable = driDrawPriv;
+ 
+-		radeonUpdatePageFlipping(radeon);
+-	} else {
+-		if (RADEON_DEBUG & DEBUG_DRI)
+-			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+-		_mesa_make_current(0, 0, 0);
++		radeonSetCliprects(radeon);
++		r300UpdateViewportOffset(radeon->glCtx);
+ 	}
+ 
++	_mesa_update_state(radeon->glCtx);
++
++	radeonUpdatePageFlipping(radeon);
++
+ 	if (RADEON_DEBUG & DEBUG_DRI)
+ 		fprintf(stderr, "End %s\n", __FUNCTION__);
+ 	return GL_TRUE;
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
+index 7458d63..828853b 100644
+--- a/src/mesa/drivers/dri/r300/radeon_context.h
++++ b/src/mesa/drivers/dri/r300/radeon_context.h
+@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "dri_util.h"
+ #include "colormac.h"
++#include "radeon_buffer.h"
+ 
+ struct radeon_context;
+ typedef struct radeon_context radeonContextRec;
+@@ -132,12 +133,13 @@ struct radeon_scissor_state {
+ 
+ struct radeon_colorbuffer_state {
+ 	GLuint clear;
+-	GLint drawOffset, drawPitch;
++	struct radeon_renderbuffer *rrb;
+ };
+ 
+ struct radeon_state {
+ 	struct radeon_colorbuffer_state color;
+ 	struct radeon_scissor_state scissor;
++	struct radeon_renderbuffer *depth_buffer;
+ };
+ 
+ /**
+@@ -185,6 +187,8 @@ struct radeon_context {
+ 	/* Configuration cache
+ 	 */
+ 	driOptionCache optionCache;
++
++	struct radeon_bufmgr *bufmgr;
+ };
+ 
+ #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
+diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
+index 0c1a195..486ce8e 100644
+--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
++++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c
+@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast/swrast.h"
+ #include "r300_context.h"
+ #include "radeon_ioctl.h"
++#include "radeon_buffer.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+ #include "radeon_reg.h"
+@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
+ 	assert(dPriv->driContextPriv->driverPrivate);
+ 
+ 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
++	
+ 	if (RADEON_DEBUG & DEBUG_IOCTL) {
+ 		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
+ 			(void *)radeon->glCtx);
+@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
+ 	GLint ret;
+ 	GLboolean missed_target;
+ 	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
++	GLframebuffer *fb = dPriv->driverPrivate;
++	struct radeon_renderbuffer *rrb;
+ 
+ 	assert(dPriv);
+ 	assert(dPriv->driContextPriv);
+@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
+ 
+ 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+ 
++	rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++
+ 	if (RADEON_DEBUG & DEBUG_IOCTL) {
+ 		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+ 			radeon->sarea->pfCurrentPage);
+@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
+ 	radeon->swap_count++;
+ 	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
+ 
+-        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
++        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
+                              radeon->sarea->pfCurrentPage);
+ 
+-	if (radeon->sarea->pfCurrentPage == 1) {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+-	} else {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+-	}
+-
+-	if (IS_R300_CLASS(radeon->radeonScreen)) {
+-		r300ContextPtr r300 = (r300ContextPtr)radeon;
+-		R300_STATECHANGE(r300, cb);
+-		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
+-						r300->radeon.radeonScreen->fbLocation;
+-		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-		
+-		if (r300->radeon.radeonScreen->cpp == 4)
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-		else
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-	
+-		if (r300->radeon.sarea->tiling_enabled)
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-	}
++	radeon->state.color.rrb = rrb;
+ }
+ 
+ void radeonWaitForIdleLocked(radeonContextPtr radeon)
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
+index d54a821..4df6a9c 100644
+--- a/src/mesa/drivers/dri/r300/radeon_lock.c
++++ b/src/mesa/drivers/dri/r300/radeon_lock.c
+@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_state.h"
+ #include "r300_context.h"
+ #include "r300_state.h"
++#include "r300_mem.h"
+ 
+ #include "framebuffer.h"
+ 
+@@ -59,11 +60,11 @@ int prevLockLine = 0;
+ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+ {
+ 	int use_back;
++	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
++	GLframebuffer *fb = drawable->driverPrivate;
+ 
+ 	rmesa->doPageFlip = rmesa->sarea->pfState;
+ 	if (rmesa->glCtx->WinSysDrawBuffer) {
+-		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-				     rmesa->sarea->pfCurrentPage);
+ 		r300UpdateDrawBuffer(rmesa->glCtx);
+ 	}
+ 
+@@ -72,16 +73,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+ 	     BUFFER_BACK_LEFT) : 1;
+ 	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
+ 
+-	if (use_back) {
+-		rmesa->state.color.drawOffset =
+-		    rmesa->radeonScreen->backOffset;
+-		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
+-	} else {
+-		rmesa->state.color.drawOffset =
+-		    rmesa->radeonScreen->frontOffset;
+-		rmesa->state.color.drawPitch =
+-		    rmesa->radeonScreen->frontPitch;
+-	}
++	if (use_back)
++		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++	else
++		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++
++	rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+ }
+ 
+ /* Update the hardware state.  This is called if another context has
+@@ -125,12 +122,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+ 	}
+ 
+ 	if (sarea->ctx_owner != rmesa->dri.hwContext) {
+-		int i;
+-
+ 		sarea->ctx_owner = rmesa->dri.hwContext;
+-		for (i = 0; i < r300->nr_heaps; i++) {
+-			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
+-		}
++		radeonBufmgrContendedLockTake(r300->radeon.bufmgr);
+ 	}
+ 
+ 	rmesa->lost_context = GL_TRUE;
+diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
+index 3616d8b..58b00ff 100644
+--- a/src/mesa/drivers/dri/r300/radeon_span.c
++++ b/src/mesa/drivers/dri/r300/radeon_span.c
+@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_ioctl.h"
+ #include "radeon_span.h"
+ 
+-#include "drirenderbuffer.h"
++#include "radeon_buffer.h"
+ 
+ #define DBG 0
+ 
+@@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * information.
+  */
+ #define LOCAL_VARS						\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
++   struct radeon_renderbuffer *rrb = (void *) rb;		\
++   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
+    const GLuint bottom = dPriv->h - 1;				\
+-   GLubyte *buf = (GLubyte *) drb->flippedData			\
+-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
++   GLubyte *buf = (GLubyte *) rrb->bo->virtual			\
++      + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp;	\
+    GLuint p;							\
+    (void) p;
+ 
+ #define LOCAL_DEPTH_VARS				\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
++   struct radeon_renderbuffer *rrb = (void *) rb;	\
++   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
+    const GLuint bottom = dPriv->h - 1;			\
+    GLuint xo = dPriv->x;				\
+    GLuint yo = dPriv->y;				\
+-   GLubyte *buf = (GLubyte *) drb->Base.Data;
++   GLubyte *buf = (GLubyte *) rrb->base.Data;
+ 
+ #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+ 
+@@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define TAG(x)    radeon##x##_RGB565
+ #define TAG2(x,y) radeon##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
++#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2)
+ #include "spantmp2.h"
+ 
+ /* 32 bit, ARGB8888 color spanline and pixel functions
+@@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define TAG(x)    radeon##x##_ARGB8888
+ #define TAG2(x,y) radeon##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
++#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4)
+ #include "spantmp2.h"
+ 
+ /* ================================================================
+@@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * too...
+  */
+ 
+-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
++static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb,
++			     GLint x, GLint y)
+ {
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
++	GLuint pitch = rrb->pitch;
++	if (rrb->depthHasSurface) {
+ 		return 4 * (x + y * pitch);
+ 	} else {
+ 		GLuint ba, address = 0;	/* a[0..1] = 0           */
+@@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+ }
+ 
+ static INLINE GLuint
+-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
++radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y)
+ {
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
++	GLuint pitch = rrb->pitch;
++	if (rrb->depthHasSurface) {
+ 		return 2 * (x + y * pitch);
+ 	} else {
+ 		GLuint ba, address = 0;	/* a[0]    = 0           */
+@@ -175,10 +176,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+ #define VALUE_TYPE GLushort
+ 
+ #define WRITE_DEPTH( _x, _y, d )					\
+-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
++   *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d;
+ 
+ #define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
++   d = *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo ));
+ 
+ #define TAG(x) radeon##x##_z16
+ #include "depthtmp.h"
+@@ -193,7 +194,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+ #ifdef COMPILE_R300
+ #define WRITE_DEPTH( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    tmp &= 0x000000ff;							\
+    tmp |= ((d << 8) & 0xffffff00);					\
+@@ -202,7 +203,7 @@ do {									\
+ #else
+ #define WRITE_DEPTH( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    tmp &= 0xff000000;							\
+    tmp |= ((d) & 0x00ffffff);						\
+@@ -213,12 +214,12 @@ do {									\
+ #ifdef COMPILE_R300
+ #define READ_DEPTH( d, _x, _y )						\
+   do { \
+-    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
++    d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,		\
+ 					 _y + yo )) & 0xffffff00) >> 8; \
+   }while(0)
+ #else
+ #define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
++   d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,			\
+ 					 _y + yo )) & 0x00ffffff;
+ #endif
+ 
+@@ -234,7 +235,7 @@ do {									\
+ #ifdef COMPILE_R300
+ #define WRITE_STENCIL( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    tmp &= 0xffffff00;							\
+    tmp |= (d) & 0xff;							\
+@@ -243,7 +244,7 @@ do {									\
+ #else
+ #define WRITE_STENCIL( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    tmp &= 0x00ffffff;							\
+    tmp |= (((d) & 0xff) << 24);						\
+@@ -254,14 +255,14 @@ do {									\
+ #ifdef COMPILE_R300
+ #define READ_STENCIL( d, _x, _y )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    d = tmp & 0x000000ff;						\
+ } while (0)
+ #else
+ #define READ_STENCIL( d, _x, _y )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
++   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
+    GLuint tmp = *(GLuint *)(buf + offset);				\
+    d = (tmp & 0xff000000) >> 24;					\
+ } while (0)
+@@ -270,6 +271,22 @@ do {									\
+ #define TAG(x) radeon##x##_z24_s8
+ #include "stenciltmp.h"
+ 
++/* dri_bo_map() rb's buffer object (write = map for writing); NULL rb or no bo is a no-op. */
++static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
++{
++	struct radeon_renderbuffer *rrb = (void*)rb;
++	if (rrb && rrb->bo)
++		dri_bo_map(rrb->bo, write);
++}
++
++/* Counterpart of map_buffer(): dri_bo_unmap() rb's buffer object; NULL rb or no bo is a no-op. */
++static void unmap_buffer(struct gl_renderbuffer *rb)
++{
++	struct radeon_renderbuffer *rrb = (void*)rb;
++	if (rrb && rrb->bo)
++		dri_bo_unmap(rrb->bo);
++}
++
+ /* Move locking out to get reasonable span performance (10x better
+  * than doing this in HW_LOCK above).  WaitForIdle() is the main
+  * culprit.
+@@ -278,45 +295,51 @@ do {									\
+ static void radeonSpanRenderStart(GLcontext * ctx)
+ {
+ 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	int i;
+ #ifdef COMPILE_R300
+ 	r300ContextPtr r300 = (r300ContextPtr) rmesa;
+ 	R300_FIREVERTICES(r300);
+ #else
+ 	RADEON_FIREVERTICES(rmesa);
+ #endif
++
++	/* color draw buffers */
++	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
++		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
++
++	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
++
++	if (ctx->DrawBuffer->_DepthBuffer)
++		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
++	if (ctx->DrawBuffer->_StencilBuffer)
++		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
++
++	/* The locking and wait for idle should really only be needed in classic mode.
++	 * In a future memory manager based implementation, this should become
++	 * unnecessary due to the fact that mapping our buffers, textures, etc.
++	 * should implicitly wait for any previous rendering commands that must
++	 * be waited on. */
+ 	LOCK_HARDWARE(rmesa);
+ 	radeonWaitForIdleLocked(rmesa);
+-
+-	/* Read the first pixel in the frame buffer.  This should
+-	 * be a noop, right?  In fact without this conform fails as reading
+-	 * from the framebuffer sometimes produces old results -- the
+-	 * on-card read cache gets mixed up and doesn't notice that the
+-	 * framebuffer has been updated.
+-	 *
+-	 * Note that we should probably be reading some otherwise unused
+-	 * region of VRAM, otherwise we might get incorrect results when
+-	 * reading pixels from the top left of the screen.
+-	 *
+-	 * I found this problem on an R420 with glean's texCube test.
+-	 * Note that the R200 span code also *writes* the first pixel in the
+-	 * framebuffer, but I've found this to be unnecessary.
+-	 *  -- Nicolai Hähnle, June 2008
+-	 */
+-	{
+-		int p;
+-		driRenderbuffer *drb =
+-			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+-		volatile int *buf =
+-			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+-		p = *buf;
+-	}
+ }
+ 
+ static void radeonSpanRenderFinish(GLcontext * ctx)
+ {
+ 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	int i;
+ 	_swrast_flush(ctx);
+ 	UNLOCK_HARDWARE(rmesa);
++
++	/* color draw buffers */
++	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
++		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
++
++	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
++
++	if (ctx->DrawBuffer->_DepthBuffer)
++		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
++	if (ctx->DrawBuffer->_StencilBuffer)
++		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ }
+ 
+ void radeonInitSpanFuncs(GLcontext * ctx)
+@@ -330,20 +353,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
+ /**
+  * Plug in the Get/Put routines for the given driRenderbuffer.
+  */
+-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
++void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
+ {
+-	if (drb->Base.InternalFormat == GL_RGBA) {
+-		if (vis->redBits == 5 && vis->greenBits == 6
+-		    && vis->blueBits == 5) {
+-			radeonInitPointers_RGB565(&drb->Base);
+-		} else {
+-			radeonInitPointers_ARGB8888(&drb->Base);
+-		}
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+-		radeonInitDepthPointers_z16(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+-		radeonInitDepthPointers_z24_s8(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+-		radeonInitStencilPointers_z24_s8(&drb->Base);
++	if (rrb->base.InternalFormat == GL_RGB5) {
++		radeonInitPointers_RGB565(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_RGBA8) {
++		radeonInitPointers_ARGB8888(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
++		radeonInitDepthPointers_z16(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
++		radeonInitDepthPointers_z24_s8(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
++		radeonInitStencilPointers_z24_s8(&rrb->base);
+ 	}
+ }
+diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
+index d81318c..a7720da 100644
+--- a/src/mesa/drivers/dri/r300/radeon_state.c
++++ b/src/mesa/drivers/dri/r300/radeon_state.c
+@@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
+ void radeonInitState(radeonContextPtr radeon)
+ {
+ 	radeon->Fallback = 0;
+-
+-	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+-	} else {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+-	}
+ }
+ 
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h
+new file mode 100644
+index 0000000..a5e4529
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h
+@@ -0,0 +1,62 @@
++/*
++ * Copyright 2008 Red Hat, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software")
++ * to deal in the software without restriction, including without limitation
++ * on the rights to use, copy, modify, merge, publish, distribute, sub
++ * license, and/or sell copies of the Software, and to permit persons to whom
++ * them Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
++ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors:
++ *	Adam Jackson <ajax@redhat.com>
++ */
++
++#ifndef RADEON_BUFFER_H
++#define RADEON_BUFFER_H
++
++#include "dri_bufmgr.h"
++
++struct radeon_renderbuffer
++{
++    struct gl_renderbuffer base; /* keep first: callers cast gl_renderbuffer * to this type */
++    dri_bo *bo;                  /* backing buffer object; the span code maps it and reads bo->virtual */
++    unsigned int cpp;            /* multiplier applied to pixel offsets in the span macros (bytes per pixel) */
++    /* unsigned int offset; */
++    unsigned int pitch;          /* row stride in pixels, as used by GET_PTR and radeon_mba_z16/z32 */
++    unsigned int height;
++
++    /* boo Xorg 6.8.2 compat */
++    int depthHasSurface;         /* non-zero: radeon_mba_z16/z32 use plain linear addressing */
++
++    __DRIdrawablePrivate *dPriv; /* drawable whose x/y/h the span macros read */
++};
++
++struct radeon_bufmgr {
++	dri_bufmgr base; /* embedded base object; callers pass &bufmgr->base to the bo_alloc helpers */
++	void (*emit_reloc)(dri_bo *buf, uint32_t *head, uint32_t *count_p, uint32_t read_domains, uint32_t write_domain); /* NOTE(review): presumably the backend behind radeon_bufmgr_emit_reloc() - confirm */
++};
++
++void radeon_bufmgr_emit_reloc(dri_bo *buf, uint32_t *head, uint32_t *count_p, uint32_t read_domains, uint32_t write_domain);
++
++dri_bo *radeon_bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name,
++					      unsigned long offset, unsigned long size,
++					      void *virtual, uint32_t initial_domain);
++dri_bo *radeon_bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name,
++				       unsigned long size, unsigned int alignment,
++				       uint32_t location_mask);
++
++int radeon_bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta,
++				     GLuint offset, dri_bo *target);
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
+index 84b5c46..cc384e1 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
+@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_chipset.h"
+ #include "radeon_macros.h"
+ #include "radeon_screen.h"
++#include "radeon_buffer.h"
+ #if !RADEON_COMMON
+ #include "radeon_context.h"
+ #include "radeon_span.h"
+@@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "GL/internal/dri_interface.h"
+ 
++#include <errno.h>
++#include <sys/ioctl.h>
++
+ /* Radeon configuration
+  */
+ #include "xmlpool.h"
+@@ -344,11 +348,99 @@ static const __DRItexOffsetExtension r200texOffsetExtension = {
+ #endif
+ 
+ #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++#if 0
+ static const __DRItexOffsetExtension r300texOffsetExtension = {
+     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+    r300SetTexOffset,
+ };
+ #endif
++#endif
++
++
++/*
++ * Drop any handle gem_obj already holds, re-open the object by its GEM
++ * name, then mmap and pin it, caching handle/size/map/offset in gem_obj.
++ * ioctl arguments are zeroed first so no stack garbage reaches the kernel.
++ */
++static void
++radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
++			 struct radeon_gem_object *gem_obj)
++{
++     struct drm_gem_close close_args = { 0 };
++     struct drm_gem_open args = { 0 };
++     struct drm_radeon_gem_mmap mmap_args = { 0 };
++     struct drm_radeon_gem_pin pin_args = { 0 };
++     int ret;
++
++     if (gem_obj->gem_handle) {
++	     close_args.handle = gem_obj->gem_handle;
++	     ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args);
++	     gem_obj->gem_handle = 0;
++     }
++
++     /* do open */
++     args.name = gem_obj->gem_name;
++     ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args);
++     if (ret) {
++	     fprintf(stderr," failed to open handle %d\n", gem_obj->gem_name);
++	     return;
++     }
++     gem_obj->gem_handle = args.handle;
++     gem_obj->size = args.size;
++
++     mmap_args.handle = gem_obj->gem_handle;
++     mmap_args.size = gem_obj->size;
++     mmap_args.offset = 0;
++     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args,
++			       sizeof(mmap_args));
++     if (ret) {
++	     fprintf(stderr, "%s: mmap of GEM name %d failed: %d\n", __FUNCTION__, gem_obj->gem_name, ret);
++	     return;
++     }
++     gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr);
++
++     pin_args.handle = gem_obj->gem_handle;
++     pin_args.alignment = 0;
++     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args,
++			       sizeof(pin_args));
++     if (ret) {
++	     fprintf(stderr, "%s: pin of GEM name %d failed: %d\n", __FUNCTION__, gem_obj->gem_name, ret);
++	     return;
++     }
++     gem_obj->offset = pin_args.offset;
++}
++
++/* STOP GAP: resolve the front/back/depth/texture GEM names passed in dri_priv
++ * into handles, mappings and offsets on screen.  Always returns 0. */
++static int
++radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
++		       RADEONDRIPtr dri_priv)
++{
++	screen->front.gem_name = dri_priv->frontOffset;
++	radeon_gem_update_handle(screen, sPriv, &screen->front);
++	screen->frontOffset = screen->front.offset;
++
++	screen->back.gem_name = dri_priv->backOffset;
++	radeon_gem_update_handle(screen, sPriv, &screen->back);
++	screen->backOffset = screen->back.offset;
++
++	screen->depth.gem_name = dri_priv->depthOffset;
++	radeon_gem_update_handle(screen, sPriv, &screen->depth);
++	screen->depthOffset = screen->depth.offset;
++
++	screen->vram_texture.gem_name = dri_priv->textureOffset;
++	radeon_gem_update_handle(screen, sPriv, &screen->vram_texture);
++	screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->vram_texture.offset + screen->fbLocation;
++	screen->texSize[RADEON_LOCAL_TEX_HEAP] = screen->vram_texture.size;
++
++	screen->gart_texture.gem_name = dri_priv->gartTexHandle;
++	radeon_gem_update_handle(screen, sPriv, &screen->gart_texture);
++	screen->gartTextures.map = screen->gart_texture.map;
++	screen->gart_texture_offset = screen->gart_texture.offset + screen->gart_base;
++
++	/* radeon_gem_update_handle() returns void, so there is no failure to propagate. */
++	return 0;
++}
+ 
+ /* Create the device specific screen private data struct.
+  */
+@@ -389,6 +481,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+    screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+    {
+       int ret;
++
++#ifdef RADEON_PARAM_KERNEL_MM
++      /* Probe RADEON_PARAM_KERNEL_MM; any error other than -EINVAL is fatal. */
++      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
++                            &screen->kernel_mm);
++      if (ret && ret != -EINVAL) {
++         FREE( screen );
++         fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_KERNEL_MM): %d\n", ret);
++         return NULL;
++      }
++      /* -EINVAL is not fatal: carry on with kernel_mm = 0. */
++      if (ret == -EINVAL)
++          screen->kernel_mm = 0;
++#endif
++
+       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
+ 			    &screen->gart_buffer_offset);
+ 
+@@ -422,32 +529,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+       screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+    }
+ 
+-   screen->mmio.handle = dri_priv->registerHandle;
+-   screen->mmio.size   = dri_priv->registerSize;
+-   if ( drmMap( sPriv->fd,
+-		screen->mmio.handle,
+-		screen->mmio.size,
+-		&screen->mmio.map ) ) {
+-      FREE( screen );
+-      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+-      return NULL;
+-   }
++   if (!screen->kernel_mm) {
++      screen->mmio.handle = dri_priv->registerHandle;
++      screen->mmio.size   = dri_priv->registerSize;
++      if ( drmMap( sPriv->fd,
++		   screen->mmio.handle,
++		   screen->mmio.size,
++		   &screen->mmio.map ) ) {
++	 FREE( screen );
++	 __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
++	 return NULL;
++      }
+ 
+-   RADEONMMIO = screen->mmio.map;
++      RADEONMMIO = screen->mmio.map;
+ 
+-   screen->status.handle = dri_priv->statusHandle;
+-   screen->status.size   = dri_priv->statusSize;
+-   if ( drmMap( sPriv->fd,
+-		screen->status.handle,
+-		screen->status.size,
+-		&screen->status.map ) ) {
+-      drmUnmap( screen->mmio.map, screen->mmio.size );
+-      FREE( screen );
+-      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+-      return NULL;
++      screen->status.handle = dri_priv->statusHandle;
++      screen->status.size   = dri_priv->statusSize;
++      if ( drmMap( sPriv->fd,
++		   screen->status.handle,
++		   screen->status.size,
++		   &screen->status.map ) ) {
++	 drmUnmap( screen->mmio.map, screen->mmio.size );
++	 FREE( screen );
++	 __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
++	 return NULL;
++      }
++      screen->scratch = (__volatile__ u_int32_t *)
++	 ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+    }
+-   screen->scratch = (__volatile__ u_int32_t *)
+-      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+ 
+    screen->buffers = drmMapBufs( sPriv->fd );
+    if ( !screen->buffers ) {
+@@ -458,22 +567,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+       return NULL;
+    }
+ 
+-   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+-      screen->gartTextures.handle = dri_priv->gartTexHandle;
+-      screen->gartTextures.size   = dri_priv->gartTexMapSize;
+-      if ( drmMap( sPriv->fd,
+-		   screen->gartTextures.handle,
+-		   screen->gartTextures.size,
+-		   (drmAddressPtr)&screen->gartTextures.map ) ) {
+-	 drmUnmapBufs( screen->buffers );
+-	 drmUnmap( screen->status.map, screen->status.size );
+-	 drmUnmap( screen->mmio.map, screen->mmio.size );
+-	 FREE( screen );
+-	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+-	 return NULL;
++   if (!screen->kernel_mm) {
++      if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
++	 screen->gartTextures.handle = dri_priv->gartTexHandle;
++	 screen->gartTextures.size   = dri_priv->gartTexMapSize;
++	 if ( drmMap( sPriv->fd,
++		      screen->gartTextures.handle,
++		      screen->gartTextures.size,
++		      (drmAddressPtr)&screen->gartTextures.map ) ) {
++	    drmUnmapBufs( screen->buffers );
++	    drmUnmap( screen->status.map, screen->status.size );
++	    drmUnmap( screen->mmio.map, screen->mmio.size );
++	    FREE( screen );
++	    __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
++	    return NULL;
++	 }
++	 
++	 screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+       }
+-
+-      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+    }
+ 
+    screen->chip_flags = 0;
+@@ -840,7 +951,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
+                          &temp);
+    if (ret) {
+-       if (screen->chip_family < CHIP_FAMILY_RS690)
++       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
+ 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+        else {
+            FREE( screen );
+@@ -881,55 +992,59 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+        }
+    }
+ 
+-   if ( sPriv->drm_version.minor >= 10 ) {
+-      drm_radeon_setparam_t sp;
+-
+-      sp.param = RADEON_SETPARAM_FB_LOCATION;
+-      sp.value = screen->fbLocation;
+-
+-      drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
+-		       &sp, sizeof( sp ) );
+-   }
+-
+-   screen->frontOffset	= dri_priv->frontOffset;
+    screen->frontPitch	= dri_priv->frontPitch;
+-   screen->backOffset	= dri_priv->backOffset;
+    screen->backPitch	= dri_priv->backPitch;
+-   screen->depthOffset	= dri_priv->depthOffset;
+    screen->depthPitch	= dri_priv->depthPitch;
+ 
+-   /* Check if ddx has set up a surface reg to cover depth buffer */
+-   screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
+-      /* these chips don't use tiled z without hyperz. So always pretend
+-         we have set up a surface which will cause linear reads/writes */
+-      ((screen->chip_family & RADEON_CLASS_R100) &&
+-      !(screen->chip_flags & RADEON_CHIPSET_TCL));
+-
+-   if ( dri_priv->textureSize == 0 ) {
+-      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
+-      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
+-      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+-	 dri_priv->log2GARTTexGran;
+-   } else {
+-      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
+-				               + screen->fbLocation;
+-      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
+-      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+-	 dri_priv->log2TexGran;
+-   }
++   if (!screen->kernel_mm) {
++      if ( sPriv->drm_version.minor >= 10 ) {
++	 drm_radeon_setparam_t sp;
++
++	 sp.param = RADEON_SETPARAM_FB_LOCATION;
++	 sp.value = screen->fbLocation;
++
++	 drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
++			  &sp, sizeof( sp ) );
++      }
+ 
+-   if ( !screen->gartTextures.map || dri_priv->textureSize == 0
+-	|| getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
+-      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
+-      screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
+-      screen->texSize[RADEON_GART_TEX_HEAP] = 0;
+-      screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
++      screen->frontOffset	= dri_priv->frontOffset;
++      screen->backOffset	= dri_priv->backOffset;
++      screen->depthOffset	= dri_priv->depthOffset;
++   
++      /* Check if ddx has set up a surface reg to cover depth buffer */
++      screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
++	 /* these chips don't use tiled z without hyperz. So always pretend
++	    we have set up a surface which will cause linear reads/writes */
++	 ((screen->chip_family & RADEON_CLASS_R100) &&
++	  !(screen->chip_flags & RADEON_CHIPSET_TCL));
++      
++      if ( dri_priv->textureSize == 0 ) {
++	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
++	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
++	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
++	    dri_priv->log2GARTTexGran;
++      } else {
++	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
++	    + screen->fbLocation;
++	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
++	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
++	    dri_priv->log2TexGran;
++      }
++      
++      if ( !screen->gartTextures.map || dri_priv->textureSize == 0
++	   || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
++	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
++	 screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
++	 screen->texSize[RADEON_GART_TEX_HEAP] = 0;
++	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
++      } else {
++	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
++	 screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
++	 screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
++	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran;
++      }
+    } else {
+-      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
+-      screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
+-      screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
+-      screen->logTexGranularity[RADEON_GART_TEX_HEAP] =
+-	 dri_priv->log2GARTTexGran;
++      radeon_init_mm_buffers(screen, sPriv, dri_priv);
+    }
+ 
+    i = 0;
+@@ -954,8 +1069,10 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ #endif
+ 
+ #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++#if 0
+    screen->extensions[i++] = &r300texOffsetExtension.base;
+ #endif
++#endif
+ 
+    screen->extensions[i++] = NULL;
+    sPriv->extensions = screen->extensions;
+@@ -975,12 +1092,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+    if (!screen)
+       return;
+ 
+-   if ( screen->gartTextures.map ) {
+-      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+-   }
+    drmUnmapBufs( screen->buffers );
+-   drmUnmap( screen->status.map, screen->status.size );
+-   drmUnmap( screen->mmio.map, screen->mmio.size );
++   if (!screen->kernel_mm) {
++      if ( screen->gartTextures.map ) {
++	 drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
++      }
++      drmUnmap( screen->status.map, screen->status.size );
++      drmUnmap( screen->mmio.map, screen->mmio.size );
++   }
+ 
+    /* free all option information */
+    driDestroyOptionInfo (&screen->optionCache);
+@@ -1004,6 +1123,160 @@ radeonInitDriver( __DRIscreenPrivate *sPriv )
+    return GL_TRUE;
+ }
+ 
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++/* AllocStorage hook: records size and format on rb, allocates nothing, returns GL_TRUE. */
++static GLboolean
++radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
++			    GLenum intFormat, GLuint w, GLuint h)
++{
++    rb->_ActualFormat = intFormat;
++    rb->Height = h;
++    rb->Width = w;
++    return GL_TRUE;
++}
++
++
++/* Build a radeon_renderbuffer for format; NULL if allocation fails or format is unknown. */
++static struct radeon_renderbuffer *
++radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
++{
++    struct radeon_renderbuffer *rrb = CALLOC_STRUCT(radeon_renderbuffer);
++    struct gl_renderbuffer *rb;
++
++    if (rrb == NULL)
++	return NULL;
++    rb = &rrb->base;
++    _mesa_init_renderbuffer(rb, 0);
++    /* XXX per-format bit depths and data type; GL_DEPTH_COMPONENT24 is reported as D24S8 */
++    switch (format) {
++	case GL_RGB5:
++	    rb->_BaseFormat = GL_RGBA;
++	    rb->_ActualFormat = GL_RGB5;
++	    rb->DataType = GL_UNSIGNED_BYTE;
++	    rb->RedBits = 5;
++	    rb->GreenBits = 6;
++	    rb->BlueBits = 5;
++	    break;
++	case GL_RGBA8:
++	    rb->_BaseFormat = GL_RGBA;
++	    rb->_ActualFormat = GL_RGBA8;
++	    rb->DataType = GL_UNSIGNED_BYTE;
++	    rb->RedBits = 8;
++	    rb->GreenBits = 8;
++	    rb->BlueBits = 8;
++	    rb->AlphaBits = 8;
++	    break;
++	case GL_STENCIL_INDEX8_EXT:
++	    rb->_BaseFormat = GL_STENCIL_INDEX;
++	    rb->_ActualFormat = GL_STENCIL_INDEX8_EXT;
++	    rb->DataType = GL_UNSIGNED_BYTE;
++	    rb->StencilBits = 8;
++	    break;
++	case GL_DEPTH_COMPONENT16:
++	    rb->_BaseFormat = GL_DEPTH_COMPONENT;
++	    rb->_ActualFormat = GL_DEPTH_COMPONENT16;
++	    rb->DataType = GL_UNSIGNED_SHORT;
++	    rb->DepthBits = 16;
++	    break;
++	case GL_DEPTH_COMPONENT24:
++	    rb->_BaseFormat = GL_DEPTH_COMPONENT;
++	    rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++	    rb->DataType = GL_UNSIGNED_INT;
++	    rb->DepthBits = 24;
++	    break;
++	case GL_DEPTH24_STENCIL8_EXT:
++	    rb->_BaseFormat = GL_DEPTH_STENCIL_EXT;
++	    rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++	    rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
++	    rb->DepthBits = 24;
++	    rb->StencilBits = 8;
++	    break;
++	default:
++	    fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
++	    _mesa_delete_renderbuffer(rb);
++	    return NULL;
++    }
++
++    /* Fields common to every accepted format. */
++    rb->InternalFormat = format;
++    rb->AllocStorage = radeon_alloc_window_storage;
++    rrb->dPriv = driDrawPriv;
++
++    radeonSetSpanFunctions(rrb);
++
++    return rrb;
++}
++
++/**
++ * Create the Mesa framebuffer and renderbuffers for a given window/drawable;
++ * the framebuffer is stored in driDrawPriv->driverPrivate (isPixmap is unused).
++ * \todo This function (and its interface) will need to be updated to support
++ * pbuffers.
++ */
++static GLboolean
++radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
++                    __DRIdrawablePrivate *driDrawPriv,
++                    const __GLcontextModes *mesaVis,
++                    GLboolean isPixmap )
++{
++   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
++
++    const GLboolean swDepth = GL_FALSE;
++    const GLboolean swAlpha = GL_FALSE;
++    const GLboolean swAccum = mesaVis->accumRedBits > 0;
++    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
++	mesaVis->depthBits != 24; /* soft stencil unless the depth buffer is 24-bit */
++    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
++    GLenum depthFormat = GL_NONE;
++    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
++
++    if (mesaVis->depthBits == 16)
++	depthFormat = GL_DEPTH_COMPONENT16;
++    else if (mesaVis->depthBits == 24)
++	depthFormat = GL_DEPTH_COMPONENT24;
++
++    /* front color renderbuffer. NOTE(review): fb and the renderbuffers below are used without NULL checks */
++    {
++	struct radeon_renderbuffer *front =
++	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
++    }
++
++    /* back color renderbuffer, only for double-buffered visuals */
++    if (mesaVis->doubleBufferMode) {
++	struct radeon_renderbuffer *back =
++	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
++    }
++
++    /* depth renderbuffer, only for 16- or 24-bit depth visuals */
++    if (depthFormat != GL_NONE) {
++	struct radeon_renderbuffer *depth =
++	    radeon_create_renderbuffer(depthFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
++	depth->depthHasSurface = screen->depthHasSurface;
++    }
++
++    /* stencil renderbuffer, skipped when stencil is left to the soft renderbuffers */
++    if (mesaVis->stencilBits > 0 && !swStencil) {
++	struct radeon_renderbuffer *stencil =
++	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
++	stencil->depthHasSurface = screen->depthHasSurface;
++    }
++
++    _mesa_add_soft_renderbuffers(fb,
++	    GL_FALSE, /* color */
++	    swDepth,
++	    swStencil,
++	    swAccum,
++	    swAlpha,
++	    GL_FALSE /* aux */);
++    driDrawPriv->driverPrivate = (void *) fb;
++
++    return (driDrawPriv->driverPrivate != NULL);
++}
++#else
+ 
+ /**
+  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
+@@ -1103,7 +1376,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+       return (driDrawPriv->driverPrivate != NULL);
+    }
+ }
+-
++#endif
+ 
+ static void
+ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+@@ -1199,11 +1472,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
+    if (!radeonInitDriver(psp))
+        return NULL;
+ 
++   /* for now fill in all modes */
+    return radeonFillInModes( psp,
+ 			     dri_priv->bpp,
+ 			     (dri_priv->bpp == 16) ? 16 : 24,
+-			     (dri_priv->bpp == 16) ? 0  : 8,
+-			     (dri_priv->backOffset != dri_priv->depthOffset) );
++			     (dri_priv->bpp == 16) ? 0  : 8, 1);
+ }
+ 
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
+index ab859d5..82eb7d8 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
+@@ -55,6 +55,14 @@ typedef struct {
+    drmAddress map;			/* Mapping of the DRM region */
+ } radeonRegionRec, *radeonRegionPtr;
+ 
++struct radeon_gem_object {	/* per-buffer state filled in on the screen->kernel_mm path */
++   uint32_t gem_name;	/* set from the DRI private record before radeon_gem_update_handle() */
++   uint32_t gem_handle;	/* presumably the opened handle for gem_name -- TODO confirm */
++   uint64_t size;	/* buffer size; vram_texture.size becomes the local tex heap size */
++   void *map;		/* mapping address; gart_texture.map is copied to gartTextures.map */
++   uint64_t offset;	/* buffer offset; callers add fbLocation or gart_base where needed */
++};
++
+ typedef struct {
+    int chip_family;
+    int chip_flags;
+@@ -107,6 +115,13 @@ typedef struct {
+    const __DRIextension *extensions[8];
+ 
+    int num_gb_pipes;
++
++   int kernel_mm;
++   struct radeon_gem_object front;
++   struct radeon_gem_object back;
++   struct radeon_gem_object depth;
++   struct radeon_gem_object vram_texture;
++   struct radeon_gem_object gart_texture;
+ } radeonScreenRec, *radeonScreenPtr;
+ 
+ #define IS_R100_CLASS(screen) \
+diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
+index 9abe086..1650a9b 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_span.h
++++ b/src/mesa/drivers/dri/radeon/radeon_span.h
+@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "drirenderbuffer.h"
+ 
++#include "radeon_buffer.h"
++
+ extern void radeonInitSpanFuncs(GLcontext * ctx);
+-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
+ 
++#if COMPILE_R300
++extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
++#else
++extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
++#endif
+ #endif
diff --git a/r300-cmdbuf.patch b/r300-cmdbuf.patch
deleted file mode 100644
index 50eb918..0000000
--- a/r300-cmdbuf.patch
+++ /dev/null
@@ -1,7669 +0,0 @@
-diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
-index e9144ac..b6ed58b 100644
---- a/src/mesa/drivers/dri/r200/Makefile
-+++ b/src/mesa/drivers/dri/r200/Makefile
-@@ -48,7 +48,8 @@ SYMLINKS = \
- COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
--	radeon_screen.h
-+	radeon_screen.h \
-+	radeon_buffer.h
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
-index 6ca9342..3bb1ff4 100644
---- a/src/mesa/drivers/dri/r300/Makefile
-+++ b/src/mesa/drivers/dri/r300/Makefile
-@@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1)
- EGL_SOURCES = server/radeon_egl.c
- endif
- 
--COMMON_SOURCES = \
--	../../common/driverfuncs.c \
--	../common/mm.c \
--	../common/utils.c \
--	../common/texmem.c \
--	../common/vblank.c \
--	../common/xmlconfig.c \
--	../common/dri_util.c
--
- DRIVER_SOURCES = \
- 		 radeon_screen.c \
- 		 radeon_context.c \
-@@ -36,6 +27,7 @@ DRIVER_SOURCES = \
- 		 r300_texmem.c \
- 		 r300_tex.c \
- 		 r300_texstate.c \
-+		 r300_mipmap_tree.c \
- 		 radeon_program.c \
- 		 radeon_program_alu.c \
- 		 radeon_program_pair.c \
-@@ -51,7 +43,7 @@ DRIVER_SOURCES = \
- 		 r300_swtcl.c \
- 		 $(EGL_SOURCES)
- 
--C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
-+C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES)
- 
- DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
- 	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
-@@ -68,7 +60,8 @@ COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
- 	radeon_screen.h \
--	radeon_span.h
-+	radeon_span.h \
-+	radeon_buffer.h
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-index c069660..493b0ac 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-@@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_reg.h"
- #include "r300_cmdbuf.h"
- #include "r300_emit.h"
-+#include "r300_mem.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_state.h"
- 
- // Set this to 1 for extremely verbose debugging of command buffers
- #define DEBUG_CMDBUF		0
- 
-+/** # of dwords reserved for additional instructions that may need to be written
-+ * during flushing.
-+ */
-+#define SPACE_FOR_FLUSHING	4
-+
- /**
-  * Send the current command buffer via ioctl to the hardware.
-  */
-@@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
- 	drm_radeon_cmd_buffer_t cmd;
- 	int start;
- 
-+	if (r300->cmdbuf.flushing) {
-+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
-+		exit(-1);
-+	}
-+	r300->cmdbuf.flushing = 1;
-+
- 	if (r300->radeon.lost_context) {
- 		start = 0;
- 		r300->radeon.lost_context = GL_FALSE;
- 	} else
--		start = r300->cmdbuf.count_reemit;
-+		start = r300->cmdbuf.reemit;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL) {
- 		fprintf(stderr, "%s from %s - %i cliprects\n",
- 			__FUNCTION__, caller, r300->radeon.numClipRects);
- 
--		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
--			for (i = start; i < r300->cmdbuf.count_used; ++i)
-+		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) {
-+			fprintf(stderr, "written: %d  committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed);
-+			for (i = start; i < r300->cmdbuf.written; ++i)
- 				fprintf(stderr, "%d: %08x\n", i,
--					r300->cmdbuf.cmd_buf[i]);
-+					((uint32_t*)r300->cmdbuf.buf->virtual)[i]);
-+		}
- 	}
- 
--	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
--	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
-+	if (r300->cmdbuf.written != r300->cmdbuf.committed) {
-+		_mesa_problem(r300->radeon.glCtx,
-+			"Command buffer contains %d uncommitted dwords\n"
-+			"in r300FlushCmdBufLocked called from %s.\n",
-+			r300->cmdbuf.written - r300->cmdbuf.committed, caller);
-+	}
-+
-+	dri_bo_unmap(r300->cmdbuf.buf);
-+	dri_process_relocs(r300->cmdbuf.buf, 0);
-+
-+	cmd.buf = (char *)r300->cmdbuf.buf->virtual + 4*start;
-+	cmd.bufsz = (r300->cmdbuf.committed - start) * 4;
- 
- 	if (r300->radeon.state.scissor.enabled) {
- 		cmd.nbox = r300->radeon.state.scissor.numClipRects;
-@@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
- 		radeonWaitForIdleLocked(&r300->radeon);
- 	}
- 
-+	dri_post_submit(r300->cmdbuf.buf, 0);
-+	dri_bo_unreference(r300->cmdbuf.buf);
-+
- 	r300->dma.nr_released_bufs = 0;
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
-+	r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
-+		r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF);
-+	r300->cmdbuf.written = 0;
-+	r300->cmdbuf.reserved = 0;
-+	r300->cmdbuf.committed = 0;
-+	r300->cmdbuf.reemit = 0;
-+	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
-+
-+	r300->cmdbuf.flushing = 0;
- 
- 	return ret;
- }
-@@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
- 	int ret;
- 
- 	LOCK_HARDWARE(&r300->radeon);
--
- 	ret = r300FlushCmdBufLocked(r300, caller);
--
- 	UNLOCK_HARDWARE(&r300->radeon);
- 
- 	if (ret) {
-@@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
- 	return ret;
- }
- 
-+/**
-+ * Make sure that enough space is available in the command buffer
-+ * by flushing if necessary.
-+ *
-+ * \param dwords The number of dwords we need to be free on the command buffer
-+ */
-+void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller)
-+{
-+	assert(dwords < r300->cmdbuf.size);
-+
-+	if (!r300->cmdbuf.flushing)
-+		dwords += SPACE_FOR_FLUSHING;
-+
-+	if (r300->cmdbuf.written + dwords > r300->cmdbuf.size)
-+		r300FlushCmdBuf(r300, caller);
-+}
-+
-+void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line)
-+{
-+	assert(r300->cmdbuf.written == r300->cmdbuf.reserved);
-+
-+	r300EnsureCmdBufSpace(r300, n, function);
-+
-+	if (autostate && !r300->cmdbuf.written) {
-+		if (RADEON_DEBUG & DEBUG_IOCTL)
-+			fprintf(stderr,
-+				"Reemit state after flush (from %s)\n", function);
-+		r300EmitState(r300);
-+	}
-+
-+	r300->cmdbuf.reserved += n;
-+	assert(r300->cmdbuf.reserved < r300->cmdbuf.size);
-+
-+	if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
-+			n, r300->cmdbuf.written, function, line);
-+}
-+
- static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
- {
- 	int i;
-@@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
-  */
- static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- {
-+	BATCH_LOCALS(r300);
- 	struct r300_state_atom *atom;
--	uint32_t *dest;
- 	int dwords;
- 
--	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
--
--	/* Emit WAIT */
--	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit cache flush */
--	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	*dest = R300_TX_FLUSH;
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit END3D */
--	*dest = cmdpacify();
--	dest++;
--	r300->cmdbuf.count_used++;
-+	BEGIN_BATCH_NO_AUTOSTATE(4);
-+	OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN));
-+	OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1));
-+	OUT_BATCH(R300_TX_FLUSH);
-+	OUT_BATCH(cmdpacify());
-+	END_BATCH();
- 
- 	/* Emit actual atoms */
--
- 	foreach(atom, &r300->hw.atomlist) {
- 		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
- 			dwords = (*atom->check) (r300, atom);
-@@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- 					r300PrintStateAtom(r300, atom);
- 				}
--				memcpy(dest, atom->cmd, dwords * 4);
--				dest += dwords;
--				r300->cmdbuf.count_used += dwords;
-+				if (atom->emit) {
-+					(*atom->emit)(r300);
-+				} else {
-+					BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+					OUT_BATCH_TABLE(atom->cmd, dwords);
-+					END_BATCH();
-+				}
- 				atom->dirty = GL_FALSE;
- 			} else {
- 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
-@@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- 			}
- 		}
- 	}
-+
-+	COMMIT_BATCH();
- }
- 
- /**
-@@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300)
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
-+	if (r300->cmdbuf.written && !r300->hw.is_dirty
- 	    && !r300->hw.all_dirty)
- 		return;
- 
- 	/* To avoid going across the entire set of states multiple times, just check
--	 * for enough space for the case of emitting all state, and inline the
--	 * r300AllocCmdBuf code here without all the checks.
-+	 * for enough space for the case of emitting all state.
- 	 */
- 	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
- 
--	if (!r300->cmdbuf.count_used) {
-+	if (!r300->cmdbuf.written) {
- 		if (RADEON_DEBUG & DEBUG_STATE)
- 			fprintf(stderr, "Begin reemit state\n");
- 
- 		r300EmitAtoms(r300, GL_FALSE);
--		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
-+		r300->cmdbuf.reemit = r300->cmdbuf.committed;
- 	}
- 
- 	if (RADEON_DEBUG & DEBUG_STATE)
-@@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300)
- 
- 	r300EmitAtoms(r300, GL_TRUE);
- 
--	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
-+	assert(r300->cmdbuf.written < r300->cmdbuf.size);
- 
- 	r300->hw.is_dirty = GL_FALSE;
- 	r300->hw.all_dirty = GL_FALSE;
-@@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300)
- #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
- #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
- 
-+static void emit_tex_offsets(r300ContextPtr r300)
-+{
-+	BATCH_LOCALS(r300);
-+	int numtmus = packet0_count(r300->hw.tex.offset.cmd);
-+
-+	if (numtmus) {
-+		int i;
-+
-+		BEGIN_BATCH(numtmus + 1);
-+		OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus);
-+		for(i = 0; i < numtmus; ++i) {
-+			r300TexObj *t = r300->hw.textures[i];
-+			if (t && !t->image_override) {
-+				OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET);
-+			} else if (!t) {
-+				OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
-+			} else {
-+				OUT_BATCH(t->override_offset);
-+			}
-+		}
-+		END_BATCH();
-+	}
-+}
-+
-+static void emit_cb_offset(r300ContextPtr r300)
-+{
-+	BATCH_LOCALS(r300);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t cbpitch;
-+
-+	rrb = r300->radeon.state.color.rrb;
-+	if (!rrb) {
-+		fprintf(stderr, "no rrb\n");
-+		return;
-+	}
-+
-+	cbpitch = rrb->pitch;
-+	if (rrb->cpp == 4)
-+		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+	else
-+		cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+	if (r300->radeon.sarea->tiling_enabled)
-+		cbpitch |= R300_COLOR_TILE_ENABLE;
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
-+	OUT_BATCH(cbpitch);
-+	END_BATCH();
-+}
-+
-+static void emit_zb_offset(r300ContextPtr r300)
-+{
-+	BATCH_LOCALS(r300);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t zbpitch;
-+
-+	rrb = r300->radeon.state.depth_buffer;
-+	if (!rrb)
-+		return;
-+
-+	zbpitch = rrb->pitch;
-+
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
-+	OUT_BATCH(zbpitch);
-+	END_BATCH();
-+
-+}
-+
- static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	return atom->cmd_size;
-@@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	ALLOC_STATE(rop, always, 2, 0);
- 	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
- 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
--	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
--	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
-+	r300->hw.cb.emit = &emit_cb_offset;
- 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
- 	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
- 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
-@@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	r300->hw.zstencil_format.cmd[0] =
- 	    cmdpacket0(R300_ZB_FORMAT, 4);
- 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
--	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
-+	r300->hw.zb.emit = emit_zb_offset;
- 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
- 	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
- 	ALLOC_STATE(unk4F30, always, 3, 0);
-@@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
- 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
- 
--	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
-+	ALLOC_STATE(tex.offset, variable, 1, 0);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
- 	    cmdpacket0(R300_TX_OFFSET_0, 0);
-+	r300->hw.tex.offset.emit = &emit_tex_offsets;
- 
- 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
-@@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 			size * 4, r300->hw.max_state_size * 4);
- 	}
- 
-+	r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
-+		size*4, 16, DRM_BO_MEM_CMDBUF);
- 	r300->cmdbuf.size = size;
--	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
-+	r300->cmdbuf.written = 0;
-+	r300->cmdbuf.reserved = 0;
-+	r300->cmdbuf.committed = 0;
-+	r300->cmdbuf.reemit = 0;
-+	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
- }
- 
- /**
-@@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300)
- {
- 	struct r300_state_atom *atom;
- 
--	FREE(r300->cmdbuf.cmd_buf);
-+	dri_bo_unmap(r300->cmdbuf.buf);
-+	dri_bo_unreference(r300->cmdbuf.buf);
- 
- 	foreach(atom, &r300->hw.atomlist) {
- 		FREE(atom->cmd);
- 	}
- }
--
--void r300EmitBlit(r300ContextPtr rmesa,
--		  GLuint color_fmt,
--		  GLuint src_pitch,
--		  GLuint src_offset,
--		  GLuint dst_pitch,
--		  GLuint dst_offset,
--		  GLint srcx, GLint srcy,
--		  GLint dstx, GLint dsty, GLuint w, GLuint h)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr,
--			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
--			dst_pitch, dst_offset, dstx, dsty, w, h);
--
--	assert((src_pitch & 63) == 0);
--	assert((dst_pitch & 63) == 0);
--	assert((src_offset & 1023) == 0);
--	assert((dst_offset & 1023) == 0);
--	assert(w < (1 << 16));
--	assert(h < (1 << 16));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
--
--	cmd[0].header.cmd_type = R300_CMD_PACKET3;
--	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
--	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
--	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_BRUSH_NONE |
--		    (color_fmt << 8) |
--		    RADEON_GMC_SRC_DATATYPE_COLOR |
--		    RADEON_ROP3_S |
--		    RADEON_DP_SRC_SOURCE_MEMORY |
--		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
--
--	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
--	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
--	cmd[5].u = (srcx << 16) | srcy;
--	cmd[6].u = (dstx << 16) | dsty;	/* dst */
--	cmd[7].u = (w << 16) | h;
--}
--
--void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].u = 0;
--	cmd[0].wait.cmd_type = R300_CMD_WAIT;
--	cmd[0].wait.flags = flags;
--}
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-index a8eaa58..4708a4c 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-@@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300);
- 
- extern void r300InitCmdBuf(r300ContextPtr r300);
- extern void r300DestroyCmdBuf(r300ContextPtr r300);
-+extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller);
-+
-+extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line);
- 
- /**
-- * Make sure that enough space is available in the command buffer
-- * by flushing if necessary.
-- *
-- * \param dwords The number of dwords we need to be free on the command buffer
-+ * Every function writing to the command buffer needs to declare this
-+ * to get the necessary local variables.
-  */
--static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
--					     int dwords, const char *caller)
--{
--	assert(dwords < r300->cmdbuf.size);
-+#define BATCH_LOCALS(r300) \
-+	const r300ContextPtr b_l_r300 = r300
- 
--	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
--		r300FlushCmdBuf(r300, caller);
--}
-+/**
-+ * Prepare writing n dwords to the command buffer,
-+ * including producing any necessary state emits on buffer wraparound.
-+ */
-+#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__)
-+
-+/**
-+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
-+ */
-+#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__)
-+
-+/**
-+ * Write one dword to the command buffer.
-+ */
-+#define OUT_BATCH(data) \
-+	do { \
-+		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
-+			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \
-+		} else { \
-+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \
-+		} \
-+	} while(0)
- 
- /**
-- * Allocate the given number of dwords in the command buffer and return
-- * a pointer to the allocated area.
-- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
-- * causes state reemission after a flush. This is necessary to ensure
-- * correct hardware state after an unlock.
-+ * Write a relocated dword to the command buffer.
-  */
-+#define OUT_BATCH_RELOC(data, bo, offset, flags) \
-+	do { \
-+		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
-+			dri_emit_reloc(b_l_r300->cmdbuf.buf, flags, offset, 4*b_l_r300->cmdbuf.written, bo); \
-+			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \
-+		} else { \
-+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \
-+		} \
-+	} while(0)
-+
-+/**
-+ * Write n dwords from ptr to the command buffer.
-+ */
-+#define OUT_BATCH_TABLE(ptr,n) \
-+	do { \
-+		int _n = n; \
-+		if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \
-+			memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \
-+			b_l_r300->cmdbuf.written += _n; \
-+		} else { \
-+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \
-+		} \
-+	} while(0)
-+
-+/**
-+ * Finish writing dwords to the command buffer.
-+ * The number of (direct or indirect) OUT_BATCH calls between the previous
-+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
-+ */
-+#define END_BATCH() \
-+	do { \
-+		if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \
-+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \
-+	} while(0)
-+
-+/**
-+ * After the last END_BATCH() of rendering, this indicates that flushing
-+ * the command buffer now is okay.
-+ */
-+#define COMMIT_BATCH() \
-+	do { \
-+		assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \
-+		b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \
-+	} while(0)
-+
- static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
- 					       int dwords, const char *caller)
- {
-@@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
- 
- 	r300EnsureCmdBufSpace(r300, dwords, caller);
- 
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
-+	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
-+	r300->cmdbuf.written += dwords;
-+	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
- 	return ptr;
- }
- 
-@@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
- 
- 	r300EnsureCmdBufSpace(r300, dwords, caller);
- 
--	if (!r300->cmdbuf.count_used) {
-+	if (!r300->cmdbuf.written) {
- 		if (RADEON_DEBUG & DEBUG_IOCTL)
- 			fprintf(stderr,
- 				"Reemit state after flush (from %s)\n", caller);
- 		r300EmitState(r300);
- 	}
- 
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
-+	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
-+	r300->cmdbuf.written += dwords;
-+	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
- 	return ptr;
- }
- 
--extern void r300EmitBlit(r300ContextPtr rmesa,
--			 GLuint color_fmt,
--			 GLuint src_pitch,
--			 GLuint src_offset,
--			 GLuint dst_pitch,
--			 GLuint dst_offset,
--			 GLint srcx, GLint srcy,
--			 GLint dstx, GLint dsty, GLuint w, GLuint h);
--
--extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
--extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
--extern void r300EmitVertexShader(r300ContextPtr rmesa);
--extern void r300EmitPixelShader(r300ContextPtr rmesa);
--
- #endif				/* __R300_CMDBUF_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
-index fcf571d..cc9c11a 100644
---- a/src/mesa/drivers/dri/r300/r300_context.c
-+++ b/src/mesa/drivers/dri/r300/r300_context.c
-@@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_span.h"
- #include "r300_context.h"
- #include "r300_cmdbuf.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_tex.h"
- #include "r300_emit.h"
- #include "r300_swtcl.h"
- 
--#ifdef USER_BUFFERS
- #include "r300_mem.h"
--#endif
- 
- #include "vblank.h"
- #include "utils.h"
-@@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	struct dd_function_table functions;
- 	r300ContextPtr r300;
- 	GLcontext *ctx;
--	int tcl_mode, i;
-+	int tcl_mode;
- 
- 	assert(glVisual);
- 	assert(driContextPriv);
-@@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	r300InitTextureFuncs(&functions);
- 	r300InitShaderFuncs(&functions);
- 
--#ifdef USER_BUFFERS
--	r300_mem_init(r300);
--#endif
--
- 	if (!radeonInitContext(&r300->radeon, &functions,
- 			       glVisual, driContextPriv,
- 			       sharedContextPrivate)) {
-@@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 		return GL_FALSE;
- 	}
- 
-+	r300->radeon.bufmgr = radeonBufmgrClassicInit(r300);
-+
- 	/* Init r300 context data */
--	r300->dma.buf0_address =
--	    r300->radeon.radeonScreen->buffers->list[0].address;
--
--	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
--	make_empty_list(&r300->swapped);
--
--	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
--	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
--	for (i = 0; i < r300->nr_heaps; i++) {
--		/* *INDENT-OFF* */
--		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
--							       screen->
--							       texSize[i], 12,
--							       RADEON_NR_TEX_REGIONS,
--							       (drmTextureRegionPtr)
--							       r300->radeon.sarea->
--							       tex_list[i],
--							       &r300->radeon.sarea->
--							       tex_age[i],
--							       &r300->swapped,
--							       sizeof
--							       (r300TexObj),
--							       (destroy_texture_object_t
--								*)
--							       r300DestroyTexObj);
--		/* *INDENT-ON* */
--	}
- 	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
- 					      "texture_depth");
- 	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -299,13 +269,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
- 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
- 
--#ifdef USER_BUFFERS
- 	/* Needs further modifications */
- #if 0
- 	ctx->Const.MaxArrayLockSize =
- 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
- #endif
--#endif
- 
- 	/* Initialize the software rasterizer and helper modules.
- 	 */
-@@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	return GL_TRUE;
- }
- 
--static void r300FreeGartAllocations(r300ContextPtr r300)
--{
--	int i, ret, tries = 0, done_age, in_use = 0;
--	drm_radeon_mem_free_t memfree;
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--#ifdef USER_BUFFERS
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (r300->rmm->u_list[i].pending) {
--			in_use++;
--		}
--	}
--	/* Cannot flush/lock if no context exists. */
--	if (in_use)
--		r300FlushCmdBuf(r300, __FUNCTION__);
--
--	done_age = radeonGetAge((radeonContextPtr) r300);
--
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (!r300->rmm->u_list[i].pending) {
--			continue;
--		}
--
--		assert(r300->rmm->u_list[i].h_pending == 0);
--
--		tries = 0;
--		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
--			usleep(10);
--			done_age = radeonGetAge((radeonContextPtr) r300);
--		}
--		if (tries >= 1000) {
--			WARN_ONCE("Failed to idle region!");
--		}
--
--		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
--		    (char *)r300->radeon.radeonScreen->gartTextures.map;
--
--		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
--				      DRM_RADEON_FREE, &memfree,
--				      sizeof(memfree));
--		if (ret) {
--			fprintf(stderr, "Failed to free at %p\nret = %s\n",
--				r300->rmm->u_list[i].ptr, strerror(-ret));
--		} else {
--			if (i == r300->rmm->u_last)
--				r300->rmm->u_last--;
--
--			r300->rmm->u_list[i].pending = 0;
--			r300->rmm->u_list[i].ptr = NULL;
--		}
--	}
--	r300->rmm->u_head = i;
--#endif				/* USER_BUFFERS */
--}
--
- /* Destroy the device specific context.
-  */
- void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
-@@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
- 	assert(r300);		/* should never be null */
- 
- 	if (r300) {
--		GLboolean release_texture_heaps;
--
--		release_texture_heaps =
--		    (r300->radeon.glCtx->Shared->RefCount == 1);
- 		_swsetup_DestroyContext(r300->radeon.glCtx);
- 		_tnl_ProgramCacheDestroy(r300->radeon.glCtx);
- 		_tnl_DestroyContext(r300->radeon.glCtx);
- 		_vbo_DestroyContext(r300->radeon.glCtx);
- 		_swrast_DestroyContext(r300->radeon.glCtx);
- 
--		if (r300->dma.current.buf) {
--			r300ReleaseDmaRegion(r300, &r300->dma.current,
--					     __FUNCTION__);
--#ifndef USER_BUFFERS
--			r300FlushCmdBuf(r300, __FUNCTION__);
--#endif
-+		if (r300->dma.current) {
-+			dri_bo_unreference(r300->dma.current);
-+			r300->dma.current = 0;
- 		}
--		r300FreeGartAllocations(r300);
-+		r300FlushCmdBuf(r300, __FUNCTION__);
- 		r300DestroyCmdBuf(r300);
- 
- 		if (radeon->state.scissor.pClipRects) {
-@@ -521,28 +416,13 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
- 			radeon->state.scissor.pClipRects = NULL;
- 		}
- 
--		if (release_texture_heaps) {
--			/* This share group is about to go away, free our private
--			 * texture object data.
--			 */
--			int i;
--
--			for (i = 0; i < r300->nr_heaps; i++) {
--				driDestroyTextureHeap(r300->texture_heaps[i]);
--				r300->texture_heaps[i] = NULL;
--			}
--
--			assert(is_empty_list(&r300->swapped));
--		}
--
- 		radeonCleanupContext(&r300->radeon);
- 
--#ifdef USER_BUFFERS
- 		/* the memory manager might be accessed when Mesa frees the shared
- 		 * state, so don't destroy it earlier
- 		 */
--		r300_mem_destroy(r300);
--#endif
-+		dri_bufmgr_destroy(&r300->radeon.bufmgr->base);
-+		r300->radeon.bufmgr = 0;
- 
- 		/* free the option cache */
- 		driDestroyOptionCache(&r300->radeon.optionCache);
-diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
-index d2017f8..5c99740 100644
---- a/src/mesa/drivers/dri/r300/r300_context.h
-+++ b/src/mesa/drivers/dri/r300/r300_context.h
-@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_vertex.h"
- #include "drm.h"
- #include "radeon_drm.h"
-+#include "dri_bufmgr.h"
- #include "dri_util.h"
- #include "texmem.h"
- 
-@@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "mtypes.h"
- #include "colormac.h"
- 
--#define USER_BUFFERS
--
- struct r300_context;
- typedef struct r300_context r300ContextRec;
- typedef struct r300_context *r300ContextPtr;
-+typedef struct radeon_bufmgr radeon_bufmgr;
- 
- #include "radeon_lock.h"
- #include "mm.h"
-@@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f)
- 
- /************ DMA BUFFERS **************/
- 
--/* Need refcounting on dma buffers:
-- */
--struct r300_dma_buffer {
--	int refcount;		/**< the number of retained regions in buf */
--	drmBufPtr buf;
--	int id;
--};
--#undef GET_START
--#ifdef USER_BUFFERS
--#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
--#else
--#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--#endif
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r300_dma_region {
--	struct r300_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--
--	int aos_offset;		/* address in GART memory */
--	int aos_stride;		/* distance between elements, in dwords */
--	int aos_size;		/* number of components (1-4) */
--};
--
- struct r300_dma {
- 	/* Active dma region.  Allocations for vertices and retained
- 	 * regions come from here.  Also used for emitting random vertices,
- 	 * these may be flushed by calling flush_current();
- 	 */
--	struct r300_dma_region current;
-+	dri_bo *current; /** Buffer that DMA memory is allocated from */
-+	int current_used; /** Number of bytes allocated and forgotten about */
-+	int current_vertexptr; /** End of active vertex region */
- 
-+	/**
-+	 * If current_vertexptr != current_used then flush must be non-zero.
-+	 * flush must be called before non-active vertex allocations can be
-+	 * performed.
-+	 */
- 	void (*flush) (r300ContextPtr);
- 
--	char *buf0_address;	/* start of buf[0], for index calcs */
--
- 	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
- 	 * for which a DISCARD command is currently queued in the command buffer.
- 	 */
-@@ -173,15 +151,12 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
- /* Texture object in locally shared texture space.
-  */
- struct r300_tex_obj {
--	driTextureObject base;
--
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
-+	struct gl_texture_object base;
-+	struct _r300_mipmap_tree *mt;
-+	GLuint dirty_images[6];
- 
- 	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
-+	GLuint override_offset;
- 
- 	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
- 	/* hardware register values */
-@@ -191,30 +166,16 @@ struct r300_tex_obj {
- 	GLuint pitch_reg;
- 	GLuint size;		/* npot only */
- 	GLuint format;
--	GLuint offset;		/* Image location in the card's address space.
--				   All cube faces follow. */
--	GLuint unknown4;
--	GLuint unknown5;
--	/* end hardware registers */
--
--	/* registers computed by r200 code - keep them here to
--	   compare against what is actually written.
--
--	   to be removed later.. */
- 	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--	GLuint format_x;
--
--	GLboolean border_fallback;
-+	/* end hardware registers */
- 
- 	GLuint tile_bits;	/* hw texture tile bits used on this texture */
- };
- 
--struct r300_texture_env_state {
--	r300TexObjPtr texobj;
--	GLenum format;
--	GLenum envMode;
--};
-+static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj)
-+{
-+	return (r300TexObj*)texObj;
-+}
- 
- /* The blit width for texture uploads
-  */
-@@ -222,7 +183,6 @@ struct r300_texture_env_state {
- #define R300_MAX_TEXTURE_UNITS 8
- 
- struct r300_texture_state {
--	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
- 	int tc_count;		/* number of incoming texture coordinates from VAP */
- };
- 
-@@ -242,6 +202,7 @@ struct r300_state_atom {
- 	GLboolean dirty;
- 
- 	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
-+	void (*emit) (r300ContextPtr);
- };
- 
- #define R300_VPT_CMD_0		0
-@@ -549,6 +510,8 @@ struct r300_hw_state {
- 		struct r300_state_atom border_color;
- 	} tex;
- 	struct r300_state_atom txe;	/* tex enable (4104) */
-+
-+	r300TexObj *textures[R300_MAX_TEXTURE_UNITS];
- };
- 
- /**
-@@ -559,10 +522,14 @@ struct r300_hw_state {
-  * otherwise.
-  */
- struct r300_cmdbuf {
--	int size;		/* DWORDs allocated for buffer */
--	uint32_t *cmd_buf;
--	int count_used;		/* DWORDs filled so far */
--	int count_reemit;	/* size of re-emission batch */
-+	dri_bo *buf;
-+	int reemit; /** # of dwords in reemit sequence (is always <= committed) */
-+	int size; /** # of dwords total */
-+
-+	int committed; /** # of dwords that we have committed to */
-+	int written; /** # of dwords written (is always >= committed) */
-+	int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */
-+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
- };
- 
- /**
-@@ -811,18 +778,25 @@ struct r500_fragment_program {
- #define REG_COLOR0	1
- #define REG_TEX0	2
- 
-+struct r300_aos {
-+	dri_bo *bo; /** Buffer object where vertex data is stored */
-+	int offset; /** Offset into buffer object, in bytes */
-+	int components; /** Number of components per vertex */
-+	int stride; /** Stride in dwords (may be 0 for repeating) */
-+	int count; /** Number of vertices */
-+};
-+
- struct r300_state {
- 	struct r300_depthbuffer_state depth;
- 	struct r300_texture_state texture;
- 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
- 	struct r300_vertex_shader_state vertex_shader;
--	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
-+	struct r300_aos aos[R300_MAX_AOS_ARRAYS];
- 	int aos_count;
- 
--	GLuint *Elts;
--	struct r300_dma_region elt_dma;
-+	dri_bo *elt_dma_bo; /** Buffer object that contains element indices */
-+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
- 
--	struct r300_dma_region swtcl_dma;
- 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
- 							   They are the same as tnl->render_inputs for fixed pipeline */
- 
-@@ -880,13 +854,6 @@ struct r300_swtcl_info {
-     * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
-     */
-    GLuint specoffset;
--
--   /**
--    * Should Mesa project vertex data or will the hardware do it?
--    */
--   GLboolean needproj;
--
--   struct r300_dma_region indexed_verts;
- };
- 
- 
-@@ -905,25 +872,11 @@ struct r300_context {
- 	/* Vertex buffers
- 	 */
- 	struct r300_dma dma;
--	GLboolean save_on_next_unlock;
- 	GLuint NewGLState;
- 
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
- 	int texture_depth;
- 	float initialMaxAnisotropy;
- 
--	/* Clientdata textures;
--	 */
--	GLuint prefer_gart_client_texturing;
--
--#ifdef USER_BUFFERS
--	struct r300_memory_manager *rmm;
--#endif
--
- 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
-index 2ea17ad..5e2afd5 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.c
-+++ b/src/mesa/drivers/dri/r300/r300_emit.c
-@@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_emit.h"
- #include "r300_ioctl.h"
- 
--#ifdef USER_BUFFERS
- #include "r300_mem.h"
--#endif
- 
- #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
-     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
-@@ -86,11 +84,9 @@ do {						\
- } while (0)
- #endif
- 
--static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
-+static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
-+static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
-+static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
-+static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
-+
-+static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos,
- 			GLvoid * data, int size, int stride, int count)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	uint32_t *out;
- 
- 	if (stride == 0) {
--		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
-+		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
- 		count = 1;
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = 0;
-+		aos->stride = 0;
- 	} else {
--		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = size;
-+		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
-+		aos->stride = size;
- 	}
- 
-+	aos->components = size;
-+	aos->count = count;
-+
-+	out = (uint32_t*)((char*)aos->bo->virtual + aos->offset);
- 	switch (size) {
--	case 1:
--		r300EmitVec4(ctx, rvb, data, stride, count);
--		break;
--	case 2:
--		r300EmitVec8(ctx, rvb, data, stride, count);
--		break;
--	case 3:
--		r300EmitVec12(ctx, rvb, data, stride, count);
--		break;
--	case 4:
--		r300EmitVec16(ctx, rvb, data, stride, count);
--		break;
-+	case 1: r300EmitVec4(out, data, stride, count); break;
-+	case 2: r300EmitVec8(out, data, stride, count); break;
-+	case 3: r300EmitVec12(out, data, stride, count); break;
-+	case 4: r300EmitVec16(out, data, stride, count); break;
- 	default:
- 		assert(0);
- 		break;
-@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx)
- 	}
- 
- 	for (i = 0; i < nr; i++) {
--		int ci, fix, found = 0;
-+		int ci;
- 
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
-@@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx)
- 			swizzle[i][ci] = ci;
- 		}
- 
--		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
--			if (vb->AttribPtr[tab[i]]->stride % 4) {
--				return R300_FALLBACK_TCL;
--			}
--			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].start = 0;
--			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
--			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--		} else {
--			r300EmitVec(ctx, &rmesa->state.aos[i],
--				    vb->AttribPtr[tab[i]]->data,
--				    vb->AttribPtr[tab[i]]->size,
--				    vb->AttribPtr[tab[i]]->stride, count);
--		}
--
--		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--
--		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
--			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
--				continue;
--			}
--			found = 1;
--			break;
--		}
--
--		if (found) {
--			if (fix > 0) {
--				WARN_ONCE("Feeling lucky?\n");
--			}
--			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
--			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
--				swizzle[i][ci] += fix;
--			}
--		} else {
--			WARN_ONCE
--			    ("Cannot handle offset %x with stride %d, comp %d\n",
--			     rmesa->state.aos[i].aos_offset,
--			     rmesa->state.aos[i].aos_stride,
--			     vb->AttribPtr[tab[i]]->size);
--			return R300_FALLBACK_TCL;
--		}
-+		r300EmitVec(ctx, &rmesa->state.aos[i],
-+				vb->AttribPtr[tab[i]]->data,
-+				vb->AttribPtr[tab[i]]->size,
-+				vb->AttribPtr[tab[i]]->stride, count);
- 	}
- 
- 	/* Setup INPUT_ROUTE. */
-@@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx)
- 	return R300_FALLBACK_NONE;
- }
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	int i;
--
--	if (rmesa->state.elt_dma.buf)
--		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
--
--	for (i = 0; i < rmesa->state.aos_count; i++) {
--		if (rmesa->state.aos[i].buf)
--			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
--	}
--}
--#endif
--
- void r300ReleaseArrays(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	int i;
- 
--	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
-+	if (rmesa->state.elt_dma_bo) {
-+		dri_bo_unreference(rmesa->state.elt_dma_bo);
-+		rmesa->state.elt_dma_bo = 0;
-+	}
- 	for (i = 0; i < rmesa->state.aos_count; i++) {
--		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
-+		if (rmesa->state.aos[i].bo) {
-+			dri_bo_unreference(rmesa->state.aos[i].bo);
-+			rmesa->state.aos[i].bo = 0;
-+		}
- 	}
- }
- 
- void r300EmitCacheFlush(r300ContextPtr rmesa)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
--	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
--	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+	BATCH_LOCALS(rmesa);
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	END_BATCH();
-+	COMMIT_BATCH();
-+}
- 
--	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
--	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
--	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+void r300EmitBlit(r300ContextPtr rmesa,
-+		  GLuint color_fmt,
-+		  GLuint src_pitch,
-+		  dri_bo *src_bo, int src_offset,
-+		  GLuint dst_pitch,
-+		  GLuint dst_offset,
-+		  GLint srcx, GLint srcy,
-+		  GLint dstx, GLint dsty, GLuint w, GLuint h)
-+{
-+	BATCH_LOCALS(rmesa);
-+
-+	if (RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr,
-+			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
-+			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
-+			dst_pitch, dst_offset, dstx, dsty, w, h);
-+
-+	assert((src_pitch & 63) == 0);
-+	assert((dst_pitch & 63) == 0);
-+	assert((src_offset & 1023) == 0);
-+	assert((dst_offset & 1023) == 0);
-+	assert(w < (1 << 16));
-+	assert(h < (1 << 16));
-+
-+	BEGIN_BATCH(8);
-+	OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5);
-+	OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
-+		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-+		  RADEON_GMC_BRUSH_NONE |
-+		  (color_fmt << 8) |
-+		  RADEON_GMC_SRC_DATATYPE_COLOR |
-+		  RADEON_ROP3_S |
-+		  RADEON_DP_SRC_SOURCE_MEMORY |
-+		  RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
-+	OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER);
-+	OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10));
-+	OUT_BATCH((srcx << 16) | srcy);
-+	OUT_BATCH((dstx << 16) | dsty);
-+	OUT_BATCH((w << 16) | h);
-+	END_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
-index 5950539..179983d 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.h
-+++ b/src/mesa/drivers/dri/r300/r300_emit.h
-@@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void)
- 	return cmd.u;
- }
- 
--/**
-- * Prepare to write a register value to register at address reg.
-- * If num_extra > 0 then the following extra values are written
-- * to registers with address +4, +8 and so on..
-- */
--#define reg_start(reg, num_extra)					\
--	do {								\
--		int _n;							\
--		_n=(num_extra);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+2),				\
--					__FUNCTION__);			\
--		cmd_reserved=_n+2;					\
--		cmd_written=1;						\
--		cmd[0].i=cmdpacket0((reg), _n+1);			\
--	} while (0);
-+
-+/** Single register write to command buffer; requires 2 dwords. */
-+#define OUT_BATCH_REGVAL(reg, val) \
-+	OUT_BATCH(cmdpacket0((reg), 1)); \
-+	OUT_BATCH((val))
-+
-+/** Continuous register range write to command buffer; requires 1 dword,
-+ * expects count dwords afterwards for register contents. */
-+#define OUT_BATCH_REGSEQ(reg, count) \
-+	OUT_BATCH(cmdpacket0((reg), (count)));
-+
-+/** Write a 32 bit float to the ring; requires 1 dword. */
-+#define OUT_BATCH_FLOAT32(f) \
-+	OUT_BATCH(r300PackFloat32((f)));
- 
- /**
-- * Emit GLuint freestyle
-+ * Write the header of a packet3 to the command buffer.
-+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
-  */
--#define e32(dword)							\
--	do {								\
--		if(cmd_written<cmd_reserved) {				\
--			cmd[cmd_written].i=(dword);			\
--			cmd_written++;					\
--		} else {						\
--			fprintf(stderr,					\
--				"e32 but no previous packet "		\
--				"declaration.\n"			\
--				"Aborting! in %s::%s at line %d, "	\
--				"cmd_written=%d cmd_reserved=%d\n",	\
--				__FILE__, __FUNCTION__, __LINE__,	\
--				cmd_written, cmd_reserved);		\
--			_mesa_exit(-1);					\
--		}							\
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
-+	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_RAW)); \
-+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
- 	} while(0)
- 
--#define	efloat(f) e32(r300PackFloat32(f))
--
--#define vsf_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+2;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdvpu((dest), _n/4);			\
--	} while (0);
--
--#define r500fp_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+1;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
--	} while (0);
--
--#define start_packet3(packet, count)					\
--	{								\
--		int _n;							\
--		GLuint _p;						\
--		_n = (count);						\
--		_p = (packet);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+3),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+3;					\
--		cmd_written = 2;					\
--		if(_n > 0x3fff) {					\
--			fprintf(stderr,"Too big packet3 %08x: cannot "	\
--				"store %d dwords\n",			\
--				_p, _n);				\
--			_mesa_exit(-1);					\
--		}							\
--		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
--		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
--	}
--
- /**
-  * Must be sent to switch to 2d commands
-  */
- void static INLINE end_3d(r300ContextPtr rmesa)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].header.cmd_type = R300_CMD_END3D;
-+	BEGIN_BATCH(1);
-+	OUT_BATCH(cmdpacify());
-+	END_BATCH();
- }
- 
- void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdcpdelay(count);
-+	BEGIN_BATCH(1);
-+	OUT_BATCH(cmdcpdelay(count));
-+	END_BATCH();
- }
- 
- void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdwait(flags);
-+	BEGIN_BATCH(1);
-+	OUT_BATCH(cmdwait(flags));
-+	END_BATCH();
- }
- 
- extern int r300EmitArrays(GLcontext * ctx);
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx);
--#endif
--
- extern void r300ReleaseArrays(GLcontext * ctx);
- extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
- extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
-@@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
- extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
- extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);
- 
-+extern void r300EmitBlit(r300ContextPtr rmesa,
-+			 GLuint color_fmt,
-+			 GLuint src_pitch,
-+			 dri_bo *src_bo, int src_offset,
-+			 GLuint dst_pitch,
-+			 GLuint dst_offset,
-+			 GLint srcx, GLint srcy,
-+			 GLint dstx, GLint dsty, GLuint w, GLuint h);
-+
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
-index bd7f060..046f9a2 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
-@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_reg.h"
- #include "r300_emit.h"
- #include "r300_fragprog.h"
-+#include "r300_mem.h"
- 
- #include "vblank.h"
- 
-@@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define CLEARBUFFER_DEPTH	0x2
- #define CLEARBUFFER_STENCIL	0x4
- 
--static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
-+static void r300ClearBuffer(r300ContextPtr r300, int flags,
-+			    struct radeon_renderbuffer *rrb)
- {
-+	BATCH_LOCALS(r300);
- 	GLcontext *ctx = r300->radeon.glCtx;
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
--	GLuint cboffset, cbpitch;
--	drm_r300_cmd_header_t *cmd2;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	GLuint cbpitch = 0;
- 	r300ContextPtr rmesa = r300;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
--			__FUNCTION__, buffer ? "back" : "front",
--			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
--
--	if (buffer) {
--		cboffset = r300->radeon.radeonScreen->backOffset;
--		cbpitch = r300->radeon.radeonScreen->backPitch;
--	} else {
--		cboffset = r300->radeon.radeonScreen->frontOffset;
--		cbpitch = r300->radeon.radeonScreen->frontPitch;
-+		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
-+			__FUNCTION__, rrb, dPriv->x, dPriv->y,
-+			dPriv->w, dPriv->h);
-+
-+	if (rrb) {
-+		cbpitch = rrb->pitch;
-+		if (rrb->cpp == 4)
-+			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+		else
-+			cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+		if (r300->radeon.sarea->tiling_enabled)
-+			cbpitch |= R300_COLOR_TILE_ENABLE;
- 	}
- 
--	cboffset += r300->radeon.radeonScreen->fbLocation;
--
-+	/* TODO in bufmgr */
- 	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
- 	end_3d(rmesa);
- 
--	R300_STATECHANGE(r300, cb);
--	reg_start(R300_RB3D_COLOROFFSET0, 0);
--	e32(cboffset);
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		cbpitch |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		cbpitch |= R300_COLOR_TILE_ENABLE;
--
--	reg_start(R300_RB3D_COLORPITCH0, 0);
--	e32(cbpitch);
--
--	R300_STATECHANGE(r300, cmk);
--	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
-+	BEGIN_BATCH(19);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
-+	OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
- 
-+	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
- 	if (flags & CLEARBUFFER_COLOR) {
--		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
--		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
--		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
--		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
-+		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
- 	} else {
--		e32(0x0);
-+		OUT_BATCH(0);
- 	}
- 
--	R300_STATECHANGE(r300, zs);
--	reg_start(R300_ZB_CNTL, 2);
-+	OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
- 
- 	{
- 		uint32_t t1, t2;
-@@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
- 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
- 		}
- 
--		e32(t1);
--		e32(t2);
--		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
--		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+		OUT_BATCH(t1);
-+		OUT_BATCH(t2);
-+		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
-+			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
- 	}
- 
--	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
--	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
--	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
--	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
--	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
--	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
--	cmd2[4].u = r300PackFloat32(1.0);
--	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
--	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
--	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
--	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
-+	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR));
-+	OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+	OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+	OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+	END_BATCH();
- 
- 	r300EmitCacheFlush(rmesa);
- 	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+
-+	R300_STATECHANGE(r300, cb);
-+	R300_STATECHANGE(r300, cmk);
-+	R300_STATECHANGE(r300, zs);
- }
- 
- static void r300EmitClearState(GLcontext * ctx)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
--	r300ContextPtr rmesa = r300;
-+	BATCH_LOCALS(r300);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	GLuint vap_cntl;
-@@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx)
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- 		has_tcl = 0;
- 
--        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--                is_r500 = 1;
--
-+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+		is_r500 = 1;
- 
--	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
--	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
--	 * quite complex; see the functions in r300_emit.c.
-+	/* State atom dirty tracking is a little subtle here.
-+	 *
-+	 * On the one hand, we need to make sure base state is emitted
-+	 * here if we start with an empty batch buffer, otherwise clear
-+	 * works incorrectly with multiple processes. Therefore, the first
-+	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
- 	 *
--	 * I believe it would be a good idea to extend the functions in
--	 * r300_emit.c so that they can be used to setup the default values for
--	 * these registers, as well as the actual values used for rendering.
-+	 * On the other hand, implicit state emission clears the state atom
-+	 * dirty bits, so we have to call R300_STATECHANGE later than the
-+	 * first BEGIN_BATCH.
-+	 *
-+	 * The final trickiness is that, because we change state, we need
-+	 * to ensure that any stored swtcl primitives are flushed properly
-+	 * before we start changing state. See the R300_NEWPRIM in r300Clear
-+	 * for this.
- 	 */
--	R300_STATECHANGE(r300, vir[0]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
-+	BEGIN_BATCH(31);
-+	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	if (!has_tcl)
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 	else
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 
--	/* disable fog */
--	R300_STATECHANGE(r300, fogs);
--	reg_start(R300_FG_FOG_BLEND, 0);
--	e32(0x0);
--
--	R300_STATECHANGE(r300, vir[1]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
--	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-+	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
-+	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-+	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-@@ -226,238 +216,246 @@ static void r300EmitClearState(GLcontext * ctx)
- 	      << R300_SWIZZLE1_SHIFT)));
- 
- 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
--	R300_STATECHANGE(r300, vic);
--	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
--	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
--	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
-+	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
-+	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-+	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- 
--	R300_STATECHANGE(r300, vte);
- 	/* comes from fglrx startup of clear */
--	reg_start(R300_SE_VTE_CNTL, 1);
--	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
--	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
--	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
--	    R300_VPORT_Z_OFFSET_ENA);
--	e32(0x8);
-+	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
-+	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-+		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-+		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-+		  R300_VPORT_Z_OFFSET_ENA);
-+	OUT_BATCH(0x8);
- 
--	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
--	e32(0xaaaaaaaa);
-+	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
- 
--	R300_STATECHANGE(r300, vof);
--	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
--	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
--	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
--	e32(0x0);		/* no textures */
-+	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-+		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-+	OUT_BATCH(0); /* no textures */
- 
--	R300_STATECHANGE(r300, txe);
--	reg_start(R300_TX_ENABLE, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
- 
--	R300_STATECHANGE(r300, vpt);
--	reg_start(R300_SE_VPORT_XSCALE, 5);
--	efloat(1.0);
--	efloat(dPriv->x);
--	efloat(1.0);
--	efloat(dPriv->y);
--	efloat(1.0);
--	efloat(0.0);
-+	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->x);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->y);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(0.0);
- 
--	R300_STATECHANGE(r300, at);
--	reg_start(R300_FG_ALPHA_FUNC, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
-+
-+	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
-+	OUT_BATCH(0x0);
-+	OUT_BATCH(0x0);
-+	END_BATCH();
- 
-+	R300_STATECHANGE(r300, vir[0]);
-+	R300_STATECHANGE(r300, fogs);
-+	R300_STATECHANGE(r300, vir[1]);
-+	R300_STATECHANGE(r300, vic);
-+	R300_STATECHANGE(r300, vte);
-+	R300_STATECHANGE(r300, vof);
-+	R300_STATECHANGE(r300, txe);
-+	R300_STATECHANGE(r300, vpt);
-+	R300_STATECHANGE(r300, at);
- 	R300_STATECHANGE(r300, bld);
--	reg_start(R300_RB3D_CBLEND, 1);
--	e32(0x0);
--	e32(0x0);
-+	R300_STATECHANGE(r300, ps);
- 
- 	if (has_tcl) {
--	    R300_STATECHANGE(r300, vap_clip_cntl);
--	    reg_start(R300_VAP_CLIP_CNTL, 0);
--	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		R300_STATECHANGE(r300, vap_clip_cntl);
-+
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		END_BATCH();
-         }
- 
--	R300_STATECHANGE(r300, ps);
--	reg_start(R300_GA_POINT_SIZE, 0);
--	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
--	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
-+		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
-+		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	END_BATCH();
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R300_RS_IP_0, 7);
--		for (i = 0; i < 8; ++i) {
--			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
--		}
--
- 		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
- 		R300_STATECHANGE(r300, rr);
--		reg_start(R300_RS_INST_0, 0);
--		e32(R300_RS_INST_COL_CN_WRITE);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
-+		for (i = 0; i < 8; ++i)
-+			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-+
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
-+
-+		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	} else {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R500_RS_IP_0, 7);
-+		R300_STATECHANGE(r300, rc);
-+		R300_STATECHANGE(r300, rr);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
- 		for (i = 0; i < 8; ++i) {
--			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
--			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-+			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
- 		}
- 
--		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
--		R300_STATECHANGE(r300, rr);
--		reg_start(R500_RS_INST_0, 0);
--		e32(R500_RS_INST_COL_CN_WRITE);
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
- 
-+		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	}
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, fp);
--		reg_start(R300_US_CONFIG, 2);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		reg_start(R300_US_CODE_ADDR_0, 3);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		e32(R300_RGBA_OUT);
--
- 		R300_STATECHANGE(r300, fpi[0]);
- 		R300_STATECHANGE(r300, fpi[1]);
- 		R300_STATECHANGE(r300, fpi[2]);
- 		R300_STATECHANGE(r300, fpi[3]);
- 
--		reg_start(R300_US_ALU_RGB_INST_0, 0);
--		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
--
--		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
--		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
--
--		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
--		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
--
--		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
--		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		BEGIN_BATCH(17);
-+		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(R300_RGBA_OUT);
-+
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
-+			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
-+			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
-+			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
-+			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		END_BATCH();
- 	} else {
-- 		R300_STATECHANGE(r300, fp);
-- 		reg_start(R500_US_CONFIG, 1);
-- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-- 		e32(0x0);
-- 		reg_start(R500_US_CODE_ADDR, 2);
-- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-- 		e32(R500_US_CODE_OFFSET_ADDR(0));
--
-+		R300_STATECHANGE(r300, fp);
- 		R300_STATECHANGE(r300, r500fp);
--		r500fp_start_fragment(0, 6);
--
--		e32(R500_INST_TYPE_OUT |
--		    R500_INST_TEX_SEM_WAIT |
--		    R500_INST_LAST |
--		    R500_INST_RGB_OMASK_R |
--		    R500_INST_RGB_OMASK_G |
--		    R500_INST_RGB_OMASK_B |
--		    R500_INST_ALPHA_OMASK |
--		    R500_INST_RGB_CLAMP |
--		    R500_INST_ALPHA_CLAMP);
--
--		e32(R500_RGB_ADDR0(0) |
--		    R500_RGB_ADDR1(0) |
--		    R500_RGB_ADDR1_CONST |
--		    R500_RGB_ADDR2(0) |
--		    R500_RGB_ADDR2_CONST);
--
--		e32(R500_ALPHA_ADDR0(0) |
--		    R500_ALPHA_ADDR1(0) |
--		    R500_ALPHA_ADDR1_CONST |
--		    R500_ALPHA_ADDR2(0) |
--		    R500_ALPHA_ADDR2_CONST);
--
--		e32(R500_ALU_RGB_SEL_A_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_A_R |
--		    R500_ALU_RGB_G_SWIZ_A_G |
--		    R500_ALU_RGB_B_SWIZ_A_B |
--		    R500_ALU_RGB_SEL_B_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_B_R |
--		    R500_ALU_RGB_B_SWIZ_B_G |
--		    R500_ALU_RGB_G_SWIZ_B_B);
--
--		e32(R500_ALPHA_OP_CMP |
--		    R500_ALPHA_SWIZ_A_A |
--		    R500_ALPHA_SWIZ_B_A);
--
--		e32(R500_ALU_RGBA_OP_CMP |
--		    R500_ALU_RGBA_R_SWIZ_0 |
--		    R500_ALU_RGBA_G_SWIZ_0 |
--		    R500_ALU_RGBA_B_SWIZ_0 |
--		    R500_ALU_RGBA_A_SWIZ_0);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
-+		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
-+		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-+		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-+		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
-+
-+		OUT_BATCH(cmdr500fp(0, 1, 0, 0));
-+		OUT_BATCH(R500_INST_TYPE_OUT |
-+			  R500_INST_TEX_SEM_WAIT |
-+			  R500_INST_LAST |
-+			  R500_INST_RGB_OMASK_R |
-+			  R500_INST_RGB_OMASK_G |
-+			  R500_INST_RGB_OMASK_B |
-+			  R500_INST_ALPHA_OMASK |
-+			  R500_INST_RGB_CLAMP |
-+			  R500_INST_ALPHA_CLAMP);
-+		OUT_BATCH(R500_RGB_ADDR0(0) |
-+			  R500_RGB_ADDR1(0) |
-+			  R500_RGB_ADDR1_CONST |
-+			  R500_RGB_ADDR2(0) |
-+			  R500_RGB_ADDR2_CONST);
-+		OUT_BATCH(R500_ALPHA_ADDR0(0) |
-+			  R500_ALPHA_ADDR1(0) |
-+			  R500_ALPHA_ADDR1_CONST |
-+			  R500_ALPHA_ADDR2(0) |
-+			  R500_ALPHA_ADDR2_CONST);
-+		OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 |
-+			  R500_ALU_RGB_R_SWIZ_A_R |
-+			  R500_ALU_RGB_G_SWIZ_A_G |
-+			  R500_ALU_RGB_B_SWIZ_A_B |
-+			  R500_ALU_RGB_SEL_B_SRC0 |
-+			  R500_ALU_RGB_R_SWIZ_B_R |
-+			  R500_ALU_RGB_B_SWIZ_B_G |
-+			  R500_ALU_RGB_G_SWIZ_B_B);
-+		OUT_BATCH(R500_ALPHA_OP_CMP |
-+			  R500_ALPHA_SWIZ_A_A |
-+			  R500_ALPHA_SWIZ_B_A);
-+		OUT_BATCH(R500_ALU_RGBA_OP_CMP |
-+			  R500_ALU_RGBA_R_SWIZ_0 |
-+			  R500_ALU_RGBA_G_SWIZ_0 |
-+			  R500_ALU_RGBA_B_SWIZ_0 |
-+			  R500_ALU_RGBA_A_SWIZ_0);
-+		END_BATCH();
- 	}
- 
--	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
--	e32(0x00000000);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+	END_BATCH();
-+
- 	if (has_tcl) {
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
--	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
--	} else
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-+	} else {
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
-+	}
- 
- 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
--	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
--	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
--	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
--	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
- 	else
--	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+
-+	R300_STATECHANGE(r300, vap_cntl);
- 
--	R300_STATECHANGE(rmesa, vap_cntl);
--	reg_start(R300_VAP_CNTL, 0);
--	e32(vap_cntl);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
-+	END_BATCH();
- 
- 	if (has_tcl) {
- 		R300_STATECHANGE(r300, pvs);
--		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
--
--		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
--		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
--		    (1 << R300_PVS_LAST_INST_SHIFT));
--		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
--		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
--		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
--
- 		R300_STATECHANGE(r300, vpi);
--		vsf_start_fragment(0x0, 8);
--
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
- 
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
-+		BEGIN_BATCH(13);
-+		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
-+			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-+			  (1 << R300_PVS_LAST_INST_SHIFT));
-+		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-+			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-+		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-+
-+		OUT_BATCH(cmdvpu(0, 2));
-+		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
-+		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-+		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-+		OUT_BATCH(0x0);
-+
-+		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
-+		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-+		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-+		OUT_BATCH(0x0);
-+		END_BATCH();
- 	}
- }
- 
-@@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx)
- static void r300Clear(GLcontext * ctx, GLbitfield mask)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(r300);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
-+	GLframebuffer *fb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
- 	int flags = 0;
- 	int bits = 0;
- 	int swapped;
-@@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 			return;
- 	}
- 
-+	/* Flush swtcl vertices if necessary, because we will change hardware
-+	 * state during clear. See also the state-related comment in
-+	 * r300EmitClearState.
-+	 */
-+	R300_NEWPRIM(r300);
-+
- 	if (mask & BUFFER_BIT_FRONT_LEFT) {
- 		flags |= BUFFER_BIT_FRONT_LEFT;
- 		mask &= ~BUFFER_BIT_FRONT_LEFT;
-@@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 		_swrast_Clear(ctx, mask);
- 	}
- 
--	swapped = r300->radeon.sarea->pfCurrentPage == 1;
--
- 	/* Make sure it fits there. */
- 	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
- 	if (flags || bits)
- 		r300EmitClearState(ctx);
- 
- 	if (flags & BUFFER_BIT_FRONT_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
-+		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
- 		bits = 0;
- 	}
- 
- 	if (flags & BUFFER_BIT_BACK_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
-+		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
- 		bits = 0;
- 	}
- 
- 	if (bits)
--		r300ClearBuffer(r300, bits, 0);
-+		r300ClearBuffer(r300, bits, NULL);
- 
-+	COMMIT_BATCH();
- }
- 
- void r300Flush(GLcontext * ctx)
-@@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx)
- 	if (rmesa->dma.flush)
- 		rmesa->dma.flush( rmesa );
- 
--	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
-+	if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit)
- 		r300FlushCmdBuf(rmesa, __FUNCTION__);
- }
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--
- void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
- {
--	struct r300_dma_buffer *dmabuf;
- 	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
- 
- 	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
-@@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
- 		rmesa->dma.flush(rmesa);
- 	}
- 
--	if (rmesa->dma.current.buf) {
--#ifdef USER_BUFFERS
--		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
--#endif
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
-+	if (rmesa->dma.current) {
-+		dri_bo_unreference(rmesa->dma.current);
-+		rmesa->dma.current = 0;
- 	}
- 	if (rmesa->dma.nr_released_bufs > 4)
- 		r300FlushCmdBuf(rmesa, __FUNCTION__);
- 
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = (void *)1;	/* hack */
--	dmabuf->refcount = 1;
--
--	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--	if (dmabuf->id == 0) {
--		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		radeonWaitForIdleLocked(&rmesa->radeon);
--
--		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--
--		UNLOCK_HARDWARE(&rmesa->radeon);
--
--		if (dmabuf->id == 0) {
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
--	rmesa->dma.current.end = size;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		r300_mem_free(rmesa, region->buf->id);
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
--	}
--
--	region->buf = 0;
--	region->start = 0;
-+	rmesa->dma.current = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions",
-+		size, 4, DRM_BO_MEM_DMA);
-+	rmesa->dma.current_used = 0;
-+	rmesa->dma.current_vertexptr = 0;
- }
- 
- /* Allocates a region from rmesa->dma.current.  If there isn't enough
-  * space in current, grab a new buffer (and discard what was left of current)
-  */
- void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
-+			dri_bo **pbo, int *poffset,
- 			int bytes, int alignment)
- {
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
-@@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa,
- 	if (rmesa->dma.flush)
- 		rmesa->dma.flush(rmesa);
- 
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
-+	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
- 
- 	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
-+	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
- 
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
-+	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
-+		r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
- 
--#else
--static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
--{
--	struct r300_dma_buffer *dmabuf;
--	int fd = rmesa->radeon.dri.fd;
--	int index = 0;
--	int size = 0;
--	drmDMAReq dma;
--	int ret;
-+	*poffset = rmesa->dma.current_used;
-+	*pbo = rmesa->dma.current;
-+	dri_bo_reference(*pbo);
- 
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
-+	/* Always align to at least 16 bytes */
-+	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
-+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
- 
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
--	}
--
--	if (rmesa->dma.current.buf)
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dma.context = rmesa->radeon.dri.hwContext;
--	dma.send_count = 0;
--	dma.send_list = NULL;
--	dma.send_sizes = NULL;
--	dma.flags = 0;
--	dma.request_count = 1;
--	dma.request_size = RADEON_BUFFER_SIZE;
--	dma.request_list = &index;
--	dma.request_sizes = &size;
--	dma.granted_count = 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--	ret = drmDMA(fd, &dma);
--
--	if (ret != 0) {
--		/* Try to release some buffers and wait until we can't get any more */
--		if (rmesa->dma.nr_released_bufs) {
--			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		}
--
--		if (RADEON_DEBUG & DEBUG_DMA)
--			fprintf(stderr, "Waiting for buffers\n");
--
--		radeonWaitForIdleLocked(&rmesa->radeon);
--		ret = drmDMA(fd, &dma);
--
--		if (ret != 0) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (RADEON_DEBUG & DEBUG_DMA)
--		fprintf(stderr, "Allocated buffer %d\n", index);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
--	dmabuf->refcount = 1;
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = dmabuf->buf->address;
--	rmesa->dma.current.end = dmabuf->buf->total;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		drm_radeon_cmd_header_t *cmd;
--
--		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--			fprintf(stderr, "%s -- DISCARD BUF %d\n",
--				__FUNCTION__, region->buf->buf->idx);
--		cmd =
--		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
--								sizeof
--								(*cmd) / 4,
--								__FUNCTION__);
--		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
--		cmd->dma.buf_idx = region->buf->buf->idx;
--
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
--	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
--
--#endif
--
--GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
--			   GLint size)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	int valid = (size >= 0 && offset >= 0
--		     && offset + size <
--		     rmesa->radeon.radeonScreen->gartTextures.size);
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
--			valid);
--
--	return valid;
--}
--
--GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--
--	//fprintf(stderr, "offset=%08x\n", offset);
--
--	if (offset < 0
--	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
--		return ~0;
--	else
--		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
-+	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
- }
- 
- void r300InitIoctlFuncs(struct dd_function_table *functions)
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
-index e1143fb..c743478 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.h
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
-@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "radeon_drm.h"
- 
--extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
--				  const GLvoid * pointer, GLint size);
--
--extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
--					const GLvoid * pointer);
--
- extern void r300Flush(GLcontext * ctx);
- 
--extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--				 struct r300_dma_region *region,
--				 const char *caller);
- extern void r300AllocDmaRegion(r300ContextPtr rmesa,
--			       struct r300_dma_region *region, int bytes,
--			       int alignment);
-+			       dri_bo **pbo, int *poffset,
-+			       int bytes, int alignment);
- 
- extern void r300InitIoctlFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
-index f8f9d4f..b045393 100644
---- a/src/mesa/drivers/dri/r300/r300_mem.c
-+++ b/src/mesa/drivers/dri/r300/r300_mem.c
-@@ -27,359 +27,843 @@
- 
- /**
-  * \file
-+ * Simulate a real memory manager for R300 in the old-style scheme.
-+ *
-+ * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr.
-  *
-  * \author Aapo Tahkola <aet@rasterburn.org>
-  */
- 
-+#include "r300_mem.h"
-+
-+#include <errno.h>
- #include <unistd.h>
- 
--#include "r300_context.h"
--#include "r300_cmdbuf.h"
--#include "r300_ioctl.h"
--#include "r300_mem.h"
-+#include "simple_list.h"
-+
- #include "radeon_ioctl.h"
-+#include "r300_cmdbuf.h"
- 
--#ifdef USER_BUFFERS
-+typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic;
-+typedef struct _radeon_bo_classic radeon_bo_classic;
-+typedef struct _radeon_bo_functions radeon_bo_functions;
-+typedef struct _radeon_reloc radeon_reloc;
-+typedef struct _radeon_bo_vram radeon_bo_vram;
-+
-+struct _radeon_bufmgr_classic {
-+	radeon_bufmgr base;
-+	r300ContextPtr rmesa;
-+
-+	radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */
-+
-+	radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */
-+	radeon_bo_classic **pending_tail;
-+
-+	/* Texture heap bookkeeping */
-+	driTexHeap *texture_heap;
-+	GLuint texture_offset;
-+	driTextureObject texture_swapped;
-+};
-+
-+struct _radeon_reloc {
-+	uint64_t flags;
-+	GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */
-+	radeon_bo_classic *target;
-+	GLuint delta;
-+};
-+
-+struct _radeon_bo_functions {
-+	/**
-+	 * Free a buffer object. Caller has verified that the object is not
-+	 * referenced or pending.
-+	 */
-+	void (*free)(radeon_bo_classic*);
-+
-+	/**
-+	 * Validate the given buffer. Must set the validated flag to 1.
-+	 *
-+	 * May be null for buffer objects that are always valid.
-+	 * Always called with lock held.
-+	 */
-+	void (*validate)(radeon_bo_classic*);
-+
-+	/**
-+	 * Called when a writing map of the buffer is taken, to note that
-+	 * the buffer will have to be re-validated.
-+	 *
-+	 * May be null for buffer objects that don't need it.
-+	 */
-+	void (*dirty)(radeon_bo_classic*);
-+
-+	/**
-+	 * Indicate that the buffer object is now used by the hardware.
-+	 *
-+	 * May be null.
-+	 */
-+	void (*bind)(radeon_bo_classic*);
-+
-+	/**
-+	 * Indicate that the buffer object is no longer used by the hardware.
-+	 *
-+	 * May be null.
-+	 */
-+	void (*unbind)(radeon_bo_classic*);
-+};
- 
--static void resize_u_list(r300ContextPtr rmesa)
--{
--	void *temp;
--	int nsize;
-+/**
-+ * A buffer object. There are three types of buffer objects:
-+ *  1. cmdbuf: Ordinary malloc()ed memory, used for command buffers
-+ *  2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl.
-+ *  3. vram: Objects with malloc()ed backing store that will be uploaded
-+ *     into VRAM on demand; used for textures.
-+ * There is a @ref functions table for operations that depend on the
-+ * buffer object type.
-+ *
-+ * Fencing is handled the same way all buffer objects. During command buffer
-+ * submission, the pending flag and corresponding variables are set accordingly.
-+ */
-+struct _radeon_bo_classic {
-+	dri_bo base;
- 
--	temp = rmesa->rmm->u_list;
--	nsize = rmesa->rmm->u_size * 2;
-+	const radeon_bo_functions *functions;
- 
--	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
--	_mesa_memset(rmesa->rmm->u_list, 0,
--		     nsize * sizeof(*rmesa->rmm->u_list));
-+	radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */
-+	radeon_bo_classic **pprev;
- 
--	if (temp) {
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
-+	/**
-+	 * Number of software references to this buffer.
-+	 * A buffer is freed automatically as soon as its reference count reaches 0
-+	 * *and* it is no longer pending.
-+	 */
-+	unsigned int refcount;
-+	unsigned int mapcount; /** mmap count; mutually exclusive to being pending */
- 
--		_mesa_memcpy(rmesa->rmm->u_list, temp,
--			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
--		_mesa_free(temp);
--	}
-+	unsigned int validated:1; /** whether the buffer is validated for hardware use right now */
-+	unsigned int used:1; /* only for communication between process_relocs and post_submit */
-+
-+	unsigned int pending:1;
-+	radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */
-+	radeon_bo_classic **pending_pprev;
- 
--	rmesa->rmm->u_size = nsize;
-+	/* The following two variables are intricately linked to the DRM interface,
-+	 * and must be in this physical memory order, or else chaos ensues.
-+	 * See the DRM's implementation of R300_CMD_SCRATCH for details.
-+	 */
-+	uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */
-+	uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */
-+
-+	radeon_reloc *relocs; /** Array of relocations in this buffer */
-+	GLuint relocs_used; /** # of relocations in relocation array */
-+	GLuint relocs_size; /** # of reloc records reserved in relocation array */
-+};
-+
-+typedef struct _radeon_vram_wrapper radeon_vram_wrapper;
-+
-+/** Wrapper around heap object */
-+struct _radeon_vram_wrapper {
-+	driTextureObject base;
-+	radeon_bo_vram *bo;
-+};
-+
-+struct _radeon_bo_vram {
-+	radeon_bo_classic base;
-+
-+	unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */
-+
-+	radeon_vram_wrapper *vram; /** Block in VRAM (if any) */
-+};
-+
-+static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx)
-+{
-+	return (radeon_bufmgr_classic*)bufmgr_ctx;
- }
- 
--void r300_mem_init(r300ContextPtr rmesa)
-+static radeon_bo_classic* get_bo_classic(dri_bo *bo_base)
- {
--	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
--	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
-+	return (radeon_bo_classic*)bo_base;
-+}
- 
--	rmesa->rmm->u_size = 128;
--	resize_u_list(rmesa);
-+static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base)
-+{
-+	return (radeon_bo_vram*)bo_base;
- }
- 
--void r300_mem_destroy(r300ContextPtr rmesa)
-+/**
-+ * Really free a given buffer object.
-+ */
-+static void bo_free(radeon_bo_classic *bo)
- {
--	_mesa_free(rmesa->rmm->u_list);
--	rmesa->rmm->u_list = NULL;
-+	assert(!bo->refcount);
-+	assert(!bo->pending);
-+	assert(!bo->mapcount);
-+
-+	if (bo->relocs) {
-+		int i;
-+		for(i = 0; i < bo->relocs_used; ++i)
-+			dri_bo_unreference(&bo->relocs[i].target->base);
-+		free(bo->relocs);
-+		bo->relocs = 0;
-+	}
-+
-+	*bo->pprev = bo->next;
-+	if (bo->next)
-+		bo->next->pprev = bo->pprev;
- 
--	_mesa_free(rmesa->rmm);
--	rmesa->rmm = NULL;
-+	bo->functions->free(bo);
- }
- 
--void *r300_mem_ptr(r300ContextPtr rmesa, int id)
-+
-+/**
-+ * Keep track of which buffer objects are still pending, i.e. waiting for
-+ * some hardware operation to complete.
-+ */
-+static void track_pending_buffers(radeon_bufmgr_classic *bufmgr)
- {
--	assert(id <= rmesa->rmm->u_last);
--	return rmesa->rmm->u_list[id].ptr;
-+	uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa);
-+
-+	while(bufmgr->pending) {
-+		radeon_bo_classic *bo = bufmgr->pending;
-+
-+		assert(bo->pending);
-+
-+		if (bo->pending_count ||
-+		    bo->pending_age > currentage) // TODO: Age counter wraparound!
-+			break;
-+
-+		bo->pending = 0;
-+		bufmgr->pending = bo->pending_next;
-+		if (bufmgr->pending)
-+			bufmgr->pending->pending_pprev = &bufmgr->pending;
-+		else
-+			bufmgr->pending_tail = &bufmgr->pending;
-+
-+		if (bo->functions->unbind)
-+			(*bo->functions->unbind)(bo);
-+		if (!bo->refcount)
-+			bo_free(bo);
-+	}
- }
- 
--int r300_mem_find(r300ContextPtr rmesa, void *ptr)
-+/**
-+ * Initialize common buffer object data.
-+ */
-+static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size)
- {
--	int i;
-+	bo->base.bufmgr = &bufmgr->base.base;
-+	bo->base.size = size;
-+	bo->refcount = 1;
-+
-+	bo->pprev = &bufmgr->buffers;
-+	bo->next = bufmgr->buffers;
-+	if (bo->next)
-+		bo->next->pprev = &bo->next;
-+	bufmgr->buffers = bo;
-+}
- 
--	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
--		if (rmesa->rmm->u_list[i].ptr &&
--		    ptr >= rmesa->rmm->u_list[i].ptr &&
--		    ptr <
--		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
--			break;
- 
--	if (i < rmesa->rmm->u_size + 1)
--		return i;
-+/**
-+ * Free a DMA-based buffer.
-+ */
-+static void dma_free(radeon_bo_classic *bo)
-+{
-+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr);
-+	drm_radeon_mem_free_t memfree;
-+	int ret;
-+
-+	memfree.region = RADEON_MEM_REGION_GART;
-+	memfree.region_offset = bo->base.offset;
-+	memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset;
- 
--	fprintf(stderr, "%p failed\n", ptr);
--	return 0;
-+	ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd,
-+		DRM_RADEON_FREE, &memfree, sizeof(memfree));
-+	if (ret) {
-+		fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset);
-+		fprintf(stderr, "ret = %s\n", strerror(-ret));
-+		exit(1);
-+	}
-+
-+	free(bo);
- }
- 
--//#define MM_DEBUG
--int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
-+static const radeon_bo_functions dma_bo_functions = {
-+	.free = &dma_free
-+};
-+
-+/**
-+ * Call the DRM to allocate GART memory for the given (incomplete)
-+ * buffer object.
-+ */
-+static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo,
-+		unsigned long size, unsigned int alignment)
- {
- 	drm_radeon_mem_alloc_t alloc;
--	int offset = 0, ret;
--	int i, free = -1;
--	int done_age;
--	drm_radeon_mem_free_t memfree;
--	int tries = 0;
--	static int bytes_wasted = 0, allocated = 0;
-+	int baseoffset;
-+	int ret;
- 
--	if (size < 4096)
--		bytes_wasted += 4096 - size;
-+	alloc.region = RADEON_MEM_REGION_GART;
-+	alloc.alignment = alignment;
-+	alloc.size = size;
-+	alloc.region_offset = &baseoffset;
-+
-+	ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
-+			DRM_RADEON_ALLOC, &alloc, sizeof(alloc));
-+	if (ret) {
-+		if (RADEON_DEBUG & DEBUG_MEMORY)
-+			fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret);
-+		return 0;
-+	}
- 
--	allocated += size;
-+	bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset;
-+	bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset;
- 
--#if 0
--	static int t = 0;
--	if (t != time(NULL)) {
--		t = time(NULL);
--		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
--			rmesa->rmm->u_last, bytes_wasted / 1024,
--			allocated / 1024);
-+	return 1;
-+}
-+
-+/**
-+ * Allocate a DMA buffer.
-+ */
-+static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
-+		unsigned long size, unsigned int alignment)
-+{
-+	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
-+
-+	bo->functions = &dma_bo_functions;
-+
-+	track_pending_buffers(bufmgr);
-+	if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
-+		if (RADEON_DEBUG & DEBUG_MEMORY)
-+			fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size);
-+		radeonFinish(bufmgr->rmesa->radeon.glCtx);
-+		track_pending_buffers(bufmgr);
-+		if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
-+			WARN_ONCE(
-+				"Ran out of GART memory (for %ld)!\n"
-+				"Please consider adjusting GARTSize option.\n",
-+				size);
-+			free(bo);
-+			return 0;
-+		}
- 	}
--#endif
- 
--	memfree.region = RADEON_MEM_REGION_GART;
-+	init_buffer(bufmgr, bo, size);
-+	bo->validated = 1; /* DMA buffer offsets are always valid */
- 
--      again:
-+	return &bo->base;
-+}
- 
--	done_age = radeonGetAge((radeonContextPtr) rmesa);
-+/**
-+ * Free a command buffer
-+ */
-+static void cmdbuf_free(radeon_bo_classic *bo)
-+{
-+	free(bo->base.virtual);
-+	free(bo);
-+}
- 
--	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
--		resize_u_list(rmesa);
-+static const radeon_bo_functions cmdbuf_bo_functions = {
-+	.free = cmdbuf_free
-+};
- 
--	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
--		if (rmesa->rmm->u_list[i].ptr == NULL) {
--			free = i;
--			continue;
-+/**
-+ * Allocate a command buffer.
-+ *
-+ * Command buffers are really just malloc'ed buffers. They are managed by
-+ * the bufmgr to enable relocations.
-+ */
-+static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
-+		unsigned long size)
-+{
-+	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
-+
-+	bo->functions = &cmdbuf_bo_functions;
-+	bo->base.virtual = malloc(size);
-+
-+	init_buffer(bufmgr, bo, size);
-+	return &bo->base;
-+}
-+
-+/**
-+ * Free a VRAM-based buffer object.
-+ */
-+static void vram_free(radeon_bo_classic *bo_base)
-+{
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
-+
-+	if (bo->vram) {
-+		driDestroyTextureObject(&bo->vram->base);
-+		bo->vram = 0;
-+	}
-+
-+	free(bo->base.base.virtual);
-+	free(bo);
-+}
-+
-+/**
-+ * Allocate/update the copy in vram.
-+ *
-+ * Note: Assume we're called with the DRI lock held.
-+ */
-+static void vram_validate(radeon_bo_classic *bo_base)
-+{
-+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr);
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
-+
-+	if (!bo->vram) {
-+		bo->backing_store_dirty = 1;
-+
-+		bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper));
-+		bo->vram->bo = bo;
-+		make_empty_list(&bo->vram->base);
-+		bo->vram->base.totalSize = bo->base.base.size;
-+		if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) {
-+			fprintf(stderr, "Ouch! vram_validate failed\n");
-+			free(bo->vram);
-+			bo->base.base.offset = 0;
-+			bo->vram = 0;
-+			return;
- 		}
-+	}
-+
-+	assert(bo->vram->base.memBlock);
-+
-+	bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs;
-+
-+	if (bo->backing_store_dirty) {
-+		/* Copy to VRAM using a blit.
-+		 * All memory is 4K aligned. We're using 1024 pixels wide blits.
-+		 */
-+		drm_radeon_texture_t tex;
-+		drm_radeon_tex_image_t tmp;
-+		int ret;
- 
--		if (rmesa->rmm->u_list[i].h_pending == 0 &&
--		    rmesa->rmm->u_list[i].pending
--		    && rmesa->rmm->u_list[i].age <= done_age) {
--			memfree.region_offset =
--			    (char *)rmesa->rmm->u_list[i].ptr -
--			    (char *)rmesa->radeon.radeonScreen->gartTextures.
--			    map;
-+		tex.offset = bo->base.base.offset;
-+		tex.image = &tmp;
- 
--			ret =
--			    drmCommandWrite(rmesa->radeon.radeonScreen->
--					    driScreen->fd, DRM_RADEON_FREE,
--					    &memfree, sizeof(memfree));
-+		assert(!(tex.offset & 1023));
- 
-+		tmp.x = 0;
-+		tmp.y = 0;
-+		if (bo->base.base.size < 4096) {
-+			tmp.width = (bo->base.base.size + 3) / 4;
-+			tmp.height = 1;
-+		} else {
-+			tmp.width = 1024;
-+			tmp.height = (bo->base.base.size + 4095) / 4096;
-+		}
-+		tmp.data = bo->base.base.virtual;
-+
-+		tex.format = RADEON_TXFORMAT_ARGB8888;
-+		tex.width = tmp.width;
-+		tex.height = tmp.height;
-+		tex.pitch = MAX2(tmp.width / 16, 1);
-+
-+		do {
-+			ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
-+						DRM_RADEON_TEXTURE, &tex,
-+						sizeof(drm_radeon_texture_t));
- 			if (ret) {
--				fprintf(stderr, "Failed to free at %p\n",
--					rmesa->rmm->u_list[i].ptr);
--				fprintf(stderr, "ret = %s\n", strerror(-ret));
--				exit(1);
--			} else {
--#ifdef MM_DEBUG
--				fprintf(stderr, "really freed %d at age %x\n",
--					i,
--					radeonGetAge((radeonContextPtr) rmesa));
--#endif
--				if (i == rmesa->rmm->u_last)
--					rmesa->rmm->u_last--;
--
--				if (rmesa->rmm->u_list[i].size < 4096)
--					bytes_wasted -=
--					    4096 - rmesa->rmm->u_list[i].size;
--
--				allocated -= rmesa->rmm->u_list[i].size;
--				rmesa->rmm->u_list[i].pending = 0;
--				rmesa->rmm->u_list[i].ptr = NULL;
--				free = i;
-+				if (RADEON_DEBUG & DEBUG_IOCTL)
-+					fprintf(stderr,
-+						"DRM_RADEON_TEXTURE:  again!\n");
-+				usleep(1);
- 			}
--		}
-+		} while (ret == -EAGAIN);
-+
-+		bo->backing_store_dirty = 0;
- 	}
--	rmesa->rmm->u_head = i;
--
--	if (free == -1) {
--		WARN_ONCE("Ran out of slots!\n");
--		//usleep(100);
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		tries++;
--		if (tries > 100) {
--			WARN_ONCE("Ran out of slots!\n");
--			exit(1);
--		}
--		goto again;
-+
-+	bo->base.validated = 1;
-+}
-+
-+static void vram_dirty(radeon_bo_classic *bo_base)
-+{
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
-+
-+	bo->base.validated = 0;
-+	bo->backing_store_dirty = 1;
-+}
-+
-+static void vram_bind(radeon_bo_classic *bo_base)
-+{
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
-+
-+	if (bo->vram) {
-+		bo->vram->base.bound = 1;
-+		driUpdateTextureLRU(&bo->vram->base);
- 	}
-+}
- 
--	alloc.region = RADEON_MEM_REGION_GART;
--	alloc.alignment = alignment;
--	alloc.size = size;
--	alloc.region_offset = &offset;
-+static void vram_unbind(radeon_bo_classic *bo_base)
-+{
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
- 
--	ret =
--	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
--				sizeof(alloc));
--	if (ret) {
--#if 0
--		WARN_ONCE("Ran out of mem!\n");
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		//usleep(100);
--		tries2++;
--		tries = 0;
--		if (tries2 > 100) {
--			WARN_ONCE("Ran out of GART memory!\n");
--			exit(1);
--		}
--		goto again;
--#else
--		WARN_ONCE
--		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
--		     size);
--		return 0;
--#endif
-+	if (bo->vram)
-+		bo->vram->base.bound = 0;
-+}
-+
-+/** Callback function called by the texture heap when a texture is evicted */
-+static void destroy_vram_wrapper(void *data, driTextureObject *t)
-+{
-+	radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t;
-+
-+	if (wrapper->bo && wrapper->bo->vram == wrapper) {
-+		wrapper->bo->base.validated = 0;
-+		wrapper->bo->vram = 0;
- 	}
-+}
- 
--	i = free;
-+static const radeon_bo_functions vram_bo_functions = {
-+	.free = vram_free,
-+	.validate = vram_validate,
-+	.dirty = vram_dirty,
-+	.bind = vram_bind,
-+	.unbind = vram_unbind
-+};
- 
--	if (i > rmesa->rmm->u_last)
--		rmesa->rmm->u_last = i;
-+/**
-+ * Free a VRAM-based buffer object.
-+ */
-+static void static_free(radeon_bo_classic *bo_base)
-+{
-+	radeon_bo_vram *bo = get_bo_vram(bo_base);
- 
--	rmesa->rmm->u_list[i].ptr =
--	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
--	rmesa->rmm->u_list[i].size = size;
--	rmesa->rmm->u_list[i].age = 0;
--	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
-+	free(bo);
-+}
- 
--#ifdef MM_DEBUG
--	fprintf(stderr, "allocated %d at age %x\n", i,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
-+static void static_bind(radeon_bo_classic *bo_base)
-+{
-+}
- 
--	return i;
-+static void static_unbind(radeon_bo_classic *bo_base)
-+{
- }
- 
--void r300_mem_use(r300ContextPtr rmesa, int id)
-+static void static_validate(radeon_bo_classic *bo_base)
- {
--	uint64_t ull;
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	drm_r300_cmd_header_t *cmd;
-+}
- 
--	assert(id <= rmesa->rmm->u_last);
-+static void static_dirty(radeon_bo_classic *bo_base)
-+{
-+}
- 
--	if (id == 0)
--		return;
-+static const radeon_bo_functions static_bo_functions = {
-+	.free = static_free,
-+	.validate = static_validate,
-+	.dirty = static_dirty,
-+	.bind = static_bind,
-+	.unbind = static_unbind
-+};
- 
--	cmd =
--	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
--						      2 + sizeof(ull) / 4,
--						      __FUNCTION__);
--	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
--	cmd[0].scratch.reg = R300_MEM_SCRATCH;
--	cmd[0].scratch.n_bufs = 1;
--	cmd[0].scratch.flags = 0;
--	cmd++;
- 
--	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
--	_mesa_memcpy(cmd, &ull, sizeof(ull));
--	cmd += sizeof(ull) / 4;
-+/**
-+ * Allocate a backing store buffer object that is validated into VRAM.
-+ */
-+static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
-+		unsigned long size, unsigned int alignment)
-+{
-+	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));
-+
-+	bo->base.functions = &vram_bo_functions;
-+	bo->base.base.virtual = malloc(size);
-+	init_buffer(bufmgr, &bo->base, size);
-+	return &bo->base.base;
-+}
- 
--	cmd[0].u = /*id */ 0;
- 
--	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
--	rmesa->rmm->u_list[id].h_pending++;
--	UNLOCK_HARDWARE(&rmesa->radeon);
-+static dri_bo *bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name,
-+		unsigned long size, unsigned int alignment,
-+		uint64_t location_mask)
-+{
-+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
-+
-+	if (location_mask & DRM_BO_MEM_CMDBUF) {
-+		return cmdbuf_alloc(bufmgr, name, size);
-+	} else if (location_mask & DRM_BO_MEM_DMA) {
-+		return dma_alloc(bufmgr, name, size, alignment);
-+	} else {
-+		return vram_alloc(bufmgr, name, size, alignment);
-+	}
- }
- 
--unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
-+static dri_bo *bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name,
-+					      unsigned long offset, unsigned long size,
-+					      void *virtual, uint64_t location_mask)
- {
--	unsigned long offset;
-+  	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
-+	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));
- 
--	assert(id <= rmesa->rmm->u_last);
-+	bo->base.functions = &static_bo_functions;
-+	bo->base.base.virtual = virtual;
-+	bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation;
-+	bo->base.validated = 1; /* Static buffer offsets are always valid */
- 
--	offset = (char *)rmesa->rmm->u_list[id].ptr -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
-+	init_buffer(bufmgr, &bo->base, size);
-+	return &bo->base.base;
- 
--	return offset;
- }
- 
--void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
-+
-+
-+static void bufmgr_classic_bo_reference(dri_bo *bo_base)
- {
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	void *ptr;
--	int tries = 0;
-+	radeon_bo_classic *bo = get_bo_classic(bo_base);
-+	bo->refcount++;
-+	assert(bo->refcount > 0);
-+}
- 
--	assert(id <= rmesa->rmm->u_last);
-+static void bufmgr_classic_bo_unreference(dri_bo *bo_base)
-+{
-+	radeon_bo_classic *bo = get_bo_classic(bo_base);
- 
--	if (access == R300_MEM_R) {
-+	if (!bo_base)
-+		return;
- 
--		if (rmesa->rmm->u_list[id].mapped == 1)
--			WARN_ONCE("buffer %d already mapped\n", id);
-+	assert(bo->refcount > 0);
-+	bo->refcount--;
-+	if (!bo->refcount) {
-+		// Ugly HACK - figure out whether this is really necessary
-+		get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++;
- 
--		rmesa->rmm->u_list[id].mapped = 1;
--		ptr = r300_mem_ptr(rmesa, id);
-+		assert(!bo->mapcount);
-+		if (!bo->pending)
-+			bo_free(bo);
-+	}
-+}
- 
--		return ptr;
-+static int bufmgr_classic_bo_map(dri_bo *bo_base, GLboolean write_enable)
-+{
-+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr);
-+	radeon_bo_classic *bo = get_bo_classic(bo_base);
-+	assert(bo->refcount > 0);
-+
-+	if (bo->pending) {
-+		track_pending_buffers(bufmgr);
-+		if (bo->pending) {
-+			// TODO: Better fence waiting
-+			if (RADEON_DEBUG & DEBUG_MEMORY)
-+				fprintf(stderr, "bo_map: buffer is pending. Flushing...\n");
-+			radeonFinish(bufmgr->rmesa->radeon.glCtx);
-+			track_pending_buffers(bufmgr);
-+			if (bo->pending) {
-+				fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n");
-+				abort();
-+			}
-+		}
- 	}
- 
--	if (rmesa->rmm->u_list[id].h_pending)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
-+	if (write_enable && bo->functions->dirty)
-+		bo->functions->dirty(bo);
- 
--	if (rmesa->rmm->u_list[id].h_pending) {
--		return NULL;
--	}
-+	bo->mapcount++;
-+	assert(bo->mapcount > 0);
-+	return 0;
-+}
- 
--	while (rmesa->rmm->u_list[id].age >
--	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
--		usleep(10);
-+static int bufmgr_classic_bo_unmap(dri_bo *buf)
-+{
-+	radeon_bo_classic *bo = get_bo_classic(buf);
-+	assert(bo->refcount > 0);
-+	assert(bo->mapcount > 0);
-+	bo->mapcount--;
-+	return 0;
-+}
- 
--	if (tries >= 1000) {
--		fprintf(stderr, "Idling failed (%x vs %x)\n",
--			rmesa->rmm->u_list[id].age,
--			radeonGetAge((radeonContextPtr) rmesa));
--		return NULL;
-+/**
-+ * Mark the given buffer as pending and move it to the tail
-+ * of the pending list.
-+ * The caller is responsible for setting up pending_count and pending_age.
-+ */
-+static void move_to_pending_tail(radeon_bo_classic *bo)
-+{
-+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
-+
-+	if (bo->pending) {
-+		*bo->pending_pprev = bo->pending_next;
-+		if (bo->pending_next)
-+			bo->pending_next->pending_pprev = bo->pending_pprev;
-+		else
-+			bufmgr->pending_tail = bo->pending_pprev;
- 	}
- 
--	if (rmesa->rmm->u_list[id].mapped == 1)
--		WARN_ONCE("buffer %d already mapped\n", id);
-+	bo->pending = 1;
-+	bo->pending_pprev = bufmgr->pending_tail;
-+	bo->pending_next = 0;
-+	*bufmgr->pending_tail = bo;
-+	bufmgr->pending_tail = &bo->pending_next;
-+}
- 
--	rmesa->rmm->u_list[id].mapped = 1;
--	ptr = r300_mem_ptr(rmesa, id);
-+/**
-+ * Emit commands to the batch buffer that cause the given buffer's
-+ * pending_count and pending_age to be updated.
-+ */
-+static void emit_age_for_buffer(radeon_bo_classic* bo)
-+{
-+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
-+	BATCH_LOCALS(bufmgr->rmesa);
-+	drm_r300_cmd_header_t cmd;
-+	uint64_t ull;
- 
--	return ptr;
-+	cmd.scratch.cmd_type = R300_CMD_SCRATCH;
-+	cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */
-+	cmd.scratch.n_bufs = 1;
-+	cmd.scratch.flags = 0;
-+	ull = (uint64_t) (intptr_t) &bo->pending_age;
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH(cmd.u);
-+	OUT_BATCH(ull & 0xffffffff);
-+	OUT_BATCH(ull >> 32);
-+	OUT_BATCH(0);
-+	END_BATCH();
-+	COMMIT_BATCH();
-+
-+	bo->pending_count++;
- }
- 
--void r300_mem_unmap(r300ContextPtr rmesa, int id)
-+static int bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta,
-+			GLuint offset, dri_bo *target)
- {
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
-+	radeon_bo_classic *bo = get_bo_classic(batch_buf);
-+	radeon_reloc *reloc;
- 
--	assert(id <= rmesa->rmm->u_last);
-+	if (bo->relocs_used >= bo->relocs_size) {
-+		bo->relocs_size *= 2;
-+		if (bo->relocs_size < 32)
-+			bo->relocs_size = 32;
- 
--	if (rmesa->rmm->u_list[id].mapped == 0)
--		WARN_ONCE("buffer %d not mapped\n", id);
-+		bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc));
-+	}
- 
--	rmesa->rmm->u_list[id].mapped = 0;
-+	reloc = &bo->relocs[bo->relocs_used++];
-+	reloc->flags = flags;
-+	reloc->offset = offset;
-+	reloc->delta = delta;
-+	reloc->target = get_bo_classic(target);
-+	dri_bo_reference(target);
-+	return 0;
- }
- 
--void r300_mem_free(r300ContextPtr rmesa, int id)
-+/* process_relocs is called just before the given command buffer
-+ * is executed. It ensures that all referenced buffers are in
-+ * the right GPU domain.
-+ */
-+static void *bufmgr_classic_process_relocs(dri_bo *batch_buf, GLuint *count)
- {
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
-+	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
-+	int i;
- 
--	assert(id <= rmesa->rmm->u_last);
-+	// Warning: At this point, we append something to the batch buffer
-+	// during flush.
-+	emit_age_for_buffer(batch_bo);
-+
-+	dri_bo_map(batch_buf, GL_TRUE);
-+	for(i = 0; i < batch_bo->relocs_used; ++i) {
-+		radeon_reloc *reloc = &batch_bo->relocs[i];
-+		uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset);
-+		uint32_t offset;
-+
-+		if (!reloc->target->validated)
-+			reloc->target->functions->validate(reloc->target);
-+		reloc->target->used = 1;
-+		offset = reloc->target->base.offset + reloc->delta;
-+
-+		if (reloc->flags & DRM_RELOC_BLITTER)
-+			*dest = (*dest & 0xffc00000) | (offset >> 10);
-+		else if (reloc->flags & DRM_RELOC_TXOFFSET)
-+			*dest = (*dest & 31) | (offset & ~31);
-+		else
-+			*dest = offset;
-+	}
-+	dri_bo_unmap(batch_buf);
-+	return 0;
-+}
- 
--	if (id == 0)
--		return;
-+/* post_submit is called just after the given command buffer
-+ * is executed. It ensures that buffers are properly marked as
-+ * pending.
-+ */
-+static void bufmgr_classic_post_submit(dri_bo *batch_buf, dri_fence **fence)
-+{
-+	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
-+	int i;
- 
--	if (rmesa->rmm->u_list[id].ptr == NULL) {
--		WARN_ONCE("Not allocated!\n");
--		return;
-+	assert(!batch_bo->pending_count);
-+
-+	for(i = 0; i < batch_bo->relocs_used; ++i) {
-+		radeon_reloc *reloc = &batch_bo->relocs[i];
-+
-+		if (reloc->target->used) {
-+			reloc->target->used = 0;
-+			assert(!reloc->target->pending_count);
-+			reloc->target->pending_age = batch_bo->pending_age;
-+			move_to_pending_tail(reloc->target);
-+			if (reloc->target->functions->bind)
-+				(*reloc->target->functions->bind)(reloc->target);
-+		}
- 	}
-+}
- 
--	if (rmesa->rmm->u_list[id].pending) {
--		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
--		return;
-+static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx)
-+{
-+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
-+
-+	track_pending_buffers(bufmgr);
-+	if (bufmgr->pending)
-+		radeonFinish(bufmgr->rmesa->radeon.glCtx);
-+	track_pending_buffers(bufmgr);
-+
-+	if (bufmgr->buffers) {
-+		fprintf(stderr, "Warning: Buffer objects have leaked\n");
-+		while(bufmgr->buffers) {
-+			fprintf(stderr, "  Leak of size %ld\n", bufmgr->buffers->base.size);
-+			bufmgr->buffers->refcount = 0;
-+			bufmgr->buffers->mapcount = 0;
-+			bufmgr->buffers->pending = 0;
-+			bo_free(bufmgr->buffers);
-+		}
- 	}
- 
--	rmesa->rmm->u_list[id].pending = 1;
-+	driDestroyTextureHeap(bufmgr->texture_heap);
-+	bufmgr->texture_heap = 0;
-+	assert(is_empty_list(&bufmgr->texture_swapped));
-+
-+	free(bufmgr);
-+}
-+
-+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa)
-+{
-+	radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic));
-+
-+	bufmgr->rmesa = rmesa;
-+	bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc;
-+	bufmgr->base.base.bo_alloc_static = bufmgr_classic_bo_alloc_static;
-+	bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference;
-+	bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference;
-+	bufmgr->base.base.bo_map = &bufmgr_classic_bo_map;
-+	bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap;
-+	bufmgr->base.base.emit_reloc = &bufmgr_classic_emit_reloc;
-+	bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs;
-+	bufmgr->base.base.post_submit = &bufmgr_classic_post_submit;
-+	bufmgr->base.base.destroy = &bufmgr_classic_destroy;
-+
-+	bufmgr->pending_tail = &bufmgr->pending;
-+
-+	/* Init texture heap */
-+	make_empty_list(&bufmgr->texture_swapped);
-+	bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr,
-+			rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS,
-+			(drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0],
-+			&rmesa->radeon.sarea->tex_age[0],
-+			&bufmgr->texture_swapped, sizeof(radeon_vram_wrapper),
-+			&destroy_vram_wrapper);
-+	bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0];
-+
-+	return &bufmgr->base;
-+}
-+
-+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx)
-+{
-+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base);
-+
-+	DRI_AGE_TEXTURES(bufmgr->texture_heap);
- }
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
-index 625a7f6..4e9be65 100644
---- a/src/mesa/drivers/dri/r300/r300_mem.h
-+++ b/src/mesa/drivers/dri/r300/r300_mem.h
-@@ -1,37 +1,22 @@
- #ifndef __R300_MEM_H__
- #define __R300_MEM_H__
- 
--//#define R300_MEM_PDL 0
--#define R300_MEM_UL 1
-+#include "glheader.h"
-+#include "dri_bufmgr.h"
- 
--#define R300_MEM_R 1
--#define R300_MEM_W 2
--#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
-+#include "r300_context.h"
- 
--#define R300_MEM_SCRATCH 2
- 
--struct r300_memory_manager {
--	struct {
--		void *ptr;
--		uint32_t size;
--		uint32_t age;
--		uint32_t h_pending;
--		int pending;
--		int mapped;
--	} *u_list;
--	int u_head, u_size, u_last;
-+/* Note: The following flags should probably be ultimately eliminated,
-+ * or replaced by something else.
-+ */
-+#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) */
-+#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */
- 
--};
-+#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */
-+#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */
- 
--extern void r300_mem_init(r300ContextPtr rmesa);
--extern void r300_mem_destroy(r300ContextPtr rmesa);
--extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
--extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
--extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
--extern void r300_mem_use(r300ContextPtr rmesa, int id);
--extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
--extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
--extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
--extern void r300_mem_free(r300ContextPtr rmesa, int id);
-+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa);
-+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx);
- 
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
-new file mode 100644
-index 0000000..c3b918c
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
-@@ -0,0 +1,248 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#include "r300_mipmap_tree.h"
-+
-+#include <errno.h>
-+#include <unistd.h>
-+
-+#include "simple_list.h"
-+#include "texcompress.h"
-+#include "texformat.h"
-+
-+#include "r300_mem.h"
-+
-+static GLuint r300_compressed_texture_size(GLcontext *ctx,
-+		GLsizei width, GLsizei height, GLsizei depth,
-+		GLuint mesaFormat)
-+{
-+	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
-+
-+	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
-+	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
-+		if (width + 3 < 8)	/* width one block */
-+			size = size * 4;
-+		else if (width + 3 < 16)
-+			size = size * 2;
-+	} else {
-+		/* DXT3/5, 16 bytes per block */
-+		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
-+		if (width + 3 < 8)
-+			size = size * 2;
-+	}
-+
-+	return size;
-+}
-+
-+/**
-+ * Compute sizes and fill in offset and blit information for the given
-+ * image (determined by \p face and \p level).
-+ *
-+ * \param curOffset points to the offset at which the image is to be stored
-+ * and is updated by this function according to the size of the image.
-+ */
-+static void compute_tex_image_offset(r300_mipmap_tree *mt,
-+	GLuint face, GLuint level, GLuint* curOffset)
-+{
-+	r300_mipmap_level *lvl = &mt->levels[level];
-+
-+	/* Find image size in bytes */
-+	if (mt->compressed) {
-+		lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx,
-+			lvl->width, lvl->height, lvl->depth, mt->compressed);
-+	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
-+		lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height;
-+	} else if (mt->tilebits & R300_TXO_MICRO_TILE) {
-+		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-+		 * though the actual offset may be different (if texture is less than
-+		 * 32 bytes width) to the untiled case */
-+		int w = (lvl->width * mt->bpp * 2 + 31) & ~31;
-+		lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth;
-+	} else {
-+		int w = (lvl->width * mt->bpp + 31) & ~31;
-+		lvl->size = w * lvl->height * lvl->depth;
-+	}
-+	assert(lvl->size > 0);
-+
-+	/* All images are aligned to a 32-byte offset */
-+	*curOffset = (*curOffset + 0x1f) & ~0x1f;
-+	lvl->faces[face].offset = *curOffset;
-+	*curOffset += lvl->size;
-+}
-+
-+static GLuint minify(GLuint size, GLuint levels)
-+{
-+	size = size >> levels;
-+	if (size < 1)
-+		size = 1;
-+	return size;
-+}
-+
-+static void calculate_miptree_layout(r300_mipmap_tree *mt)
-+{
-+	GLuint curOffset;
-+	GLuint numLevels;
-+	GLuint i;
-+
-+	numLevels = mt->lastLevel - mt->firstLevel + 1;
-+	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-+
-+	curOffset = 0;
-+	for(i = 0; i < numLevels; i++) {
-+		GLuint face;
-+
-+		mt->levels[i].width = minify(mt->width0, mt->firstLevel + i);
-+		mt->levels[i].height = minify(mt->height0, mt->firstLevel + i);
-+		mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i);
-+
-+		for(face = 0; face < mt->faces; face++)
-+			compute_tex_image_offset(mt, face, i, &curOffset);
-+	}
-+
-+	/* Note the required size in memory */
-+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-+}
-+
-+
-+/**
-+ * Create a new mipmap tree, calculate its layout and allocate memory.
-+ */
-+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed)
-+{
-+	r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree);
-+
-+	mt->r300 = rmesa;
-+	mt->t = t;
-+	mt->target = target;
-+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-+	mt->firstLevel = firstLevel;
-+	mt->lastLevel = lastLevel;
-+	mt->width0 = width0;
-+	mt->height0 = height0;
-+	mt->depth0 = depth0;
-+	mt->bpp = bpp;
-+	mt->tilebits = tilebits;
-+	mt->compressed = compressed;
-+
-+	calculate_miptree_layout(mt);
-+
-+	mt->bo = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0);
-+
-+	return mt;
-+}
-+
-+/**
-+ * Destroy the given mipmap tree.
-+ */
-+void r300_miptree_destroy(r300_mipmap_tree *mt)
-+{
-+	dri_bo_unreference(mt->bo);
-+	free(mt);
-+}
-+
-+/*
-+ * Copy a width x height rect of cpp-byte texels; pitches and x/y origins are in texels (scaled to bytes below). XXX Move this into core Mesa?
-+ */
-+static void
-+_mesa_copy_rect(GLubyte * dst,
-+                GLuint cpp,
-+                GLuint dst_pitch,
-+                GLuint dst_x,
-+                GLuint dst_y,
-+                GLuint width,
-+                GLuint height,
-+                const GLubyte * src,
-+                GLuint src_pitch, GLuint src_x, GLuint src_y)
-+{
-+   GLuint i;
-+
-+   dst_pitch *= cpp;   /* from here on, pitches and width are in bytes */
-+   src_pitch *= cpp;
-+   dst += dst_x * cpp;
-+   src += src_x * cpp;
-+   dst += dst_y * dst_pitch;
-+   src += src_y * src_pitch;   /* row offset must use the source pitch, not dst_pitch */
-+   width *= cpp;
-+
-+   if (width == dst_pitch && width == src_pitch)   /* both tightly packed: one copy */
-+      memcpy(dst, src, height * width);
-+   else {
-+      for (i = 0; i < height; i++) {   /* otherwise copy row by row */
-+         memcpy(dst, src, width);
-+         dst += dst_pitch;
-+         src += src_pitch;
-+      }
-+   }
-+}
-+
-+/**
-+ * Upload the given texture image to the given face/level of the mipmap tree.
-+ * \param level of the texture, i.e. \c level==mt->firstLevel is the first hw level
-+ */
-+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
-+			       struct gl_texture_image *texImage)
-+{
-+	GLuint hwlevel = level - mt->firstLevel;
-+	r300_mipmap_level *lvl = &mt->levels[hwlevel];
-+	void *dest;
-+
-+	assert(face < mt->faces);
-+	assert(level >= mt->firstLevel && level <= mt->lastLevel);
-+	assert(texImage && texImage->Data);
-+	assert(texImage->Width == lvl->width);
-+	assert(texImage->Height == lvl->height);
-+	assert(texImage->Depth == lvl->depth);
-+
-+	dri_bo_map(mt->bo, GL_TRUE);
-+
-+	dest = mt->bo->virtual + lvl->faces[face].offset;
-+
-+	if (mt->tilebits)
-+		WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
-+
-+	if (!mt->compressed) {
-+		GLuint dst_align;
-+		GLuint dst_pitch = lvl->width;
-+		GLuint src_pitch = lvl->width;
-+
-+		if (mt->target == GL_TEXTURE_RECTANGLE_NV)
-+			dst_align = 64 / mt->bpp;
-+		else
-+			dst_align = 32 / mt->bpp;
-+		dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1);
-+
-+		_mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height,
-+				texImage->Data, src_pitch, 0, 0);
-+	} else {
-+		memcpy(dest, texImage->Data, lvl->size);
-+	}
-+
-+	dri_bo_unmap(mt->bo);
-+}
-diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
-new file mode 100644
-index 0000000..a888ecf
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
-@@ -0,0 +1,91 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#ifndef __R300_MIPMAP_TREE_H_
-+#define __R300_MIPMAP_TREE_H_
-+
-+#include "r300_context.h"
-+
-+typedef struct _r300_mipmap_tree r300_mipmap_tree;
-+typedef struct _r300_mipmap_level r300_mipmap_level;
-+typedef struct _r300_mipmap_image r300_mipmap_image;
-+
-+struct _r300_mipmap_image {
-+	GLuint offset; /** Offset of this image from the start of mipmap tree, in bytes */
-+};
-+
-+struct _r300_mipmap_level {
-+	GLuint width;
-+	GLuint height;
-+	GLuint depth;
-+	GLuint size; /** Size of each image, in bytes */
-+	r300_mipmap_image faces[6];
-+};
-+
-+
-+/**
-+ * A mipmap tree contains texture images in the layout that the hardware
-+ * expects.
-+ *
-+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
-+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
-+ * changed.
-+ */
-+struct _r300_mipmap_tree {
-+	r300ContextPtr r300;
-+	r300TexObj *t;
-+	dri_bo *bo;
-+
-+	GLuint totalsize; /** total size of the miptree, in bytes */
-+
-+	GLenum target; /** GL_TEXTURE_xxx */
-+	GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
-+	GLuint firstLevel; /** First mip level stored in this mipmap tree */
-+	GLuint lastLevel; /** Last mip level stored in this mipmap tree */
-+
-+	GLuint width0; /** Width of level 0 image */
-+	GLuint height0; /** Height of level 0 image */
-+	GLuint depth0; /** Depth of level 0 image */
-+
-+	GLuint bpp; /** Bytes per texel */
-+	GLuint tilebits; /** R300_TXO_xxx_TILE */
-+	GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
-+
-+	r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS];
-+};
-+
-+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed);
-+void r300_miptree_destroy(r300_mipmap_tree *mt);
-+
-+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
-+			       struct gl_texture_image *texImage);
-+
-+
-+#endif /* __R300_MIPMAP_TREE_H_ */
-diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
-index 0a199e6..209fae9 100644
---- a/src/mesa/drivers/dri/r300/r300_render.c
-+++ b/src/mesa/drivers/dri/r300/r300_render.c
-@@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
- static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
- 	void *out;
- 
--	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
--		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
--		rvb->start = ((char *)elts) - rvb->address;
--		rvb->aos_offset =
--		    rmesa->radeon.radeonScreen->gart_texture_offset +
--		    rvb->start;
--		return;
--	} else if (r300IsGartMemory(rmesa, elts, 1)) {
--		WARN_ONCE("Pointer not within GART memory!\n");
--		_mesa_exit(-1);
--	}
--
--	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
--	rvb->aos_offset = GET_START(rvb);
-+	r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset,
-+			   n_elts * 4, 4);
- 
--	out = rvb->address + rvb->start;
-+	out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset;
- 	memcpy(out, elts, n_elts * 4);
- }
- 
--static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
--		       int vertex_count, int type)
-+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-+	BEGIN_BATCH(8);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
--	e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
--	e32(addr);
--	e32(vertex_count);
-+	OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+	OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
-+	OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0);
-+	OUT_BATCH(vertex_count);
-+	END_BATCH();
- }
- 
- static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
- {
-+	BATCH_LOCALS(rmesa);
- 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
- 			offset);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
--	e32(nr);
-+	BEGIN_BATCH(sz+2);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+	OUT_BATCH(nr);
- 
- 	for (i = 0; i + 1 < nr; i += 2) {
--		e32((rmesa->state.aos[i].aos_size << 0) |
--		    (rmesa->state.aos[i].aos_stride << 8) |
--		    (rmesa->state.aos[i + 1].aos_size << 16) |
--		    (rmesa->state.aos[i + 1].aos_stride << 24));
--
--		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
--		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
-+		OUT_BATCH((rmesa->state.aos[i].components << 0) |
-+			  (rmesa->state.aos[i].stride << 8) |
-+			  (rmesa->state.aos[i + 1].components << 16) |
-+			  (rmesa->state.aos[i + 1].stride << 24));
-+
-+		OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo,
-+			rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0);
-+		OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo,
-+			rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0);
- 	}
- 
- 	if (nr & 1) {
--		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
--		    (rmesa->state.aos[nr - 1].aos_stride << 8));
--		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
-+		OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
-+			  (rmesa->state.aos[nr - 1].stride << 8));
-+		OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo,
-+			rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0);
- 	}
-+	END_BATCH();
- }
- 
- static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	END_BATCH();
- }
- 
- static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 				   int start, int end, int prim)
- {
-+	BATCH_LOCALS(rmesa);
- 	int type, num_verts;
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *vb = &tnl->vb;
-@@ -268,6 +258,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 	if (type < 0 || num_verts <= 0)
- 		return;
- 
-+	/* Make space for at least 64 dwords.
-+	 * This is supposed to ensure that we can get all rendering
-+	 * commands into a single command buffer.
-+	 */
-+	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);
-+
- 	if (vb->Elts) {
- 		if (num_verts > 65535) {
- 			/* not implemented yet */
-@@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 		 */
- 		r300EmitElts(ctx, vb->Elts, num_verts);
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
--		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
-+		r300FireEB(rmesa, num_verts, type);
- 	} else {
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
- 		r300FireAOS(rmesa, num_verts, type);
- 	}
-+	COMMIT_BATCH();
- }
- 
- static GLboolean r300RunRender(GLcontext * ctx,
-@@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 
- 	r300EmitCacheFlush(rmesa);
- 
--#ifdef USER_BUFFERS
--	r300UseArrays(ctx);
--#endif
--
- 	r300ReleaseArrays(ctx);
- 
- 	return GL_FALSE;
-diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
-index cce07d3..b314764 100644
---- a/src/mesa/drivers/dri/r300/r300_state.c
-+++ b/src/mesa/drivers/dri/r300/r300_state.c
-@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
-+#include "radeon_buffer.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
-@@ -1148,39 +1149,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx)
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	r300ContextPtr r300 = rmesa;
- 	struct gl_framebuffer *fb = ctx->DrawBuffer;
--	driRenderbuffer *drb;
-+	struct radeon_renderbuffer *rrb;
- 
- 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- 		/* draw to front */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
--		    Renderbuffer;
-+		rrb =
-+		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
- 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- 		/* draw to back */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
--		    Renderbuffer;
-+		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- 	} else {
- 		/* drawing to multiple buffers, or none */
- 		return;
- 	}
- 
--	assert(drb);
--	assert(drb->flippedPitch);
-+	assert(rrb);
-+	assert(rrb->pitch);
- 
- 	R300_STATECHANGE(rmesa, cb);
- 
--	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
- #if 0
- 	R200_STATECHANGE(rmesa, ctx);
- 
-@@ -1499,14 +1486,9 @@ static void r300SetupTextures(GLcontext * ctx)
- 	/* We cannot let disabled tmu offsets pass DRM */
- 	for (i = 0; i < mtu; i++) {
- 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
--
--#if 0				/* Enables old behaviour */
--			hw_tmu = i;
--#endif
- 			tmu_mappings[i] = hw_tmu;
- 
--			t = r300->state.texture.unit[i].texobj;
--			/* XXX questionable fix for bug 9170: */
-+			t = r300_tex_obj(ctx->Texture.Unit[i]._Current);
- 			if (!t)
- 				continue;
- 
-@@ -1532,21 +1514,20 @@ static void r300SetupTextures(GLcontext * ctx)
- 			 */
- 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 				t->filter_1 |
--				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
-+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
- 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 			    t->size;
- 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
- 						hw_tmu] = t->format;
- 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 			    t->pitch_reg;
--			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->offset;
-+			r300->hw.textures[hw_tmu] = t;
- 
--			if (t->offset & R300_TXO_MACRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
- 				WARN_ONCE("macro tiling enabled!\n");
- 			}
- 
--			if (t->offset & R300_TXO_MICRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
- 				WARN_ONCE("micro tiling enabled!\n");
- 			}
- 
-@@ -2373,20 +2354,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300BlendColor(ctx, ctx->Color.BlendColor);
- 
--	/* Again, r300ClearBuffer uses this */
--	r300->hw.cb.cmd[R300_CB_OFFSET] =
--	    r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--
- 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
-@@ -2402,10 +2369,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
- 
--	r300->hw.zb.cmd[R300_ZB_OFFSET] =
--	    r300->radeon.radeonScreen->depthOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
- 
- 	if (r300->radeon.sarea->tiling_enabled) {
- 		/* XXX: Turn off when clearing buffers ? */
-diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
-index 0589ab7..96177ba 100644
---- a/src/mesa/drivers/dri/r300/r300_state.h
-+++ b/src/mesa/drivers/dri/r300/r300_state.h
-@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define R300_FIREVERTICES( r300 )			\
- do {							\
-     \
--   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
-+   if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) {	\
-       r300Flush( (r300)->radeon.glCtx );		\
-    }							\
-     \
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
-index 8aebd9b..f4a0b7f 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.c
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
-@@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
- 
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset);
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
- #define EMIT_ATTR( ATTR, STYLE )					\
- do {									\
-@@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 			inputs[i] = -1;
- 		}
- 	}
--	
-+
- 	/* Fixed, apply to vir0 only */
- 	if (InputsRead & (1 << VERT_ATTRIB_POS))
- 		inputs[VERT_ATTRIB_POS] = 0;
-@@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- 		if (InputsRead & (1 << i))
- 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
--	
-+
- 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- 		if (InputsRead & (1 << i)) {
- 			tab[nr++] = i;
- 		}
- 	}
--	
-+
- 	for (i = 0; i < nr; i++) {
- 		int ci;
--		
-+
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
- 		swizzle[i][2] = SWIZZLE_ZERO;
-@@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- 				   nr);
--   
-+
- 	R300_STATECHANGE(rmesa, vic);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
--   
-+
- 	R300_STATECHANGE(rmesa, vof);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
--   
-+
- 	rmesa->swtcl.vertex_size =
- 		_tnl_install_attrs( ctx,
--				    rmesa->swtcl.vertex_attrs, 
-+				    rmesa->swtcl.vertex_attrs,
- 				    rmesa->swtcl.vertex_attr_count,
- 				    NULL, 0 );
--	
-+
- 	rmesa->swtcl.vertex_size /= 4;
- 
- 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-@@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx )
-  */
- static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
- {
-+	BATCH_LOCALS(rmesa);
-+
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
--	
-+
- 	rmesa->dma.flush = NULL;
- 
--	if (rmesa->dma.current.buf) {
--		struct r300_dma_region *current = &rmesa->dma.current;
--		GLuint current_offset = GET_START(current);
-+	if (rmesa->dma.current) {
-+		GLuint current_offset = rmesa->dma.current_used;
- 
--		assert (current->start + 
-+		assert (rmesa->dma.current_used +
- 			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--			current->ptr);
-+			rmesa->dma.current_vertexptr);
- 
--		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-+		if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) {
-+			rmesa->dma.current_used = rmesa->dma.current_vertexptr;
- 
- 			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
--			
-+
- 			r300EmitState(rmesa);
--			
-+
- 			r300EmitVertexAOS( rmesa,
- 					   rmesa->swtcl.vertex_size,
--					   current_offset);
--			
-+					   rmesa->dma.current, current_offset);
-+
- 			r300EmitVbufPrim( rmesa,
- 					  rmesa->swtcl.hw_primitive,
- 					  rmesa->swtcl.numverts);
--			
-+
- 			r300EmitCacheFlush(rmesa);
-+			COMMIT_BATCH();
- 		}
--		
-+
- 		rmesa->swtcl.numverts = 0;
--		current->start = current->ptr;
- 	}
- }
- 
-@@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
- {
- 	GLuint bytes = vsize * nverts;
- 
--	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-+	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size)
- 		r300RefillCurrentDmaRegion( rmesa, bytes);
- 
- 	if (!rmesa->dma.flush) {
-@@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
- 
- 	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
- 	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--	ASSERT( rmesa->dma.current.start + 
-+	ASSERT( rmesa->dma.current_used +
- 		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--		rmesa->dma.current.ptr );
-+		rmesa->dma.current_vertexptr );
- 
- 	{
--		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--		rmesa->dma.current.ptr += bytes;
-+		GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr);
-+		rmesa->dma.current_vertexptr += bytes;
- 		rmesa->swtcl.numverts += nverts;
- 		return head;
- 	}
-@@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
-    const char *r300verts = (char *)rmesa->swtcl.verts;
- #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
--#define VERTEX r300Vertex 
-+#define VERTEX r300Vertex
- #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
- #define PRINT_VERTEX(x)
- #undef TAG
-@@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx)
-         r300ContextPtr rmesa = R300_CONTEXT( ctx );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	r300ChooseRenderState(ctx);	
-+	r300ChooseRenderState(ctx);
- 	r300SetVertexFormat(ctx);
- 
- 	r300UpdateShaders(rmesa);
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
--	
--	if (rmesa->dma.flush != 0 && 
-+
-+	if (rmesa->dma.flush != 0 &&
- 	    rmesa->dma.flush != flush_last_swtcl_prim)
- 		rmesa->dma.flush( rmesa );
- 
-@@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx)
- static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	
-+
- 	if (rmesa->swtcl.hw_primitive != hwprim) {
- 	        R300_NEWPRIM( rmesa );
- 		rmesa->swtcl.hw_primitive = hwprim;
-@@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
- 
- 	r300RasterPrimitive( ctx, reduced_prim[prim] );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
--	
-+
- }
- 
- static void r300ResetLineStipple(GLcontext *ctx)
-@@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx)
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	static int firsttime = 1;
--	
-+
- 	if (firsttime) {
- 		init_rast_tab();
- 		firsttime = 0;
- 	}
--	
-+
- 	tnl->Driver.Render.Start = r300RenderStart;
- 	tnl->Driver.Render.Finish = r300RenderFinish;
- 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
-@@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx)
- 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
- 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
- 	tnl->Driver.Render.Interp = _tnl_interp;
--	
-+
- 	/* FIXME: what are these numbers? */
--	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
-+	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
- 			    48 * sizeof(GLfloat) );
--	
-+
- 	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- 	rmesa->swtcl.RenderIndex = ~0;
- 	rmesa->swtcl.render_primitive = GL_TRIANGLES;
--	rmesa->swtcl.hw_primitive = 0;	
-+	rmesa->swtcl.hw_primitive = 0;
- 
- 	_tnl_invalidate_vertex_state( ctx, ~0 );
- 	_tnl_invalidate_vertices( ctx, ~0 );
-@@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx)
- 	_tnl_need_projected_coords( ctx, GL_FALSE );
- 	r300ChooseRenderState(ctx);
- 
--	_mesa_validate_all_lighting_tables( ctx ); 
-+	_mesa_validate_all_lighting_tables( ctx );
- 
--	tnl->Driver.NotifyMaterialChange = 
-+	tnl->Driver.NotifyMaterialChange =
- 	  _mesa_validate_all_lighting_tables;
- }
- 
-@@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx)
- {
- }
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(rmesa);
- 
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	if (RADEON_DEBUG & DEBUG_VERTS)
--	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
--		  __FUNCTION__, vertex_size, offset);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
--	e32(1);
--	e32(vertex_size | (vertex_size << 8));
--	e32(offset);
-+		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-+			__FUNCTION__, vertex_size, offset);
-+
-+	BEGIN_BATCH(5);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
-+	OUT_BATCH(1);
-+	OUT_BATCH(vertex_size | (vertex_size << 8));
-+	OUT_BATCH_RELOC(0, bo, offset, 0);
-+	END_BATCH();
- }
- 
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
- {
--
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(rmesa);
- 	int type, num_verts;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	type = r300PrimitiveType(rmesa, primitive);
- 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
--	
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+	END_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
-index f7f4972..c6ee1b5 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.c
-+++ b/src/mesa/drivers/dri/r300/r300_tex.c
-@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_tex.h"
- 
- #include "xmlpool.h"
-@@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
-  */
- static void r300UpdateTexWrap(r300TexObjPtr t)
- {
--	struct gl_texture_object *tObj = t->base.tObj;
-+	struct gl_texture_object *tObj = &t->base;
- 
- 	t->filter &=
- 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
-@@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
- 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
- }
- 
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
--{
--	r300TexObjPtr t;
--
--	t = CALLOC_STRUCT(r300_tex_obj);
--	texObj->DriverData = t;
--	if (t != NULL) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE) {
--			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--				(void *)texObj, (void *)t);
--		}
--
--		/* Initialize non-image-dependent parts of the state:
--		 */
--		t->base.tObj = texObj;
--		t->border_fallback = GL_FALSE;
--
--		make_empty_list(&t->base);
--
--		r300UpdateTexWrap(t);
--		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
--		r300SetTexBorderColor(t, texObj->_BorderChan);
--	}
--
--	return t;
--}
--
- /* try to find a format which will only need a memcopy */
- static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
- 							       GLenum srcType)
-@@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
- 	return NULL;		/* never get here */
- }
- 
--static GLboolean
--r300ValidateClientStorage(GLcontext * ctx, GLenum target,
--			  GLint internalFormat,
--			  GLint srcWidth, GLint srcHeight,
--			  GLenum format, GLenum type, const void *pixels,
--			  const struct gl_pixelstore_attrib *packing,
--			  struct gl_texture_object *texObj,
--			  struct gl_texture_image *texImage)
-+/**
-+ * Marks the given face/level pair as dirty.
-+ * This will cause an appropriate texture reupload the next time this
-+ * texture is validated.
-+ */
-+static void mark_texture_image_dirty(r300TexObj *t, int face, int level)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "intformat %s format %s type %s\n",
--			_mesa_lookup_enum_by_nr(internalFormat),
--			_mesa_lookup_enum_by_nr(format),
--			_mesa_lookup_enum_by_nr(type));
--
--	if (!ctx->Unpack.ClientStorage)
--		return 0;
--
--	if (ctx->_ImageTransferState ||
--	    texImage->IsCompressed || texObj->GenerateMipmap)
--		return 0;
--
--	/* This list is incomplete, may be different on ppc???
--	 */
--	switch (internalFormat) {
--	case GL_RGBA:
--		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
--			texImage->TexFormat = _dri_texformat_argb8888;
--		} else
--			return 0;
--		break;
--
--	case GL_RGB:
--		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
--			texImage->TexFormat = _dri_texformat_rgb565;
--		} else
--			return 0;
--		break;
--
--	case GL_YCBCR_MESA:
--		if (format == GL_YCBCR_MESA &&
--		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--		} else if (format == GL_YCBCR_MESA &&
--			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--			    type == GL_UNSIGNED_BYTE)) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr;
--		} else
--			return 0;
--		break;
--
--	default:
--		return 0;
--	}
--
--	/* Could deal with these packing issues, but currently don't:
--	 */
--	if (packing->SkipPixels ||
--	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
--		return 0;
--	}
--
--	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						    format, type);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: srcRowStride %d/%x\n",
--			__FUNCTION__, srcRowStride, srcRowStride);
--
--	/* Could check this later in upload, pitch restrictions could be
--	 * relaxed, but would need to store the image pitch somewhere,
--	 * as packing details might change before image is uploaded:
--	 */
--	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
--	    || (srcRowStride & 63))
--		return 0;
--
--	/* Have validated that _mesa_transfer_teximage would be a straight
--	 * memcpy at this point.  NOTE: future calls to TexSubImage will
--	 * overwrite the client data.  This is explicitly mentioned in the
--	 * extension spec.
--	 */
--	texImage->Data = (void *)pixels;
--	texImage->IsClientData = GL_TRUE;
--	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
--
--	return 1;
-+	t->dirty_images[face] |= 1 << level;
- }
- 
- static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
-@@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
- 			   struct gl_texture_object *texObj,
- 			   struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--			return;
--		}
--	}
--
--	/* Note, this will call ChooseTextureFormat */
- 	_mesa_store_teximage1d(ctx, target, level, internalFormat,
- 			       width, border, format, type, pixels,
- 			       &ctx->Unpack, texObj, texImage);
- 
--	t->dirty_images[0] |= (1 << level);
-+	mark_texture_image_dirty(t, 0, level);
- }
- 
- static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
-@@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
- 			      struct gl_texture_object *texObj,
- 			      struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--			return;
--		}
--	}
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
- 				  format, type, pixels, packing, texObj,
- 				  texImage);
- 
--	t->dirty_images[0] |= (1 << level);
-+	mark_texture_image_dirty(t, 0, level);
- }
- 
- static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
-@@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
- 			   struct gl_texture_object *texObj,
- 			   struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 	GLuint face;
- 
- 	/* which cube face or ordinary 2D image */
-@@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
- 		face = 0;
- 	}
- 
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--			return;
--		}
--	}
--
- 	texImage->IsClientData = GL_FALSE;
- 
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage2d(ctx, target, level, internalFormat,
--				       width, height, border, format, type,
--				       pixels, &ctx->Unpack, texObj, texImage);
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+		fprintf(stderr, "%s: Using normal storage\n",
-+			__FUNCTION__);
-+
-+	/* Normal path: copy (to cached memory) and eventually upload
-+	 * via another copy to GART memory and then a blit...  Could
-+	 * eliminate one copy by going straight to (permanent) GART.
-+	 *
-+	 * Note, this will call r300ChooseTextureFormat.
-+	 */
-+	_mesa_store_teximage2d(ctx, target, level, internalFormat,
-+				width, height, border, format, type,
-+				pixels, &ctx->Unpack, texObj, texImage);
- 
--		t->dirty_images[face] |= (1 << level);
--	}
-+	mark_texture_image_dirty(t, face, level);
- }
- 
- static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
-@@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
- 			      struct gl_texture_object *texObj,
- 			      struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 	GLuint face;
- 
- 	/* which cube face or ordinary 2D image */
-@@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
- 		face = 0;
- 	}
- 
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--			return;
--		}
--	}
--
- 	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
- 				  height, format, type, pixels, packing, texObj,
- 				  texImage);
- 
--	t->dirty_images[face] |= (1 << level);
-+	mark_texture_image_dirty(t, face, level);
- }
- 
- static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
-@@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
- 				     struct gl_texture_object *texObj,
- 				     struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 	GLuint face;
- 
- 	/* which cube face or ordinary 2D image */
-@@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
- 		face = 0;
- 	}
- 
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexImage2D");
--			return;
--		}
--	}
--
- 	texImage->IsClientData = GL_FALSE;
- 
--	/* can't call this, different parameters. Would never evaluate to true anyway currently */
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_compressed_teximage2d(ctx, target, level,
--						  internalFormat, width, height,
--						  border, imageSize, data,
--						  texObj, texImage);
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+		fprintf(stderr, "%s: Using normal storage\n",
-+			__FUNCTION__);
-+
-+	/* Normal path: copy (to cached memory) and eventually upload
-+	 * via another copy to GART memory and then a blit...  Could
-+	 * eliminate one copy by going straight to (permanent) GART.
-+	 *
-+	 * Note, this will call r300ChooseTextureFormat.
-+	 */
-+	_mesa_store_compressed_teximage2d(ctx, target, level,
-+						internalFormat, width, height,
-+						border, imageSize, data,
-+						texObj, texImage);
- 
--		t->dirty_images[face] |= (1 << level);
--	}
-+	mark_texture_image_dirty(t, face, level);
- }
- 
- static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
-@@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
- 					struct gl_texture_object *texObj,
- 					struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 	GLuint face;
- 
- 	/* which cube face or ordinary 2D image */
-@@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
- 		face = 0;
- 	}
- 
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexSubImage3D");
--			return;
--		}
--	}
--
- 	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
- 					     yoffset, width, height, format,
- 					     imageSize, data, texObj, texImage);
- 
--	t->dirty_images[face] |= (1 << level);
-+	mark_texture_image_dirty(t, face, level);
- }
- 
- static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
-@@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
- 			   struct gl_texture_object *texObj,
- 			   struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--			return;
--		}
--	}
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	texImage->IsClientData = GL_FALSE;
- 
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage3d(ctx, target, level, internalFormat,
--				       width, height, depth, border,
--				       format, type, pixels,
--				       &ctx->Unpack, texObj, texImage);
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+		fprintf(stderr, "%s: Using normal storage\n",
-+			__FUNCTION__);
-+
-+	/* Normal path: copy (to cached memory) and eventually upload
-+	 * via another copy to GART memory and then a blit...  Could
-+	 * eliminate one copy by going straight to (permanent) GART.
-+	 *
-+	 * Note, this will call r300ChooseTextureFormat.
-+	 */
-+	_mesa_store_teximage3d(ctx, target, level, internalFormat,
-+				width, height, depth, border,
-+				format, type, pixels,
-+				&ctx->Unpack, texObj, texImage);
- 
--		t->dirty_images[0] |= (1 << level);
--	}
-+	mark_texture_image_dirty(t, 0, level);
- }
- 
- static void
-@@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
- 		  struct gl_texture_object *texObj,
- 		  struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--			return;
--		}
--		texObj->DriverData = t;
--	}
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
- 				  width, height, depth,
- 				  format, type, pixels, packing, texObj,
- 				  texImage);
- 
--	t->dirty_images[0] |= (1 << level);
-+	mark_texture_image_dirty(t, 0, level);
- }
- 
- /**
-@@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 			     struct gl_texture_object *texObj,
- 			     GLenum pname, const GLfloat * params)
- {
--	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
-@@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 		 * we just have to rely on loading the right subset of mipmap levels
- 		 * to simulate a clamped LOD.
- 		 */
--		driSwapOutTextureObject((driTextureObject *) t);
-+		if (t->mt) {
-+			r300_miptree_destroy(t->mt);
-+			t->mt = 0;
-+		}
- 		break;
- 
- 	case GL_DEPTH_TEXTURE_MODE:
-@@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 	}
- }
- 
--static void r300BindTexture(GLcontext * ctx, GLenum target,
--			    struct gl_texture_object *texObj)
--{
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
--		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
--			(void *)texObj, ctx->Texture.CurrentUnit);
--	}
--
--	if ((target == GL_TEXTURE_1D)
--	    || (target == GL_TEXTURE_2D)
--	    || (target == GL_TEXTURE_3D)
--	    || (target == GL_TEXTURE_CUBE_MAP)
--	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
--		assert(texObj->DriverData != NULL);
--	}
--}
--
- static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-@@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- 			_mesa_lookup_enum_by_nr(texObj->Target));
- 	}
- 
--	if (t != NULL) {
--		if (rmesa) {
--			R300_FIREVERTICES(rmesa);
--		}
-+	if (rmesa) {
-+		int i;
-+		R300_FIREVERTICES(rmesa);
-+
-+		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
-+			if (rmesa->hw.textures[i] == t)
-+				rmesa->hw.textures[i] = 0;
-+	}
- 
--		driDestroyTextureObject(t);
-+	if (t->mt) {
-+		r300_miptree_destroy(t->mt);
-+		t->mt = 0;
- 	}
--	/* Free mipmap images and the texture object itself */
- 	_mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
- static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
-@@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
- 						      GLenum target)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_object *obj;
--	obj = _mesa_new_texture_object(ctx, name, target);
--	if (!obj)
--		return NULL;
--	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+	r300TexObj* t = CALLOC_STRUCT(r300_tex_obj);
-+
-+
-+	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+			t, _mesa_lookup_enum_by_nr(target));
-+	}
-+
-+	_mesa_initialize_texture_object(&t->base, name, target);
-+	t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+
-+	/* Initialize hardware state */
-+	r300UpdateTexWrap(t);
-+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
-+	r300SetTexBorderColor(t, t->base._BorderChan);
- 
--	r300AllocTexObj(obj);
--	return obj;
-+	return &t->base;
- }
- 
- void r300InitTextureFuncs(struct dd_function_table *functions)
-@@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	functions->TexSubImage2D = r300TexSubImage2D;
- 	functions->TexSubImage3D = r300TexSubImage3D;
- 	functions->NewTextureObject = r300NewTextureObject;
--	functions->BindTexture = r300BindTexture;
- 	functions->DeleteTexture = r300DeleteTexture;
- 	functions->IsTextureResident = driIsTextureResident;
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
-index b86d45b..5d7f21e 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.h
-+++ b/src/mesa/drivers/dri/r300/r300_tex.h
-@@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx);
- extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
- 			       GLuint face);
- 
--extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
--
- extern void r300InitTextureFuncs(struct dd_function_table *functions);
- 
- #endif				/* __r300_TEX_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
-index 69847a4..b3b501b 100644
---- a/src/mesa/drivers/dri/r300/r300_texmem.c
-+++ b/src/mesa/drivers/dri/r300/r300_texmem.c
-@@ -48,439 +48,15 @@ SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_cmdbuf.h"
-+#include "r300_emit.h"
-+#include "r300_mipmap_tree.h"
- #include "radeon_ioctl.h"
- #include "r300_tex.h"
- #include "r300_ioctl.h"
- #include <unistd.h>		/* for usleep() */
- 
--#ifdef USER_BUFFERS
- #include "r300_mem.h"
--#endif
- 
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
--{
--	int i;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--			(void *)t, (void *)t->base.tObj);
--	}
--
--	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
--		if (rmesa->state.texture.unit[i].texobj == t) {
--			rmesa->state.texture.unit[i].texobj = NULL;
--		}
--	}
--}
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
--					 r300TexObjPtr t,
--					 struct gl_texture_image *texImage,
--					 GLint hwlevel,
--					 GLint x, GLint y,
--					 GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	GLuint srcPitch, dstPitch;
--	int blit_format;
--	int srcOffset;
--
--	/*
--	 * XXX it appears that we always upload the full image, not a subimage.
--	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--	 * changed, the src pitch will have to change.
--	 */
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][hwlevel].data = texImage->Data;
--	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--
--	assert(srcOffset != ~0);
--
--	/* Don't currently need to cope with small pitches?
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	r300EmitWait(rmesa, R300_WAIT_3D);
--
--	r300EmitBlit(rmesa, blit_format,
--		     srcPitch,
--		     srcOffset,
--		     dstPitch,
--		     t->bufAddr,
--		     x,
--		     y,
--		     t->image[0][hwlevel].x + x,
--		     t->image[0][hwlevel].y + y, width, height);
--
--	r300EmitWait(rmesa, R300_WAIT_2D);
--}
--
--static void r300UploadRectSubImage(r300ContextPtr rmesa,
--				   r300TexObjPtr t,
--				   struct gl_texture_image *texImage,
--				   GLint x, GLint y, GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	int blit_format, dstPitch, done;
--
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][0].data = texImage->Data;
--
--	/* Currently don't need to cope with small pitches.
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--	dstPitch = t->pitch;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--		/* In this case, could also use GART texturing.  This is
--		 * currently disabled, but has been tested & works.
--		 */
--		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"Using GART texturing for rectangular client texture\n");
--
--		/* Release FB memory allocated for this image:
--		 */
--		/* FIXME This may not be correct as driSwapOutTextureObject sets
--		 * FIXME dirty_images.  It may be fine, though.
--		 */
--		if (t->base.memBlock) {
--			driSwapOutTextureObject((driTextureObject *) t);
--		}
--	} else if (texImage->IsClientData) {
--		/* Data already in GART memory, with usable pitch.
--		 */
--		GLuint srcPitch;
--		srcPitch = texImage->RowStride * texFormat->TexelBytes;
--		r300EmitBlit(rmesa,
--			     blit_format,
--			     srcPitch,
--			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
--			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
--	} else {
--		/* Data not in GART memory, or bad pitch.
--		 */
--		for (done = 0; done < height;) {
--			struct r300_dma_region region;
--			int lines =
--			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
--			int src_pitch;
--			char *tex;
--
--			src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--			tex = (char *)texImage->Data + done * src_pitch;
--
--			memset(&region, 0, sizeof(region));
--			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
--					   1024);
--
--			/* Copy texdata to dma:
--			 */
--			if (RADEON_DEBUG & DEBUG_TEXTURE)
--				fprintf(stderr,
--					"%s: src_pitch %d dst_pitch %d\n",
--					__FUNCTION__, src_pitch, dstPitch);
--
--			if (src_pitch == dstPitch) {
--				memcpy(region.address + region.start, tex,
--				       lines * src_pitch);
--			} else {
--				char *buf = region.address + region.start;
--				int i;
--				for (i = 0; i < lines; i++) {
--					memcpy(buf, tex, src_pitch);
--					buf += dstPitch;
--					tex += src_pitch;
--				}
--			}
--
--			r300EmitWait(rmesa, R300_WAIT_3D);
--
--			/* Blit to framebuffer
--			 */
--			r300EmitBlit(rmesa,
--				     blit_format,
--				     dstPitch, GET_START(&region),
--				     dstPitch | (t->tile_bits >> 16),
--				     t->bufAddr, 0, 0, 0, done, width, lines);
--
--			r300EmitWait(rmesa, R300_WAIT_2D);
--#ifdef USER_BUFFERS
--			r300_mem_use(rmesa, region.buf->id);
--#endif
--
--			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
--			done += lines;
--		}
--	}
--}
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLint hwlevel,
--			       GLint x, GLint y, GLint width, GLint height,
--			       GLuint face)
--{
--	struct gl_texture_image *texImage = NULL;
--	GLuint offset;
--	GLint imageWidth, imageHeight;
--	GLint ret;
--	drm_radeon_texture_t tex;
--	drm_radeon_tex_image_t tmp;
--	const int level = hwlevel + t->base.firstLevel;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr,
--			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
--			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
--			width, height, face);
--	}
--
--	ASSERT(face < 6);
--
--	/* Ensure we have a valid texture to upload */
--	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
--		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--		return;
--	}
--
--	texImage = t->base.tObj->Image[face][level];
--
--	if (!texImage) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: texImage %d is NULL!\n",
--				__FUNCTION__, level);
--		return;
--	}
--	if (!texImage->Data) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is NULL!\n",
--				__FUNCTION__);
--		return;
--	}
--
--	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		assert(level == 0);
--		assert(hwlevel == 0);
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is rectangular\n",
--				__FUNCTION__);
--		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
--		return;
--	} else if (texImage->IsClientData) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"%s: image data is in GART client storage\n",
--				__FUNCTION__);
--		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
--					     width, height);
--		return;
--	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: image data is in normal memory\n",
--			__FUNCTION__);
--
--	imageWidth = texImage->Width;
--	imageHeight = texImage->Height;
--
--	offset = t->bufAddr;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		GLint imageX = 0;
--		GLint imageY = 0;
--		GLint blitX = t->image[face][hwlevel].x;
--		GLint blitY = t->image[face][hwlevel].y;
--		GLint blitWidth = t->image[face][hwlevel].width;
--		GLint blitHeight = t->image[face][hwlevel].height;
--		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
--			imageWidth, imageHeight, imageX, imageY);
--		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
--			blitWidth, blitHeight, blitX, blitY);
--		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--			(GLuint) offset, hwlevel, level);
--	}
--
--	t->image[face][hwlevel].data = texImage->Data;
--
--	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--	 * We used to use 1, 2 and 4-byte texels and used to use the texture
--	 * width to dictate the blit width - but that won't work for compressed
--	 * textures. (Brian)
--	 * NOTE: can't do that with texture tiling. (sroland)
--	 */
--	tex.offset = offset;
--	tex.image = &tmp;
--	/* copy (x,y,width,height,data) */
--	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
--
--	if (texImage->TexFormat->TexelBytes > 4) {
--		const int log2TexelBytes =
--		    (3 + (texImage->TexFormat->TexelBytes >> 4));
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.height = imageHeight;
--		tex.width = imageWidth << log2TexelBytes;
--		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
--		tmp.x = tmp.x % (1024 >> log2TexelBytes);
--		tmp.width = tmp.width << log2TexelBytes;
--	} else if (texImage->TexFormat->TexelBytes) {
--		/* use multi-byte upload scheme */
--		tex.height = imageHeight;
--		tex.width = imageWidth;
--		switch (texImage->TexFormat->TexelBytes) {
--		case 1:
--			tex.format = RADEON_TXFORMAT_I8;
--			break;
--		case 2:
--			tex.format = RADEON_TXFORMAT_AI88;
--			break;
--		case 4:
--			tex.format = RADEON_TXFORMAT_ARGB8888;
--			break;
--		}
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.offset += tmp.x & ~1023;
--		tmp.x = tmp.x % 1024;
--
--		if (t->tile_bits & R300_TXO_MICRO_TILE) {
--			/* need something like "tiled coordinates" ? */
--			tmp.y = tmp.x / (tex.pitch * 128) * 2;
--			tmp.x =
--			    tmp.x % (tex.pitch * 128) / 2 /
--			    texImage->TexFormat->TexelBytes;
--			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--		} else {
--			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--		}
--#if 1
--		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
--		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
--		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
--			 && (texImage->Height >= 8))
--			|| (texImage->Height >= 16))) {
--			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--			   OR if height is smaller than 8 automatically, but if micro tiling is active
--			   the limit is height 16 instead ? */
--			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--		}
--#endif
--	} else {
--		/* In case of for instance 8x8 texture (2x2 dxt blocks),
--		   padding after the first two blocks is needed (only
--		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
--		   has 4 real pixels. Needed so the kernel module reads
--		   the right amount of data. */
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
--		tex.height = (imageHeight + 3) / 4;
--		tex.width = (imageWidth + 3) / 4;
--		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
--			tex.width *= 8;
--		} else {
--			tex.width *= 16;
--		}
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--	do {
--		ret =
--		    drmCommandWriteRead(rmesa->radeon.dri.fd,
--					DRM_RADEON_TEXTURE, &tex,
--					sizeof(drm_radeon_texture_t));
--		if (ret) {
--			if (RADEON_DEBUG & DEBUG_IOCTL)
--				fprintf(stderr,
--					"DRM_RADEON_TEXTURE:  again!\n");
--			usleep(1);
--		}
--	} while (ret == -EAGAIN);
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
--		fprintf(stderr, "   offset=0x%08x\n", offset);
--		fprintf(stderr, "   image width=%d height=%d\n",
--			imageWidth, imageHeight);
--		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
--			t->image[face][hwlevel].width,
--			t->image[face][hwlevel].height,
--			t->image[face][hwlevel].data);
--		_mesa_exit(-1);
--	}
--}
- 
- /**
-  * Upload the texture images associated with texture \a t.  This might
-@@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
- 
- int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
- {
--	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
- 	if (t->image_override)
- 		return 0;
-+	if (!t->mt)
-+		return 0;
- 
- 	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
--			t->base.totalSize, t->base.firstLevel,
--			t->base.lastLevel);
-+		fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__,
-+			(void *)rmesa->radeon.glCtx, t,
-+			t->mt->firstLevel, t->mt->lastLevel);
- 	}
- 
--	if (t->base.totalSize == 0)
--		return 0;
--
- 	if (RADEON_DEBUG & DEBUG_SYNC) {
- 		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
- 		radeonFinish(rmesa->radeon.glCtx);
- 	}
- 
--	LOCK_HARDWARE(&rmesa->radeon);
--
--	if (t->base.memBlock == NULL) {
--		int heap;
--
--		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
--					  (driTextureObject *) t);
--		if (heap == -1) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			return -1;
--		}
--
--		/* Set the base offset of the texture image */
--		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
--		    + t->base.memBlock->ofs;
--		t->offset = t->bufAddr;
--
--		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--			/* hope it's safe to add that here... */
--			t->offset |= t->tile_bits;
--		}
--	}
--
--	/* Let the world know we've used this memory recently.
--	 */
--	driUpdateTextureLRU((driTextureObject *) t);
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
- 	/* Upload any images that are new */
--	if (t->base.dirty_images[face]) {
--		int i;
-+	if (t->dirty_images[face]) {
-+		int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1;
- 		for (i = 0; i < numLevels; i++) {
--			if ((t->base.
--			     dirty_images[face] & (1 <<
--						   (i + t->base.firstLevel))) !=
--			    0) {
--				r300UploadSubImage(rmesa, t, i, 0, 0,
--						   t->image[face][i].width,
--						   t->image[face][i].height,
--						   face);
-+			if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) {
-+				r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i,
-+					t->base.Image[face][t->mt->firstLevel + i]);
- 			}
- 		}
--		t->base.dirty_images[face] = 0;
-+		t->dirty_images[face] = 0;
- 	}
- 
- 	if (RADEON_DEBUG & DEBUG_SYNC) {
-diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
-index bdd20b1..1b24738 100644
---- a/src/mesa/drivers/dri/r300/r300_texstate.c
-+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
-@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "radeon_ioctl.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_tex.h"
- #include "r300_reg.h"
- 
-@@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
-+	t = r300_tex_obj(tObj);
- 
- 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
- 	case MESA_FORMAT_Z16:
-@@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- }
- 
- 
--/**
-- * Compute sizes and fill in offset and blit information for the given
-- * image (determined by \p face and \p level).
-- *
-- * \param curOffset points to the offset at which the image is to be stored
-- * and is updated by this function according to the size of the image.
-- */
--static void compute_tex_image_offset(
--	struct gl_texture_object *tObj,
--	GLuint face,
--	GLint level,
--	GLint* curOffset)
-+static void calculate_first_last_level(struct gl_texture_object *tObj,
-+				       GLuint *pfirstLevel, GLuint *plastLevel)
- {
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image* texImage;
--	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
--	GLuint texelBytes;
--	GLuint size;
--
--	texImage = tObj->Image[0][level + t->base.firstLevel];
--	if (!texImage)
--		return;
--
--	texelBytes = texImage->TexFormat->TexelBytes;
--
--	/* find image size in bytes */
--	if (texImage->IsCompressed) {
--		if ((t->format & R300_TX_FORMAT_DXT1) ==
--			R300_TX_FORMAT_DXT1) {
--			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
--			if ((texImage->Width + 3) < 8)	/* width one block */
--				size = texImage->CompressedSize * 4;
--			else if ((texImage->Width + 3) < 16)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
-+	const struct gl_texture_image * const baseImage =
-+		tObj->Image[0][tObj->BaseLevel];
-+
-+	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
-+	* and having firstLevel and lastLevel as signed prevents the need for
-+	* extra sign checks.
-+	*/
-+	int   firstLevel;
-+	int   lastLevel;
-+
-+	/* Yes, this looks overly complicated, but it's all needed.
-+	*/
-+	switch (tObj->Target) {
-+	case GL_TEXTURE_1D:
-+	case GL_TEXTURE_2D:
-+	case GL_TEXTURE_3D:
-+	case GL_TEXTURE_CUBE_MAP:
-+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
-+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
-+			*/
-+			firstLevel = lastLevel = tObj->BaseLevel;
- 		} else {
--			/* DXT3/5, 16 bytes per block */
--			WARN_ONCE
--				("DXT 3/5 suffers from multitexturing problems!\n");
--			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
--			if ((texImage->Width + 3) < 8)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
-+			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
-+			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
-+			firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
-+			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
-+			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
-+			lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
-+			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
-+			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
- 		}
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		size =
--			((texImage->Width * texelBytes +
--			63) & ~63) * texImage->Height;
--		blitWidth = 64 / texelBytes;
--	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
--		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--			though the actual offset may be different (if texture is less than
--			32 bytes width) to the untiled case */
--		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--		size =
--			(w * ((texImage->Height + 1) / 2)) *
--			texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	} else {
--		int w = (texImage->Width * texelBytes + 31) & ~31;
--		size = w * texImage->Height * texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	}
--	assert(size > 0);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
--			texImage->Width, texImage->Height,
--			texImage->Depth,
--			texImage->TexFormat->TexelBytes,
--			texImage->InternalFormat);
--
--	/* All images are aligned to a 32-byte offset */
--	*curOffset = (*curOffset + 0x1f) & ~0x1f;
--
--	if (texelBytes) {
--		/* fix x and y coords up later together with offset */
--		t->image[face][level].x = *curOffset;
--		t->image[face][level].y = 0;
--		t->image[face][level].width =
--			MIN2(size / texelBytes, blitWidth);
--		t->image[face][level].height =
--			(size / texelBytes) / t->image[face][level].width;
--	} else {
--		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].width =
--			MIN2(size, R300_BLIT_WIDTH_BYTES);
--		t->image[face][level].height = size / t->image[face][level].width;
-+		break;
-+	case GL_TEXTURE_RECTANGLE_NV:
-+	case GL_TEXTURE_4D_SGIS:
-+		firstLevel = lastLevel = 0;
-+		break;
-+	default:
-+		return;
- 	}
- 
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr,
--			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--			level, face, texImage->Width, texImage->Height,
--			t->image[face][level].x, t->image[face][level].y,
--			t->image[face][level].width, t->image[face][level].height,
--			size, *curOffset);
--
--	*curOffset += size;
-+	/* save these values */
-+	*pfirstLevel = firstLevel;
-+	*plastLevel = lastLevel;
- }
- 
- 
--
- /**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c filter, \c format, etc. will be set here
-- * too.
-+ * This function ensures a validated miptree is available.
-+ *
-+ * Additionally, some texture format bits are configured here.
-  *
-  * \param rmesa Context pointer
-  * \param tObj GL texture object whose images are to be posted to
-@@ -309,13 +250,13 @@ static void compute_tex_image_offset(
- static void r300SetTexImages(r300ContextPtr rmesa,
- 			     struct gl_texture_object *tObj)
- {
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 	const struct gl_texture_image *baseImage =
- 	    tObj->Image[0][tObj->BaseLevel];
--	GLint curOffset;
--	GLint i, texelBytes;
--	GLint numLevels;
--	GLint log2Width, log2Height, log2Depth;
-+	GLint texelBytes;
-+	GLuint firstLevel = 0, lastLevel = 0;
-+
-+	calculate_first_last_level(tObj, &firstLevel, &lastLevel);
- 
- 	/* Set the hardware texture format
- 	 */
-@@ -335,107 +276,59 @@ static void r300SetTexImages(r300ContextPtr rmesa,
- 	}
- 
- 	texelBytes = baseImage->TexFormat->TexelBytes;
--
--	/* Compute which mipmap levels we really want to send to the hardware.
--	 */
--	driCalculateTextureFirstLastLevel((driTextureObject *) t);
--	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--	/* Calculate mipmap offsets and dimensions for blitting (uploading)
--	 * The idea is that we lay out the mipmap levels within a block of
--	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--	 */
- 	t->tile_bits = 0;
- 
--	/* figure out if this texture is suitable for tiling. */
--#if 0				/* Disabled for now */
--	if (texelBytes) {
--		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--		    /* texrect might be able to use micro tiling too in theory? */
--		    (baseImage->Height > 1)) {
--
--			/* allow 32 (bytes) x 1 mip (which will use two times the space
--			   the non-tiled version would use) max if base texture is large enough */
--			if ((numLevels == 1) ||
--			    (((baseImage->Width * texelBytes /
--			       baseImage->Height) <= 32)
--			     && (baseImage->Width * texelBytes > 64))
--			    ||
--			    ((baseImage->Width * texelBytes /
--			      baseImage->Height) <= 16)) {
--				t->tile_bits |= R300_TXO_MICRO_TILE;
--			}
--		}
-+	if (tObj->Target == GL_TEXTURE_CUBE_MAP)
-+		t->format |= R300_TX_FORMAT_CUBIC_MAP;
- 
--		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--			/* we can set macro tiling even for small textures, they will be untiled anyway */
--			t->tile_bits |= R300_TXO_MACRO_TILE;
-+	if (!t->image_override) {
-+		GLuint compressed = baseImage->IsCompressed ? baseImage->TexFormat->MesaFormat : 0;
-+
-+		if (t->mt) {
-+			if (t->mt->firstLevel != firstLevel ||
-+			    t->mt->lastLevel != lastLevel ||
-+			    t->mt->width0 != baseImage->Width ||
-+			    t->mt->height0 != baseImage->Height ||
-+			    t->mt->depth0 != baseImage->Depth ||
-+			    t->mt->bpp != texelBytes ||
-+			    t->mt->tilebits != t->tile_bits ||
-+			    t->mt->compressed != compressed) {
-+				r300_miptree_destroy(t->mt);
-+				t->mt = 0;
-+			}
- 		}
--	}
--#endif
--
--	curOffset = 0;
- 
--	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--		ASSERT(log2Width == log2Height);
--		t->format |= R300_TX_FORMAT_CUBIC_MAP;
--
--		for(i = 0; i < numLevels; i++) {
--			GLuint face;
--			for(face = 0; face < 6; face++)
--				compute_tex_image_offset(tObj, face, i, &curOffset);
-+		if (!t->mt) {
-+			t->mt = r300_miptree_create(rmesa, t, tObj->Target,
-+				firstLevel, lastLevel,
-+				baseImage->Width, baseImage->Height, baseImage->Depth,
-+				texelBytes, t->tile_bits, compressed);
-+			memset(t->dirty_images, 0xff, sizeof(t->dirty_images));
- 		}
--	} else {
--		for (i = 0; i < numLevels; i++)
--			compute_tex_image_offset(tObj, 0, i, &curOffset);
- 	}
- 
--	/* Align the total size of texture memory block.
--	 */
--	t->base.totalSize =
--	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--	t->size =
--	    (((tObj->Image[0][t->base.firstLevel]->Width -
--	       1) << R300_TX_WIDTHMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
--		R300_TX_HEIGHTMASK_SHIFT))
--	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
--
-+	t->size = (((tObj->Image[0][firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
-+		| ((tObj->Image[0][firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))
-+		| ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT);
- 	t->pitch = 0;
- 
--	/* Only need to round to nearest 32 for textures, but the blitter
--	 * requires 64-byte aligned pitches, and we may/may not need the
--	 * blitter.   NPOT only!
--	 */
- 	if (baseImage->IsCompressed) {
--		t->pitch |=
--		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
-+		t->pitch |= (tObj->Image[0][firstLevel]->Width + 63) & ~(63);
- 	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- 		unsigned int align = (64 / texelBytes) - 1;
--		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
-+		t->pitch |= ((tObj->Image[0][firstLevel]->Width *
- 			     texelBytes) + 63) & ~(63);
- 		t->size |= R300_TX_SIZE_TXPITCH_EN;
- 		if (!t->image_override)
--			t->pitch_reg =
--			    (((tObj->Image[0][t->base.firstLevel]->Width) +
--			      align) & ~align) - 1;
-+			t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1;
- 	} else {
--		t->pitch |=
--		    ((tObj->Image[0][t->base.firstLevel]->Width *
--		      texelBytes) + 63) & ~(63);
-+		t->pitch |= ((tObj->Image[0][firstLevel]->Width * texelBytes) + 63) & ~(63);
- 	}
- 
- 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
-+	    if (tObj->Image[0][firstLevel]->Width > 2048)
- 		t->pitch_reg |= R500_TXWIDTH_BIT11;
--	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
-+	    if (tObj->Image[0][firstLevel]->Height > 2048)
- 		t->pitch_reg |= R500_TXHEIGHT_BIT11;
- 	}
- }
-@@ -449,17 +342,15 @@ static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 
- 	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
- 
--	if (t->base.dirty_images[0]) {
-+	if (!t->mt || t->dirty_images[0]) {
- 		R300_FIREVERTICES(rmesa);
- 
- 		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override)
--			return GL_FALSE;
-+		r300UploadTexImages(rmesa, t, 0);
- 	}
- 
- 	return GL_TRUE;
-@@ -470,7 +361,7 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 
- 	ASSERT(tObj->Target == GL_TEXTURE_3D);
- 
-@@ -479,12 +370,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
- 		return GL_FALSE;
- 	}
- 
--	if (t->base.dirty_images[0]) {
-+	if (!t->mt || t->dirty_images[0]) {
- 		R300_FIREVERTICES(rmesa);
- 		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock)
--			return GL_FALSE;
-+		r300UploadTexImages(rmesa, t, 0);
- 	}
- 
- 	return GL_TRUE;
-@@ -495,14 +384,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 	GLuint face;
- 
- 	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
- 
--	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
--	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
--	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
-+	if (!t->mt ||
-+	    t->dirty_images[0] || t->dirty_images[1] ||
-+	    t->dirty_images[2] || t->dirty_images[3] ||
-+	    t->dirty_images[4] || t->dirty_images[5]) {
- 		/* flush */
- 		R300_FIREVERTICES(rmesa);
- 		/* layout memory space, once for all faces */
-@@ -511,18 +401,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
- 
- 	/* upload (per face) */
- 	for (face = 0; face < 6; face++) {
--		if (t->base.dirty_images[face]) {
--			r300UploadTexImages(rmesa,
--					    (r300TexObjPtr) tObj->DriverData,
--					    face);
-+		if (t->dirty_images[face]) {
-+			r300UploadTexImages(rmesa, t, face);
- 		}
- 	}
- 
--	if (!t->base.memBlock) {
--		/* texmem alloc failed, use s/w fallback */
--		return GL_FALSE;
--	}
--
- 	return GL_TRUE;
- }
- 
-@@ -531,18 +414,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 
- 	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
- 
--	if (t->base.dirty_images[0]) {
-+	if (!t->mt || t->dirty_images[0]) {
- 		R300_FIREVERTICES(rmesa);
- 
- 		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override &&
--		    !rmesa->prefer_gart_client_texturing)
--			return GL_FALSE;
-+		r300UploadTexImages(rmesa, t, 0);
- 	}
- 
- 	return GL_TRUE;
-@@ -550,34 +430,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
- 
- static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 
- 	/* Fallback if there's a texture border */
- 	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
- 		return GL_FALSE;
- 
--	/* Update state if this is a different texture object to last
--	 * time.
--	 */
--	if (rmesa->state.texture.unit[unit].texobj != t) {
--		if (rmesa->state.texture.unit[unit].texobj != NULL) {
--			/* The old texture is no longer bound to this texture unit.
--			 * Mark it as such.
--			 */
--
--			rmesa->state.texture.unit[unit].texobj->base.bound &=
--			    ~(1 << unit);
--		}
--
--		rmesa->state.texture.unit[unit].texobj = t;
--		t->base.bound |= (1 << unit);
--		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
--	}
-+	/* Fallback if memory upload didn't work */
-+	if (!t->mt)
-+		return GL_FALSE;
- 
--	return !t->border_fallback;
-+	return GL_TRUE;
- }
- 
- void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-@@ -586,20 +451,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
- 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
--	r300TexObjPtr t;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 	uint32_t pitch_val;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
- 
--	t->offset = offset;
-+	t->override_offset = offset;
- 	t->pitch_reg &= (1 << 13) -1;
- 	pitch_val = pitch;
- 
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
-index 3fc724a..a84c8fc 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.c
-+++ b/src/mesa/drivers/dri/r300/radeon_context.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "state.h"
- #include "matrix.h"
- #include "framebuffer.h"
-+#include "drirenderbuffer.h"
- 
- #include "drivers/common/driverfuncs.h"
- #include "swrast/swrast.h"
-@@ -258,6 +259,52 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-     }
- }
- 
-+static void
-+radeon_make_renderbuffer_current(radeonContextPtr radeon,
-+				 GLframebuffer *draw)
-+{
-+	int size = radeon->radeonScreen->driScreen->fbSize;
-+	void *map = 0;
-+	/* if radeon->fake */
-+	struct radeon_renderbuffer *rb;
-+	uint32_t offset;
-+	if (!radeon->bufmgr)
-+		return;
-+
-+	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+
-+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset;
-+		if (!rb->bo) 
-+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "front buffer",
-+						     radeon->radeonScreen->frontOffset, size, map,
-+						     DRM_BO_FLAG_MEM_VRAM);
-+		fprintf(stderr,"front is %p\n", rb->bo);
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->frontPitch;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset;
-+		if (!rb->bo) 
-+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "back buffer",
-+						     radeon->radeonScreen->backOffset, size, map,
-+						     DRM_BO_FLAG_MEM_VRAM);
-+		fprintf(stderr,"back is %p\n", rb->bo);
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->backPitch;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset;
-+		if (!rb->bo)
-+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "depth buffer",
-+						     radeon->radeonScreen->depthOffset, size, map,
-+						     DRM_BO_FLAG_MEM_VRAM);
-+		fprintf(stderr,"depth is %p\n", rb->bo);
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch;
-+	}
-+}
-+
-+	
- /* Force the context `c' to be the current context and associate with it
-  * buffer `b'.
-  */
-@@ -265,51 +312,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- 			    __DRIdrawablePrivate * driDrawPriv,
- 			    __DRIdrawablePrivate * driReadPriv)
- {
--	if (driContextPriv) {
--		radeonContextPtr radeon =
--			(radeonContextPtr) driContextPriv->driverPrivate;
-+	radeonContextPtr radeon;
-+	GLframebuffer *dfb, *rfb;
- 
-+	if (!driContextPriv) {
- 		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--				radeon->glCtx);
--
--		if (radeon->dri.drawable != driDrawPriv) {
--			if (driDrawPriv->swap_interval == (unsigned)-1) {
--				driDrawPriv->vblFlags =
--					(radeon->radeonScreen->irq != 0)
--					? driGetDefaultVBlankFlags(&radeon->
--								   optionCache)
--					: VBLANK_FLAG_NO_IRQ;
-+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
-+		_mesa_make_current(NULL, NULL, NULL);
-+		return GL_TRUE;
-+	}
- 
--				driDrawableInitVBlank(driDrawPriv);
--			}
--		}
-+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-+	dfb = driDrawPriv->driverPrivate;
-+	rfb = driReadPriv->driverPrivate;
- 
--		radeon->dri.readable = driReadPriv;
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx);
- 
--		if (radeon->dri.drawable != driDrawPriv ||
--		    radeon->lastStamp != driDrawPriv->lastStamp) {
--			radeon->dri.drawable = driDrawPriv;
-+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
-+	if (driReadPriv != driDrawPriv)
-+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
- 
--			radeonSetCliprects(radeon);
--			r300UpdateViewportOffset(radeon->glCtx);
--		}
-+	radeon_make_renderbuffer_current(radeon, dfb);
- 
--		_mesa_make_current(radeon->glCtx,
--				    (GLframebuffer *) driDrawPriv->
--				    driverPrivate,
--				    (GLframebuffer *) driReadPriv->
--				    driverPrivate);
-+	_mesa_make_current(radeon->glCtx, dfb, rfb);
- 
--		_mesa_update_state(radeon->glCtx);		
-+	if (radeon->dri.drawable != driDrawPriv) {
-+		if (driDrawPriv->swap_interval == (unsigned)-1) {
-+			driDrawPriv->vblFlags =
-+				(radeon->radeonScreen->irq != 0)
-+				? driGetDefaultVBlankFlags(&radeon->
-+							   optionCache)
-+					: VBLANK_FLAG_NO_IRQ;
-+			
-+			driDrawableInitVBlank(driDrawPriv);
-+		}
-+	}
- 
--		radeonUpdatePageFlipping(radeon);
--	} else {
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--		_mesa_make_current(0, 0, 0);
-+	radeon->dri.readable = driReadPriv;
-+
-+	if (radeon->dri.drawable != driDrawPriv ||
-+	    radeon->lastStamp != driDrawPriv->lastStamp) {
-+		radeon->dri.drawable = driDrawPriv;
-+		
-+		radeonSetCliprects(radeon);
-+		r300UpdateViewportOffset(radeon->glCtx);
- 	}
- 
-+	_mesa_update_state(radeon->glCtx);		
-+
-+	radeonUpdatePageFlipping(radeon);
-+
- 	if (RADEON_DEBUG & DEBUG_DRI)
- 		fprintf(stderr, "End %s\n", __FUNCTION__);
- 	return GL_TRUE;
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
-index 7458d63..828853b 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.h
-+++ b/src/mesa/drivers/dri/r300/radeon_context.h
-@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "dri_util.h"
- #include "colormac.h"
-+#include "radeon_buffer.h"
- 
- struct radeon_context;
- typedef struct radeon_context radeonContextRec;
-@@ -132,12 +133,13 @@ struct radeon_scissor_state {
- 
- struct radeon_colorbuffer_state {
- 	GLuint clear;
--	GLint drawOffset, drawPitch;
-+	struct radeon_renderbuffer *rrb;
- };
- 
- struct radeon_state {
- 	struct radeon_colorbuffer_state color;
- 	struct radeon_scissor_state scissor;
-+	struct radeon_renderbuffer *depth_buffer;
- };
- 
- /**
-@@ -185,6 +187,8 @@ struct radeon_context {
- 	/* Configuration cache
- 	 */
- 	driOptionCache optionCache;
-+
-+	struct radeon_bufmgr *bufmgr;
- };
- 
- #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-index 0c1a195..486ce8e 100644
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- #include "r300_context.h"
- #include "radeon_ioctl.h"
-+#include "radeon_buffer.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
- #include "radeon_reg.h"
-@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
- 	assert(dPriv->driContextPriv->driverPrivate);
- 
- 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
-+	
- 	if (RADEON_DEBUG & DEBUG_IOCTL) {
- 		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
- 			(void *)radeon->glCtx);
-@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 	GLint ret;
- 	GLboolean missed_target;
- 	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
-+	GLframebuffer *fb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
- 
- 	assert(dPriv);
- 	assert(dPriv->driContextPriv);
-@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 
- 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- 
-+	rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
- 	if (RADEON_DEBUG & DEBUG_IOCTL) {
- 		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
- 			radeon->sarea->pfCurrentPage);
-@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 	radeon->swap_count++;
- 	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
- 
--        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
-+        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
-                              radeon->sarea->pfCurrentPage);
- 
--	if (radeon->sarea->pfCurrentPage == 1) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	}
--
--	if (IS_R300_CLASS(radeon->radeonScreen)) {
--		r300ContextPtr r300 = (r300ContextPtr)radeon;
--		R300_STATECHANGE(r300, cb);
--		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
--						r300->radeon.radeonScreen->fbLocation;
--		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--		
--		if (r300->radeon.radeonScreen->cpp == 4)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--		else
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--	
--		if (r300->radeon.sarea->tiling_enabled)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--	}
-+	radeon->state.color.rrb = rrb;
- }
- 
- void radeonWaitForIdleLocked(radeonContextPtr radeon)
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
-index d54a821..3529555 100644
---- a/src/mesa/drivers/dri/r300/radeon_lock.c
-+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_state.h"
- #include "r300_context.h"
- #include "r300_state.h"
-+#include "r300_mem.h"
- 
- #include "framebuffer.h"
- 
-@@ -59,6 +60,8 @@ int prevLockLine = 0;
- void radeonUpdatePageFlipping(radeonContextPtr rmesa)
- {
- 	int use_back;
-+	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
-+	GLframebuffer *fb = drawable->driverPrivate;
- 
- 	rmesa->doPageFlip = rmesa->sarea->pfState;
- 	if (rmesa->glCtx->WinSysDrawBuffer) {
-@@ -72,16 +75,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
- 	     BUFFER_BACK_LEFT) : 1;
- 	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
- 
--	if (use_back) {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->backOffset;
--		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
--	} else {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->frontOffset;
--		rmesa->state.color.drawPitch =
--		    rmesa->radeonScreen->frontPitch;
--	}
-+	if (use_back)
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	else
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
-+	rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- }
- 
- /* Update the hardware state.  This is called if another context has
-@@ -125,12 +124,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	}
- 
- 	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--
- 		sarea->ctx_owner = rmesa->dri.hwContext;
--		for (i = 0; i < r300->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
--		}
-+		radeonBufmgrContendedLockTake(r300->radeon.bufmgr);
- 	}
- 
- 	rmesa->lost_context = GL_TRUE;
-diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
-index f1bc56e..7ea0842 100644
---- a/src/mesa/drivers/dri/r300/radeon_span.c
-+++ b/src/mesa/drivers/dri/r300/radeon_span.c
-@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_ioctl.h"
- #include "radeon_span.h"
- 
--#include "drirenderbuffer.h"
-+#include "radeon_buffer.h"
- 
- #define DBG 0
- 
-@@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * information.
-  */
- #define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
-+   struct radeon_renderbuffer *rrb = (void *) rb;		\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
-    const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
-+   GLubyte *buf = (GLubyte *) rrb->bo->virtual			\
-+      + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp;	\
-    GLuint p;							\
-    (void) p;
- 
- #define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
-+   struct radeon_renderbuffer *rrb = (void *) rb;	\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
-    const GLuint bottom = dPriv->h - 1;			\
-    GLuint xo = dPriv->x;				\
-    GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
-+   GLubyte *buf = (GLubyte *) rrb->base.Data;
- 
- #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
- 
-@@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_RGB565
- #define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2)
- #include "spantmp2.h"
- 
- /* 32 bit, ARGB8888 color spanline and pixel functions
-@@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_ARGB8888
- #define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4)
- #include "spantmp2.h"
- 
- /* ================================================================
-@@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * too...
-  */
- 
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-+static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb,
-+			     GLint x, GLint y)
- {
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
-+	GLuint pitch = rrb->pitch;
-+	if (rrb->depthHasSurface) {
- 		return 4 * (x + y * pitch);
- 	} else {
- 		GLuint ba, address = 0;	/* a[0..1] = 0           */
-@@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
- }
- 
- static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-+radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y)
- {
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
-+	GLuint pitch = rrb->pitch;
-+	if (rrb->depthHasSurface) {
- 		return 2 * (x + y * pitch);
- 	} else {
- 		GLuint ba, address = 0;	/* a[0]    = 0           */
-@@ -173,10 +174,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
- /* 16-bit depth buffer functions
-  */
- #define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
-+   *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d;
- 
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
-+   d = *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo ));
- 
- #define TAG(x) radeon##x##_z16
- #include "depthtmp.h"
-@@ -189,7 +190,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
- #ifdef COMPILE_R300
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    tmp &= 0x000000ff;							\
-    tmp |= ((d << 8) & 0xffffff00);					\
-@@ -198,7 +199,7 @@ do {									\
- #else
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    tmp &= 0xff000000;							\
-    tmp |= ((d) & 0x00ffffff);						\
-@@ -209,12 +210,12 @@ do {									\
- #ifdef COMPILE_R300
- #define READ_DEPTH( d, _x, _y )						\
-   do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
-+    d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,		\
- 					 _y + yo )) & 0xffffff00) >> 8; \
-   }while(0)
- #else
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
-+   d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,			\
- 					 _y + yo )) & 0x00ffffff;
- #endif
- 
-@@ -230,7 +231,7 @@ do {									\
- #ifdef COMPILE_R300
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    tmp &= 0xffffff00;							\
-    tmp |= (d) & 0xff;							\
-@@ -239,7 +240,7 @@ do {									\
- #else
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    tmp &= 0x00ffffff;							\
-    tmp |= (((d) & 0xff) << 24);						\
-@@ -250,14 +251,14 @@ do {									\
- #ifdef COMPILE_R300
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    d = tmp & 0x000000ff;						\
- } while (0)
- #else
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
-    GLuint tmp = *(GLuint *)(buf + offset);				\
-    d = (tmp & 0xff000000) >> 24;					\
- } while (0)
-@@ -300,10 +301,10 @@ static void radeonSpanRenderStart(GLcontext * ctx)
- 	 */
- 	{
- 		int p;
--		driRenderbuffer *drb =
--			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
-+		struct radeon_renderbuffer *rrb =
-+			(void *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
- 		volatile int *buf =
--			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
-+			(volatile int *)(rmesa->dri.screen->pFB + rrb->bo->offset);
- 		p = *buf;
- 	}
- }
-@@ -326,20 +327,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
- /**
-  * Plug in the Get/Put routines for the given driRenderbuffer.
-  */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
-+void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
- {
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
--	}
-+    if (rrb->base.InternalFormat == GL_RGB5) {
-+	radeonInitPointers_RGB565(&rrb->base);
-+    } else if (rrb->base.InternalFormat == GL_RGBA8) {
-+	radeonInitPointers_ARGB8888(&rrb->base);
-+    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
-+	radeonInitDepthPointers_z16(&rrb->base);
-+    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
-+	radeonInitDepthPointers_z24_s8(&rrb->base);
-+    } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-+	radeonInitStencilPointers_z24_s8(&rrb->base);
-+    }
- }
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
-index d81318c..a7720da 100644
---- a/src/mesa/drivers/dri/r300/radeon_state.c
-+++ b/src/mesa/drivers/dri/r300/radeon_state.c
-@@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
- void radeonInitState(radeonContextPtr radeon)
- {
- 	radeon->Fallback = 0;
--
--	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	}
- }
- 
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h
-new file mode 100644
-index 0000000..730c40b
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h
-@@ -0,0 +1,50 @@
-+/*
-+ * Copyright 2008 Red Hat, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software")
-+ * to deal in the software without restriction, including without limitation
-+ * on the rights to use, copy, modify, merge, publish, distribute, sub
-+ * license, and/or sell copies of the Software, and to permit persons to whom
-+ * them Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the next
-+ * paragraph) shall be included in all copies or substantial portions of the
-+ * Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
-+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * Authors:
-+ *	Adam Jackson <ajax@redhat.com>
-+ */
-+
-+#ifndef RADEON_BUFFER_H
-+#define RADEON_BUFFER_H
-+
-+#include "dri_bufmgr.h"
-+
-+struct radeon_renderbuffer
-+{
-+    struct gl_renderbuffer base;
-+    dri_bo *bo;
-+    unsigned int cpp;
-+    /* unsigned int offset; */
-+    unsigned int pitch;
-+    unsigned int height;
-+
-+    /* boo Xorg 6.8.2 compat */
-+    int depthHasSurface;
-+
-+    __DRIdrawablePrivate *dPriv;
-+};
-+
-+struct radeon_bufmgr {
-+	dri_bufmgr base;
-+};
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
-index 84b5c46..10a49d2 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
-@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_chipset.h"
- #include "radeon_macros.h"
- #include "radeon_screen.h"
-+#include "radeon_buffer.h"
- #if !RADEON_COMMON
- #include "radeon_context.h"
- #include "radeon_span.h"
-@@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "GL/internal/dri_interface.h"
- 
-+#include <errno.h>
-+#include <sys/ioctl.h>
-+
- /* Radeon configuration
-  */
- #include "xmlpool.h"
-@@ -350,6 +354,79 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
- };
- #endif
- 
-+
-+static void
-+radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
-+			 struct radeon_gem_object *gem_obj)
-+{
-+     struct drm_gem_close close_args;
-+     struct drm_gem_open args;
-+     struct drm_radeon_gem_mmap mmap_args;
-+     struct drm_radeon_gem_pin pin_args;
-+     int ret;
-+   
-+     if (gem_obj->gem_handle) {
-+	     close_args.handle = gem_obj->gem_handle;
-+
-+	     ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args);
-+	     gem_obj->gem_handle = 0;
-+     }
-+
-+     /* do open */
-+     args.name = gem_obj->gem_name;
-+     ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args);
-+     if (ret)
-+	     return;
-+     
-+     gem_obj->gem_handle = args.handle;
-+     gem_obj->size = args.size;
-+     
-+     mmap_args.handle = gem_obj->gem_handle;
-+     mmap_args.size = gem_obj->size;
-+     mmap_args.offset = 0;
-+     
-+     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args,
-+			       sizeof(mmap_args));
-+     
-+     if (ret)
-+	     return;
-+     
-+     gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr);
-+     
-+     pin_args.handle = gem_obj->gem_handle;
-+     pin_args.alignment = 0;
-+     
-+     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args,
-+			       sizeof(pin_args));
-+     
-+     if (ret)
-+	     return;
-+     
-+     gem_obj->offset = pin_args.offset;
-+     
-+     fprintf(stderr,"handle %d, size %llx, ptr %p, offset %llx\n", gem_obj->gem_handle,
-+	     gem_obj->size, gem_obj->map, gem_obj->offset);
-+}
-+
-+static int
-+radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
-+		       RADEONDRIPtr dri_priv)
-+{
-+	/* STOP GAP HERE */
-+
-+	screen->front.gem_name = dri_priv->frontOffset;
-+	radeon_gem_update_handle(screen, sPriv, &screen->front);
-+	screen->back.gem_name = dri_priv->backOffset;
-+	radeon_gem_update_handle(screen, sPriv, &screen->back);
-+	screen->depth.gem_name = dri_priv->depthOffset;
-+	radeon_gem_update_handle(screen, sPriv, &screen->depth);
-+
-+	screen->vram_texture.gem_name = dri_priv->textureOffset;
-+	radeon_gem_update_handle(screen, sPriv, &screen->vram_texture);
-+	screen->vram_texture.gem_name = dri_priv->gartTexHandle;
-+	radeon_gem_update_handle(screen, sPriv, &screen->gart_texture);
-+}
-+
- /* Create the device specific screen private data struct.
-  */
- static radeonScreenPtr
-@@ -389,6 +466,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
-    {
-       int ret;
-+
-+#ifdef RADEON_PARAM_KERNEL_MM
-+     ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
-+                            &screen->kernel_mm);
-+
-+      if (ret && ret != -EINVAL) {
-+         FREE( screen );
-+         fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret);
-+         return NULL;
-+      }
-+
-+      if (ret == -EINVAL)
-+          screen->kernel_mm = 0;
-+#endif
-+
-       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
- 			    &screen->gart_buffer_offset);
- 
-@@ -422,32 +514,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-       screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
-    }
- 
--   screen->mmio.handle = dri_priv->registerHandle;
--   screen->mmio.size   = dri_priv->registerSize;
--   if ( drmMap( sPriv->fd,
--		screen->mmio.handle,
--		screen->mmio.size,
--		&screen->mmio.map ) ) {
--      FREE( screen );
--      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
--      return NULL;
--   }
-+   if (!screen->kernel_mm) {
-+      screen->mmio.handle = dri_priv->registerHandle;
-+      screen->mmio.size   = dri_priv->registerSize;
-+      if ( drmMap( sPriv->fd,
-+		   screen->mmio.handle,
-+		   screen->mmio.size,
-+		   &screen->mmio.map ) ) {
-+	 FREE( screen );
-+	 __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-+	 return NULL;
-+      }
- 
--   RADEONMMIO = screen->mmio.map;
-+      RADEONMMIO = screen->mmio.map;
- 
--   screen->status.handle = dri_priv->statusHandle;
--   screen->status.size   = dri_priv->statusSize;
--   if ( drmMap( sPriv->fd,
--		screen->status.handle,
--		screen->status.size,
--		&screen->status.map ) ) {
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
--      return NULL;
-+      screen->status.handle = dri_priv->statusHandle;
-+      screen->status.size   = dri_priv->statusSize;
-+      if ( drmMap( sPriv->fd,
-+		   screen->status.handle,
-+		   screen->status.size,
-+		   &screen->status.map ) ) {
-+	 drmUnmap( screen->mmio.map, screen->mmio.size );
-+	 FREE( screen );
-+	 __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-+	 return NULL;
-+      }
-+      screen->scratch = (__volatile__ u_int32_t *)
-+	 ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-    }
--   screen->scratch = (__volatile__ u_int32_t *)
--      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
- 
-    screen->buffers = drmMapBufs( sPriv->fd );
-    if ( !screen->buffers ) {
-@@ -458,22 +552,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-       return NULL;
-    }
- 
--   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
--      screen->gartTextures.handle = dri_priv->gartTexHandle;
--      screen->gartTextures.size   = dri_priv->gartTexMapSize;
--      if ( drmMap( sPriv->fd,
--		   screen->gartTextures.handle,
--		   screen->gartTextures.size,
--		   (drmAddressPtr)&screen->gartTextures.map ) ) {
--	 drmUnmapBufs( screen->buffers );
--	 drmUnmap( screen->status.map, screen->status.size );
--	 drmUnmap( screen->mmio.map, screen->mmio.size );
--	 FREE( screen );
--	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
--	 return NULL;
-+   if (!screen->kernel_mm) {
-+      if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-+	 screen->gartTextures.handle = dri_priv->gartTexHandle;
-+	 screen->gartTextures.size   = dri_priv->gartTexMapSize;
-+	 if ( drmMap( sPriv->fd,
-+		      screen->gartTextures.handle,
-+		      screen->gartTextures.size,
-+		      (drmAddressPtr)&screen->gartTextures.map ) ) {
-+	    drmUnmapBufs( screen->buffers );
-+	    drmUnmap( screen->status.map, screen->status.size );
-+	    drmUnmap( screen->mmio.map, screen->mmio.size );
-+	    FREE( screen );
-+	    __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-+	    return NULL;
-+	 }
-+	 
-+	 screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-       }
--
--      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-    }
- 
-    screen->chip_flags = 0;
-@@ -840,7 +936,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
-                          &temp);
-    if (ret) {
--       if (screen->chip_family < CHIP_FAMILY_RS690)
-+       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
- 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
-        else {
-            FREE( screen );
-@@ -881,55 +977,58 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-        }
-    }
- 
--   if ( sPriv->drm_version.minor >= 10 ) {
--      drm_radeon_setparam_t sp;
-+   if (!screen->kernel_mm) {
-+      if ( sPriv->drm_version.minor >= 10 ) {
-+	 drm_radeon_setparam_t sp;
- 
--      sp.param = RADEON_SETPARAM_FB_LOCATION;
--      sp.value = screen->fbLocation;
-+	 sp.param = RADEON_SETPARAM_FB_LOCATION;
-+	 sp.value = screen->fbLocation;
- 
--      drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
--		       &sp, sizeof( sp ) );
--   }
--
--   screen->frontOffset	= dri_priv->frontOffset;
--   screen->frontPitch	= dri_priv->frontPitch;
--   screen->backOffset	= dri_priv->backOffset;
--   screen->backPitch	= dri_priv->backPitch;
--   screen->depthOffset	= dri_priv->depthOffset;
--   screen->depthPitch	= dri_priv->depthPitch;
--
--   /* Check if ddx has set up a surface reg to cover depth buffer */
--   screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
--      /* these chips don't use tiled z without hyperz. So always pretend
--         we have set up a surface which will cause linear reads/writes */
--      ((screen->chip_family & RADEON_CLASS_R100) &&
--      !(screen->chip_flags & RADEON_CHIPSET_TCL));
--
--   if ( dri_priv->textureSize == 0 ) {
--      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
--      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
--      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
--	 dri_priv->log2GARTTexGran;
--   } else {
--      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
--				               + screen->fbLocation;
--      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
--      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
--	 dri_priv->log2TexGran;
--   }
-+	 drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
-+			  &sp, sizeof( sp ) );
-+      }
- 
--   if ( !screen->gartTextures.map || dri_priv->textureSize == 0
--	|| getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
--      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
--      screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
--      screen->texSize[RADEON_GART_TEX_HEAP] = 0;
--      screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
-+      screen->frontOffset	= dri_priv->frontOffset;
-+      screen->frontPitch	= dri_priv->frontPitch;
-+      screen->backOffset	= dri_priv->backOffset;
-+      screen->backPitch	= dri_priv->backPitch;
-+      screen->depthOffset	= dri_priv->depthOffset;
-+      screen->depthPitch	= dri_priv->depthPitch;
-+   
-+      /* Check if ddx has set up a surface reg to cover depth buffer */
-+      screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
-+	 /* these chips don't use tiled z without hyperz. So always pretend
-+	    we have set up a surface which will cause linear reads/writes */
-+	 ((screen->chip_family & RADEON_CLASS_R100) &&
-+	  !(screen->chip_flags & RADEON_CHIPSET_TCL));
-+      
-+      if ( dri_priv->textureSize == 0 ) {
-+	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
-+	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
-+	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
-+	    dri_priv->log2GARTTexGran;
-+      } else {
-+	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
-+	    + screen->fbLocation;
-+	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
-+	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
-+	    dri_priv->log2TexGran;
-+      }
-+      
-+      if ( !screen->gartTextures.map || dri_priv->textureSize == 0
-+	   || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
-+	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
-+	 screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
-+	 screen->texSize[RADEON_GART_TEX_HEAP] = 0;
-+	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
-+      } else {
-+	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
-+	 screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
-+	 screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
-+	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran;
-+      }
-    } else {
--      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
--      screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
--      screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
--      screen->logTexGranularity[RADEON_GART_TEX_HEAP] =
--	 dri_priv->log2GARTTexGran;
-+      radeon_init_mm_buffers(screen, sPriv, dri_priv);
-    }
- 
-    i = 0;
-@@ -975,12 +1074,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
-    if (!screen)
-       return;
- 
--   if ( screen->gartTextures.map ) {
--      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
--   }
-    drmUnmapBufs( screen->buffers );
--   drmUnmap( screen->status.map, screen->status.size );
--   drmUnmap( screen->mmio.map, screen->mmio.size );
-+   if (!screen->kernel_mm) {
-+      if ( screen->gartTextures.map ) {
-+	 drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-+      }
-+      drmUnmap( screen->status.map, screen->status.size );
-+      drmUnmap( screen->mmio.map, screen->mmio.size );
-+   }
- 
-    /* free all option information */
-    driDestroyOptionInfo (&screen->optionCache);
-@@ -1004,6 +1105,158 @@ radeonInitDriver( __DRIscreenPrivate *sPriv )
-    return GL_TRUE;
- }
- 
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+static GLboolean
-+radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
-+			    GLenum intFormat, GLuint w, GLuint h)
-+{
-+    rb->Width = w;
-+    rb->Height = h;
-+    rb->_ActualFormat = intFormat;
-+
-+    return GL_TRUE;
-+}
-+
-+
-+static struct radeon_renderbuffer *
-+radeon_create_renderbuffer(GLenum format)
-+{
-+    struct radeon_renderbuffer *ret;
-+
-+    ret = CALLOC_STRUCT(radeon_renderbuffer);
-+    if (!ret)
-+	return NULL;
-+
-+    _mesa_init_renderbuffer(&ret->base, 0);
-+
-+    /* XXX format junk */
-+    switch (format) {
-+	case GL_RGB5:
-+	    ret->base._ActualFormat = GL_RGB5;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 5;
-+	    ret->base.GreenBits = 6;
-+	    ret->base.BlueBits = 5;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_RGBA8:
-+	    ret->base._ActualFormat = GL_RGBA8;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 8;
-+	    ret->base.GreenBits = 8;
-+	    ret->base.BlueBits = 8;
-+	    ret->base.AlphaBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_STENCIL_INDEX8_EXT:
-+	    ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
-+	    ret->base._BaseFormat = GL_STENCIL_INDEX;
-+	    ret->base.StencilBits = 8; 
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_DEPTH_COMPONENT16:
-+	    ret->base._ActualFormat = GL_DEPTH_COMPONENT16;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 16;
-+	    ret->base.DataType = GL_UNSIGNED_SHORT;
-+	    break;
-+	case GL_DEPTH_COMPONENT24:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.DataType = GL_UNSIGNED_INT;
-+	    break;
-+	case GL_DEPTH24_STENCIL8_EXT:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.StencilBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-+	    break;
-+	default:
-+	    /* whoops */
-+	    break;
-+    }
-+
-+    ret->base.InternalFormat = format;
-+
-+    ret->base.AllocStorage = radeon_alloc_window_storage;
-+
-+    radeonSetSpanFunctions(ret);
-+
-+    return ret;
-+}
-+
-+/**
-+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-+ *
-+ * \todo This function (and its interface) will need to be updated to support
-+ * pbuffers.
-+ */
-+static GLboolean
-+radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-+                    __DRIdrawablePrivate *driDrawPriv,
-+                    const __GLcontextModes *mesaVis,
-+                    GLboolean isPixmap )
-+{
-+   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
-+
-+    const GLboolean swDepth = GL_FALSE;
-+    const GLboolean swAlpha = GL_FALSE;
-+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
-+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
-+	mesaVis->depthBits != 24;
-+    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
-+    GLenum depthFormat = GL_NONE;
-+    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-+
-+    if (mesaVis->depthBits == 16)
-+	depthFormat = GL_DEPTH_COMPONENT16;
-+    else if (mesaVis->depthBits == 24)
-+	depthFormat = GL_DEPTH_COMPONENT24;
-+
-+    /* front color renderbuffer */
-+    {
-+	struct radeon_renderbuffer *front =
-+	    radeon_create_renderbuffer(rgbFormat);
-+	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
-+    }
-+
-+    /* back color renderbuffer */
-+    if (mesaVis->doubleBufferMode) {
-+	struct radeon_renderbuffer *back =
-+	    radeon_create_renderbuffer(GL_RGBA);
-+	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
-+    }
-+
-+    /* depth renderbuffer */
-+    if (depthFormat != GL_NONE) {
-+	struct radeon_renderbuffer *depth =
-+	    radeon_create_renderbuffer(depthFormat);
-+	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
-+	depth->depthHasSurface = screen->depthHasSurface;
-+    }
-+
-+    /* stencil renderbuffer */
-+    if (mesaVis->stencilBits > 0 && !swStencil) {
-+	struct radeon_renderbuffer *stencil =
-+	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT);
-+	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
-+	stencil->depthHasSurface = screen->depthHasSurface;
-+    }
-+
-+    _mesa_add_soft_renderbuffers(fb,
-+	    GL_FALSE, /* color */
-+	    swDepth,
-+	    swStencil,
-+	    swAccum,
-+	    swAlpha,
-+	    GL_FALSE /* aux */);
-+    driDrawPriv->driverPrivate = (void *) fb;
-+
-+    return (driDrawPriv->driverPrivate != NULL);
-+}
-+#else
- 
- /**
-  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-@@ -1105,6 +1358,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
- }
- 
- 
-+
-+
-+#endif
-+
-+
- static void
- radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
- {
-@@ -1199,11 +1457,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
-    if (!radeonInitDriver(psp))
-        return NULL;
- 
-+   /* for now fill in all modes */
-    return radeonFillInModes( psp,
- 			     dri_priv->bpp,
- 			     (dri_priv->bpp == 16) ? 16 : 24,
--			     (dri_priv->bpp == 16) ? 0  : 8,
--			     (dri_priv->backOffset != dri_priv->depthOffset) );
-+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
- }
- 
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
-index ab859d5..82eb7d8 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
-@@ -55,6 +55,14 @@ typedef struct {
-    drmAddress map;			/* Mapping of the DRM region */
- } radeonRegionRec, *radeonRegionPtr;
- 
-+struct radeon_gem_object {
-+   uint32_t gem_name;
-+   uint32_t gem_handle;
-+   uint64_t size;
-+   void *map;
-+   uint64_t offset;
-+};
-+
- typedef struct {
-    int chip_family;
-    int chip_flags;
-@@ -107,6 +115,13 @@ typedef struct {
-    const __DRIextension *extensions[8];
- 
-    int num_gb_pipes;
-+
-+   int kernel_mm;
-+   struct radeon_gem_object front;
-+   struct radeon_gem_object back;
-+   struct radeon_gem_object depth;
-+   struct radeon_gem_object vram_texture;
-+   struct radeon_gem_object gart_texture;
- } radeonScreenRec, *radeonScreenPtr;
- 
- #define IS_R100_CLASS(screen) \
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
-index 9abe086..1650a9b 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
-@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drirenderbuffer.h"
- 
-+#include "radeon_buffer.h"
-+
- extern void radeonInitSpanFuncs(GLcontext * ctx);
--extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
- 
-+#if COMPILE_R300
-+extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
-+#else
-+extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
-+#endif
- #endif
diff --git a/sources b/sources
index d1667ae..8c9b49d 100644
--- a/sources
+++ b/sources
@@ -1,2 +1,2 @@
 6ae05158e678f4594343f32c2ca50515  gl-manpages-1.0.1.tar.bz2
-937234d8b7b8528295b7080fbcf0a532  mesa-20080814.tar.bz2
+d5e2a6d63b4611ec38aaab19b8f68117  mesa-20080905.tar.bz2