diff --git a/.cvsignore b/.cvsignore
index a02cf06..5315713 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -3,3 +3,4 @@ xdriinfo-1.0.2.tar.bz2
 MesaDemos-7.3.tar.bz2
 MesaLib-7.3.tar.bz2
 mesa-20090322.tar.bz2
+mesa-20090723.tar.bz2
diff --git a/mesa-7.1-osmesa-version.patch b/mesa-7.1-osmesa-version.patch
index cd41ad2..177c0c3 100644
--- a/mesa-7.1-osmesa-version.patch
+++ b/mesa-7.1-osmesa-version.patch
@@ -10,12 +10,3 @@ diff -up Mesa-7.1/src/mesa/drivers/osmesa/Makefile.jx Mesa-7.1/src/mesa/drivers/
  		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
  		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
  		$(OSMESA_LIB_DEPS) $(OBJECTS)
-@@ -58,7 +58,7 @@ $(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS
- # with all the other Mesa sources (compiled with -DCHAN_BITS=16/32
- osmesa16: $(OBJECTS) $(CORE_MESA)
- 	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
--		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
-+		-major 6 -minor 5 -patch 3 \
- 		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
- 		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
- 		$(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
diff --git a/mesa-7.5-r300-batch-accounting.patch b/mesa-7.5-r300-batch-accounting.patch
deleted file mode 100644
index 2fbfcc2..0000000
--- a/mesa-7.5-r300-batch-accounting.patch
+++ /dev/null
@@ -1,12 +0,0 @@
-diff -up mesa-20090428/src/mesa/drivers/dri/r300/r300_cmdbuf.c.jx mesa-20090428/src/mesa/drivers/dri/r300/r300_cmdbuf.c
---- mesa-20090428/src/mesa/drivers/dri/r300/r300_cmdbuf.c.jx	2009-05-18 11:17:36.000000000 -0400
-+++ mesa-20090428/src/mesa/drivers/dri/r300/r300_cmdbuf.c	2009-05-21 16:32:32.000000000 -0400
-@@ -251,7 +251,7 @@ static void emit_cb_offset(GLcontext *ct
-             OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) |
-                     ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT));
-             END_BATCH();
--            BEGIN_BATCH_NO_AUTOSTATE(16);
-+            BEGIN_BATCH_NO_AUTOSTATE(7);
-             for (i = 0; i < 1; i++) {
-                 OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
-                 OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
diff --git a/mesa-no-mach64.patch b/mesa-no-mach64.patch
index 397531e..450c54d 100644
--- a/mesa-no-mach64.patch
+++ b/mesa-no-mach64.patch
@@ -1,45 +1,46 @@
---- configure.ac.mach64	2008-09-05 13:53:24.000000000 +1000
-+++ configure.ac	2008-09-05 13:53:39.000000000 +1000
-@@ -656,7 +656,7 @@
+diff -up mesa-20090723/configure.ac.mach64 mesa-20090723/configure.ac
+--- mesa-20090723/configure.ac.mach64	2009-07-23 12:11:01.000000000 +1000
++++ mesa-20090723/configure.ac	2009-07-23 12:11:22.000000000 +1000
+@@ -713,7 +713,7 @@ if test "$mesa_driver" = dri; then
              # because there is no x86-64 system where they could *ever*
              # be used.
              if test "x$DRI_DIRS" = "xyes"; then
--                DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 radeon \
-+                DRI_DIRS="i915 i965 mga r128 r200 r300 radeon \
+-                DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 r600 radeon \
++                DRI_DIRS="i915 i965 mga r128 r200 r300 r600 radeon \
                      savage tdfx unichrome swrast"
              fi
              ;;
-@@ -664,13 +664,13 @@
+@@ -721,13 +721,13 @@ if test "$mesa_driver" = dri; then
              # Build only the drivers for cards that exist on PowerPC.
              # At some point MGA will be added, but not yet.
              if test "x$DRI_DIRS" = "xyes"; then
--                DRI_DIRS="mach64 r128 r200 r300 radeon tdfx swrast"
-+                DRI_DIRS="r128 r200 r300 radeon tdfx swrast"
+-                DRI_DIRS="mach64 r128 r200 r300 r600 radeon tdfx swrast"
++                DRI_DIRS="r128 r200 r300 r600 radeon tdfx swrast"
              fi
              ;;
          sparc*)
              # Build only the drivers for cards that exist on sparc`
              if test "x$DRI_DIRS" = "xyes"; then
--                DRI_DIRS="mach64 r128 r200 r300 radeon ffb swrast"
-+                DRI_DIRS="r128 r200 r300 radeon ffb swrast"
+-                DRI_DIRS="mach64 r128 r200 r300 r600 radeon ffb swrast"
++                DRI_DIRS="r128 r200 r300 r600 radeon ffb swrast"
              fi
              ;;
          esac
-@@ -689,7 +689,7 @@
+@@ -746,7 +746,7 @@ if test "$mesa_driver" = dri; then
          # ffb and gamma are missing because they have not been converted
          # to use the new interface.
          if test "x$DRI_DIRS" = "xyes"; then
--            DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \
-+            DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon tdfx \
+-            DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon tdfx \
++            DRI_DIRS="i810 i915 i965 mga r128 r200 r300 r600 radeon tdfx \
                  unichrome savage sis swrast"
          fi
          ;;
-@@ -704,7 +704,7 @@
+@@ -765,7 +765,7 @@ if test "$mesa_driver" = dri; then
  
      # default drivers
      if test "x$DRI_DIRS" = "xyes"; then
--        DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon s3v \
-+        DRI_DIRS="i810 i915 i965 mga r128 r200 r300 radeon s3v \
+-        DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon s3v \
++        DRI_DIRS="i810 i915 i965 mga r128 r200 r300 r600 radeon s3v \
              savage sis tdfx trident unichrome ffb swrast"
      fi
  
diff --git a/mesa.spec b/mesa.spec
index 0a852f1..238ee83 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -21,7 +21,7 @@
 Summary: Mesa graphics libraries
 Name: mesa
 Version: 7.6
-Release: 0.3%{?dist}
+Release: 0.4%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
diff --git a/radeon-rewrite-emit1clip.patch b/radeon-rewrite-emit1clip.patch
deleted file mode 100644
index 1262bfa..0000000
--- a/radeon-rewrite-emit1clip.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-index afca0e2..a006e8f 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-@@ -252,7 +252,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
-                     ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT));
-             END_BATCH();
-             BEGIN_BATCH_NO_AUTOSTATE(16);
--            for (i = 0; i < 4; i++) {
-+            for (i = 0; i < 1; i++) {
-                 OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
-                 OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
-                 OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT));
diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch
deleted file mode 100644
index ee7a4b4..0000000
--- a/radeon-rewrite.patch
+++ /dev/null
@@ -1,40231 +0,0 @@
-diff --git a/configs/autoconf.in b/configs/autoconf.in
-index b61d7f3..0f9306d 100644
---- a/configs/autoconf.in
-+++ b/configs/autoconf.in
-@@ -20,6 +20,8 @@ CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ \
- 	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
- LDFLAGS = @LDFLAGS@
- EXTRA_LIB_PATH = @EXTRA_LIB_PATH@
-+RADEON_CFLAGS = @RADEON_CFLAGS@
-+RADEON_LDFLAGS = @RADEON_LDFLAGS@
- 
- # Assembler
- MESA_ASM_SOURCES = @MESA_ASM_SOURCES@
-diff --git a/configure.ac b/configure.ac
-index 8412cdc..231b7e9 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -446,6 +446,8 @@ AC_SUBST([GALLIUM_WINSYS_DRM_DIRS])
- AC_SUBST([GALLIUM_DRIVERS_DIRS])
- AC_SUBST([GALLIUM_AUXILIARY_DIRS])
- AC_SUBST([GALLIUM_STATE_TRACKERS_DIRS])
-+AC_SUBST([RADEON_CFLAGS])
-+AC_SUBST([RADEON_LDFLAGS])
- 
- dnl
- dnl User supplied program configuration
-@@ -573,6 +575,13 @@ dri)
-     GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED"
-     DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED"
- 
-+    PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
-+
-+    if test "$HAVE_LIBDRM_RADEON" = yes; then
-+	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
-+	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
-+    fi
-+
-     # find the DRI deps for libGL
-     if test "$x11_pkgconfig" = yes; then
-         # add xcb modules if necessary
-diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
-index e112720..ae0e61e 100644
---- a/src/mesa/drivers/dri/common/dri_util.c
-+++ b/src/mesa/drivers/dri/common/dri_util.c
-@@ -37,6 +37,9 @@
- typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator);
- #endif
- 
-+static void dri_get_drawable(__DRIdrawable *pdp);
-+static void dri_put_drawable(__DRIdrawable *pdp);
-+
- /**
-  * This is just a token extension used to signal that the driver
-  * supports setting a read drawable.
-@@ -127,7 +130,7 @@ static int driUnbindContext(__DRIcontext *pcp)
- 	return GL_FALSE;
-     }
- 
--    pdp->refcount--;
-+    dri_put_drawable(pdp);
- 
-     if (prp != pdp) {
-         if (prp->refcount == 0) {
-@@ -135,7 +138,7 @@ static int driUnbindContext(__DRIcontext *pcp)
- 	    return GL_FALSE;
- 	}
- 
--	prp->refcount--;
-+    	dri_put_drawable(prp);
-     }
- 
- 
-@@ -170,10 +173,10 @@ static int driBindContext(__DRIcontext *pcp,
- 	pcp->driReadablePriv = prp;
- 	if (pdp) {
- 	    pdp->driContextPriv = pcp;
--	    pdp->refcount++;
-+    	    dri_get_drawable(pdp);
- 	}
- 	if ( prp && pdp != prp ) {
--	    prp->refcount++;
-+    	    dri_get_drawable(prp);
- 	}
-     }
- 
-@@ -430,7 +433,7 @@ driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config,
- 
-     pdp->loaderPrivate = data;
-     pdp->hHWDrawable = hwDrawable;
--    pdp->refcount = 0;
-+    pdp->refcount = 1;
-     pdp->pStamp = NULL;
-     pdp->lastStamp = 0;
-     pdp->index = 0;
-@@ -483,12 +486,19 @@ dri2CreateNewDrawable(__DRIscreen *screen,
-     return pdraw;
- }
- 
--
--static void
--driDestroyDrawable(__DRIdrawable *pdp)
-+static void dri_get_drawable(__DRIdrawable *pdp)
-+{
-+    pdp->refcount++;
-+}
-+	
-+static void dri_put_drawable(__DRIdrawable *pdp)
- {
-     __DRIscreenPrivate *psp;
- 
-+    pdp->refcount--;
-+    if (pdp->refcount)
-+	return;
-+
-     if (pdp) {
- 	psp = pdp->driScreenPriv;
-         (*psp->DriverAPI.DestroyBuffer)(pdp);
-@@ -504,6 +514,12 @@ driDestroyDrawable(__DRIdrawable *pdp)
-     }
- }
- 
-+static void
-+driDestroyDrawable(__DRIdrawable *pdp)
-+{
-+    dri_put_drawable(pdp);
-+}
-+
- /*@}*/
- 
- 
-diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
-index e9144ac..6a246ed 100644
---- a/src/mesa/drivers/dri/r200/Makefile
-+++ b/src/mesa/drivers/dri/r200/Makefile
-@@ -3,6 +3,8 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = r200_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c 
-@@ -11,25 +13,36 @@ ifeq ($(USING_EGL), 1)
- EGL_SOURCES = server/radeon_egl.c
- endif
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c \
-+	radeon_fbo.c
-+
-+
- DRIVER_SOURCES = r200_context.c \
- 		 r200_ioctl.c \
--		 r200_lock.c \
- 		 r200_state.c \
- 		 r200_state_init.c \
- 		 r200_cmdbuf.c \
- 		 r200_pixel.c \
- 		 r200_tex.c \
--		 r200_texmem.c \
- 		 r200_texstate.c \
- 		 r200_tcl.c \
- 		 r200_swtcl.c \
--		 r200_span.c \
- 		 r200_maos.c \
- 		 r200_sanity.c \
- 		 r200_fragshader.c \
- 		 r200_vertprog.c \
- 		 radeon_screen.c \
--		 $(EGL_SOURCES)
-+		 $(EGL_SOURCES) \
-+		 $(RADEON_COMMON_SOURCES)
- 
- C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
- 
-@@ -48,7 +61,30 @@ SYMLINKS = \
- COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
--	radeon_screen.h
-+	radeon_screen.h \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_bo_legacy.h \
-+	radeon_cs_legacy.h \
-+	radeon_bocs_wrapper.h \
-+	radeon_span.h \
-+	radeon_span.c \
-+	radeon_lock.c \
-+	radeon_lock.h \
-+	radeon_common.c \
-+	radeon_common_context.c \
-+	radeon_common_context.h \
-+	radeon_common.h \
-+	radeon_cmdbuf.h \
-+	radeon_mipmap_tree.c \
-+	radeon_mipmap_tree.h \
-+	radeon_texture.c \
-+	radeon_texture.h \
-+	radeon_dma.c \
-+	radeon_dma.h \
-+	radeon_fbo.c
-+
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-index e163377..e34ea96 100644
---- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- #include "main/simple_list.h"
- 
-+#include "radeon_common.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_sanity.h"
- #include "radeon_reg.h"
- 
--static void print_state_atom( struct r200_state_atom *state )
--{
--   int i;
--
--   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
--
--   if (0 & R200_DEBUG & DEBUG_VERBOSE) 
--      for (i = 0 ; i < state->cmd_size ; i++) 
--	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
--
--}
--
- /* The state atoms will be emitted in the order they appear in the atom list,
-  * so this step is important.
-  */
-@@ -64,141 +53,56 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
- {
-    int i, mtu;
- 
--   mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   make_empty_list(&rmesa->hw.atomlist);
--   rmesa->hw.atomlist.name = "atom-list";
--
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
-+   mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
-+
-+   make_empty_list(&rmesa->radeon.hw.atomlist);
-+   rmesa->radeon.hw.atomlist.name = "atom-list";
-+
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
-    for (i = 0; i < mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
-    for (i = 0; i < mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
-    for (i = 0; i < 6; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
-    for (i = 0; i < 8; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
-    for (i = 0; i < 3 + mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
-    for (i = 0; i < 2; ++i)
--      insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
-+      insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
-    for (i = 0; i < 6; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
--}
--
--static void r200SaveHwState( r200ContextPtr rmesa )
--{
--   struct r200_state_atom *atom;
--   char * dest = rmesa->backup_store.cmd_buf;
--
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->backup_store.cmd_used = 0;
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( atom->check( rmesa->glCtx, atom->idx ) ) {
--	 int size = atom->cmd_size * 4;
--	 memcpy( dest, atom->cmd, size);
--	 dest += size;
--	 rmesa->backup_store.cmd_used += size;
--	 if (R200_DEBUG & DEBUG_STATE)
--	    print_state_atom( atom );
--      }
--   }
--
--   assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Returning to r200EmitState\n");
--}
--
--void r200EmitState( r200ContextPtr rmesa )
--{
--   char *dest;
--   int mtu;
--   struct r200_state_atom *atom;
--
--   if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (rmesa->save_on_next_emit) {
--      r200SaveHwState(rmesa);
--      rmesa->save_on_next_emit = GL_FALSE;
--   }
--
--   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
--      return;
--
--   mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   /* To avoid going across the entire set of states multiple times, just check
--    * for enough space for the case of emitting all state, and inline the
--    * r200AllocCmdBuf code here without all the checks.
--    */
--   r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
--
--   /* we need to calculate dest after EnsureCmdBufSpace
--      as we may flush the buffer - airlied */
--   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--   if (R200_DEBUG & DEBUG_STATE) {
--      foreach( atom, &rmesa->hw.atomlist ) {
--	 if ( atom->dirty || rmesa->hw.all_dirty ) {
--	    if ( atom->check( rmesa->glCtx, atom->idx ) )
--	       print_state_atom( atom );
--	    else
--	       fprintf(stderr, "skip state %s\n", atom->name);
--	 }
--      }
--   }
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( rmesa->hw.all_dirty )
--	 atom->dirty = GL_TRUE;
--      if ( atom->dirty ) {
--	 if ( atom->check( rmesa->glCtx, atom->idx ) ) {
--	    int size = atom->cmd_size * 4;
--	    memcpy( dest, atom->cmd, size);
--	    dest += size;
--	    rmesa->store.cmd_used += size;
--	    atom->dirty = GL_FALSE;
--	 }
--      }
--   }
--
--   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
--
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
- }
- 
- /* Fire a section of the retained (indexed_verts) buffer as a regular
-@@ -208,51 +112,80 @@ void r200EmitVbufPrim( r200ContextPtr rmesa,
-                        GLuint primitive,
-                        GLuint vertex_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    assert(!(primitive & R200_VF_PRIM_WALK_IND));
-    
--   r200EmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
-    
-    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
-       fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
- 	      rmesa->store.cmd_used/4, primitive, vertex_nr);
--   
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ,
--						  __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
--   cmd[2].i = (primitive | 
--	       R200_VF_PRIM_WALK_LIST |
--	       R200_VF_COLOR_ORDER_RGBA |
--	       (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
-+ 
-+   BEGIN_BATCH(3);
-+   OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
-+   OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
-+	     (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
-+   END_BATCH();
- }
- 
-+static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
-+{
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	if (vertex_count > 0) {
-+		BEGIN_BATCH(8+2);
-+		OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
-+		OUT_BATCH(R200_VF_PRIM_WALK_IND |
-+			  ((vertex_count + 0) << 16) |
-+			  type);
-+		
-+		if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
-+			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
-+			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
-+					rmesa->radeon.tcl.elt_dma_bo,
-+					rmesa->radeon.tcl.elt_dma_offset,
-+					RADEON_GEM_DOMAIN_GTT, 0, 0);
-+			OUT_BATCH(vertex_count/2);
-+		} else {
-+			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
-+			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
-+			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
-+			OUT_BATCH(vertex_count/2);
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->radeon.tcl.elt_dma_bo,
-+					      RADEON_GEM_DOMAIN_GTT, 0, 0);
-+		}
-+		END_BATCH();
-+	}
-+}
- 
--void r200FlushElts( r200ContextPtr rmesa )
-+void r200FlushElts(GLcontext *ctx)
- {
--   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
--   int dwords;
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   int nr, elt_used = rmesa->tcl.elt_used;
- 
-    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
-+     fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
-+
-+   assert( rmesa->radeon.dma.flush == r200FlushElts );
-+   rmesa->radeon.dma.flush = NULL;
-+
-+   elt_used = (elt_used + 2) & ~2;
-+
-+   nr = elt_used / 2;
- 
--   assert( rmesa->dma.flush == r200FlushElts );
--   rmesa->dma.flush = NULL;
-+   radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
- 
--   /* Cope with odd number of elts:
--    */
--   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
--   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
-+   r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
- 
--   cmd[1] |= (dwords - 3) << 16;
--   cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT;
-+   radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo);
-+   rmesa->radeon.tcl.elt_dma_bo = NULL;
- 
-    if (R200_DEBUG & DEBUG_SYNC) {
-       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--      r200Finish( rmesa->glCtx );
-+      radeonFinish( rmesa->radeon.glCtx );
-    }
- }
- 
-@@ -261,7 +194,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 				    GLuint primitive,
- 				    GLuint min_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
-    GLushort *retval;
- 
-    if (R200_DEBUG & DEBUG_IOCTL)
-@@ -269,30 +201,30 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 
-    assert((primitive & R200_VF_PRIM_WALK_IND));
-    
--   r200EmitState( rmesa );
--   
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr),
--						__FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
--   cmd[2].i = (primitive | 
--	       R200_VF_PRIM_WALK_IND |
--	       R200_VF_COLOR_ORDER_RGBA);
-+   radeonEmitState(&rmesa->radeon);
-+
-+   rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+					  0, R200_ELT_BUF_SZ, 4,
-+					  RADEON_GEM_DOMAIN_GTT, 0);
-+   rmesa->radeon.tcl.elt_dma_offset = 0;
-+   rmesa->tcl.elt_used = min_nr * 2;
-+
-+   radeon_validate_bo(&rmesa->radeon, rmesa->radeon.tcl.elt_dma_bo,
-+                      RADEON_GEM_DOMAIN_GTT, 0);
-+   if (radeon_revalidate_bos(rmesa->radeon.glCtx) == GL_FALSE)
-+      fprintf(stderr,"failure to revalidate BOs - badness\n");
- 
-+   radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
-+   retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
-    
--   retval = (GLushort *)(cmd+3);
- 
-    if (R200_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x prim %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, primitive);
-+      fprintf(stderr, "%s: header prim %x \n",
-+	      __FUNCTION__, primitive);
- 
--   assert(!rmesa->dma.flush);
--   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--   rmesa->dma.flush = r200FlushElts;
--
--   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
-+   assert(!rmesa->radeon.dma.flush);
-+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+   rmesa->radeon.dma.flush = r200FlushElts;
- 
-    return retval;
- }
-@@ -300,129 +232,119 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 
- 
- void r200EmitVertexAOS( r200ContextPtr rmesa,
--			  GLuint vertex_size,
--			  GLuint offset )
-+			GLuint vertex_size,
-+ 			struct radeon_bo *bo,
-+			GLuint offset )
- {
--   drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
-       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
- 	      __FUNCTION__, vertex_size, offset);
- 
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
--						  __FUNCTION__ );
- 
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
--   cmd[2].i = 1;
--   cmd[3].i = vertex_size | (vertex_size << 8);
--   cmd[4].i = offset;
-+   BEGIN_BATCH(5);
-+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
-+   OUT_BATCH(1);
-+   OUT_BATCH(vertex_size | (vertex_size << 8));
-+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   END_BATCH();
- }
--		       
- 
--void r200EmitAOS( r200ContextPtr rmesa,
--		    struct r200_dma_region **component,
--		    GLuint nr,
--		    GLuint offset )
-+void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
- {
--   drm_radeon_cmd_header_t *cmd;
--   int sz = AOS_BUFSZ(nr);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   uint32_t voffset;
-+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
-    int i;
--   int *tmp;
--
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16);
--   cmd[2].i = nr;
--   tmp = &cmd[0].i;
--   cmd += 3;
--
--   for (i = 0 ; i < nr ; i++) {
--      if (i & 1) {
--	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
--		      (component[i]->aos_size << 16));
--	 cmd[2].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
--	 cmd += 3;
-+   
-+   if (RADEON_DEBUG & DEBUG_VERTS)
-+      fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
-+	      offset);
-+
-+   BEGIN_BATCH(sz+2+ (nr*2));
-+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
-+   OUT_BATCH(nr);
-+
-+    
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
-+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+			
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[i].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[i+1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--      else {
--	 cmd[0].i = ((component[i]->aos_stride << 8) | 
--		     (component[i]->aos_size << 0));
--	 cmd[1].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
-+      
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[nr - 1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+      }
-+   } else {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
-+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+	 
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 OUT_BATCH(voffset);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 OUT_BATCH(voffset);
-+      }
-+      
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 OUT_BATCH(voffset);
-+      }
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[i+0].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[i+1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+      }
-+      if (nr & 1) {
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[nr-1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
-    }
--
--   if (R200_DEBUG & DEBUG_VERTS) {
--      fprintf(stderr, "%s:\n", __FUNCTION__);
--      for (i = 0 ; i < sz ; i++)
--	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
--   }
--}
--
--void r200EmitBlit( r200ContextPtr rmesa,
--		   GLuint color_fmt,
--		   GLuint src_pitch,
--		   GLuint src_offset,
--		   GLuint dst_pitch,
--		   GLuint dst_offset,
--		   GLint srcx, GLint srcy,
--		   GLint dstx, GLint dsty,
--		   GLuint w, GLuint h )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--	      __FUNCTION__, 
--	      src_pitch, src_offset, srcx, srcy,
--	      dst_pitch, dst_offset, dstx, dsty,
--	      w, h);
--
--   assert( (src_pitch & 63) == 0 );
--   assert( (dst_pitch & 63) == 0 );
--   assert( (src_offset & 1023) == 0 );
--   assert( (dst_offset & 1023) == 0 );
--   assert( w < (1<<16) );
--   assert( h < (1<<16) );
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
--						  __FUNCTION__ );
--
--
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
--   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_BRUSH_NONE |
--	       (color_fmt << 8) |
--	       RADEON_GMC_SRC_DATATYPE_COLOR |
--	       RADEON_ROP3_S |
--	       RADEON_DP_SRC_SOURCE_MEMORY |
--	       RADEON_GMC_CLR_CMP_CNTL_DIS |
--	       RADEON_GMC_WR_MSK_DIS );
--
--   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
--   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
--   cmd[5].i = (srcx << 16) | srcy;
--   cmd[6].i = (dstx << 16) | dsty; /* dst */
--   cmd[7].i = (w << 16) | h;
--}
--
--
--void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
--					   __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
--   cmd[0].wait.flags = flags;
-+   END_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
-index c067515..f80f0d8 100644
---- a/src/mesa/drivers/dri/r200/r200_context.c
-+++ b/src/mesa/drivers/dri/r200/r200_context.c
-@@ -54,7 +54,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_context.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
--#include "r200_span.h"
- #include "r200_pixel.h"
- #include "r200_tex.h"
- #include "r200_swtcl.h"
-@@ -62,6 +61,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_maos.h"
- #include "r200_vertprog.h"
- 
-+#include "radeon_span.h"
-+
- #define need_GL_ARB_vertex_program
- #define need_GL_ATI_fragment_shader
- #define need_GL_EXT_blend_minmax
-@@ -71,6 +72,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define need_GL_EXT_blend_func_separate
- #define need_GL_NV_vertex_program
- #define need_GL_ARB_point_parameters
-+#define need_GL_EXT_framebuffer_object
- #include "extension_helper.h"
- 
- #define DRIVER_DATE	"20060602"
-@@ -78,9 +80,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vblank.h"
- #include "utils.h"
- #include "xmlpool.h" /* for symbolic values of enum-type options */
--#ifndef R200_DEBUG
--int R200_DEBUG = (0);
--#endif
- 
- /* Return various strings for glGetString().
-  */
-@@ -89,8 +88,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    static char buffer[128];
-    unsigned   offset;
--   GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 :
--      rmesa->r200Screen->AGPMode;
-+   GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
-+      rmesa->radeon.radeonScreen->AGPMode;
- 
-    switch ( name ) {
-    case GL_VENDOR:
-@@ -101,7 +100,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
- 				     agp_mode );
- 
-       sprintf( & buffer[ offset ], " %sTCL",
--	       !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
-+	       !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
- 	       ? "" : "NO-" );
- 
-       return (GLubyte *)buffer;
-@@ -126,6 +125,7 @@ const struct dri_extension card_extensions[] =
-     { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
-     { "GL_EXT_blend_subtract",             NULL },
-     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
-+    { "GL_EXT_packed_depth_stencil",	   NULL},
-     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
-     { "GL_EXT_stencil_wrap",               NULL },
-     { "GL_EXT_texture_edge_clamp",         NULL },
-@@ -167,6 +167,11 @@ const struct dri_extension point_extensions[] = {
-     { NULL,                                NULL }
- };
- 
-+const struct dri_extension mm_extensions[] = {
-+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
-+  { NULL, NULL }
-+};
-+
- extern const struct tnl_pipeline_stage _r200_render_stage;
- extern const struct tnl_pipeline_stage _r200_tcl_stage;
- 
-@@ -234,6 +239,39 @@ static const struct dri_debug_control debug_control[] =
-     { NULL,    0 }
- };
- 
-+static void r200_get_lock(radeonContextPtr radeon)
-+{
-+   r200ContextPtr rmesa = (r200ContextPtr)radeon;
-+   drm_radeon_sarea_t *sarea = radeon->sarea;
-+
-+   R200_STATECHANGE( rmesa, ctx );
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-+   }
-+   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
-+
-+   if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
-+      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
-+      if (!radeon->radeonScreen->kernel_mm)
-+         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
-+   }
-+
-+}
-+
-+static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+}
-+
-+
-+static void r200_init_vtbl(radeonContextPtr radeon)
-+{
-+   radeon->vtbl.get_lock = r200_get_lock;
-+   radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
-+   radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
-+   radeon->vtbl.swtcl_flush = r200_swtcl_flush;
-+   radeon->vtbl.fallback = r200Fallback;
-+}
-+
- 
- /* Create the device specific rendering context.
-  */
-@@ -245,9 +283,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
-    struct dd_function_table functions;
-    r200ContextPtr rmesa;
--   GLcontext *ctx, *shareCtx;
-+   GLcontext *ctx;
-    int i;
--   int tcl_mode, fthrottle_mode;
-+   int tcl_mode;
- 
-    assert(glVisual);
-    assert(driContextPriv);
-@@ -257,7 +295,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
-    if ( !rmesa )
-       return GL_FALSE;
--      
-+
-+   r200_init_vtbl(&rmesa->radeon);
-    /* init exp fog table data */
-    r200InitStaticFogData();
- 
-@@ -265,12 +304,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-     * Do this here so that initialMaxAnisotropy is set before we create
-     * the default textures.
-     */
--   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
-+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
- 			screen->driScreen->myNum, "r200");
--   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
--                                                 "def_max_anisotropy");
-+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
-+							"def_max_anisotropy");
- 
--   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
-+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-       if ( sPriv->drm_version.minor < 13 )
- 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- 			  "disabling.\n", sPriv->drm_version.minor );
-@@ -291,59 +330,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    r200InitTextureFuncs(&functions);
-    r200InitShaderFuncs(&functions); 
- 
--   /* Allocate and initialize the Mesa context */
--   if (sharedContextPrivate)
--      shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
--   else
--      shareCtx = NULL;
--   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
--                                       &functions, (void *) rmesa);
--   if (!rmesa->glCtx) {
--      FREE(rmesa);
--      return GL_FALSE;
--   }
--   driContextPriv->driverPrivate = rmesa;
--
--   /* Init r200 context data */
--   rmesa->dri.context = driContextPriv;
--   rmesa->dri.screen = sPriv;
--   rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
--   rmesa->dri.hwContext = driContextPriv->hHWContext;
--   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
--   rmesa->dri.fd = sPriv->fd;
--   rmesa->dri.drmMinor = sPriv->drm_version.minor;
--
--   rmesa->r200Screen = screen;
--   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
--				       screen->sarea_priv_offset);
--
--
--   rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
--
--   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
--   make_empty_list( & rmesa->swapped );
--
--   rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
--   assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS);
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
--	    screen->texSize[i],
--	    12,
--	    RADEON_NR_TEX_REGIONS,
--	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
--	    & rmesa->sarea->tex_age[i],
--	    & rmesa->swapped,
--	    sizeof( r200TexObj ),
--	    (destroy_texture_object_t *) r200DestroyTexObj );
-+   if (!radeonInitContext(&rmesa->radeon, &functions,
-+			  glVisual, driContextPriv,
-+			  sharedContextPrivate)) {
-+     FREE(rmesa);
-+     return GL_FALSE;
-    }
--   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
--					   "texture_depth");
--   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--      rmesa->texture_depth = ( screen->cpp == 4 ) ?
--	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- 
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->hw.all_dirty = 1;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.hw.all_dirty = 1;
- 
-    /* Set the maximum texture size small enough that we can guarentee that
-     * all texture units can bind a maximal texture and have all of them in
-@@ -351,29 +346,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-     * setting allow larger textures.
-     */
- 
--   ctx = rmesa->glCtx;
--   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
-+   ctx = rmesa->radeon.glCtx;
-+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
- 						 "texture_units");
-    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
-    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- 
--   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
--
--   driCalculateMaxTextureLevels( rmesa->texture_heaps,
--				 rmesa->nr_heaps,
--				 & ctx->Const,
--				 4,
--				 11, /* max 2D texture size is 2048x2048 */
--#if ENABLE_HW_3D_TEXTURE
--				 8,  /* max 3D texture size is 256^3 */
--#else
--				 0,  /* 3D textures unsupported */
--#endif
--				 11, /* max cube texture size is 2048x2048 */
--				 11, /* max texture rectangle size is 2048x2048 */
--				 12,
--				 GL_FALSE,
--				 i );
-+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
- 
-    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- 
-@@ -383,7 +362,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    ctx->Const.MinPointSizeAA = 1.0;
-    ctx->Const.MaxPointSizeAA = 1.0;
-    ctx->Const.PointSizeGranularity = 0.0625;
--   if (rmesa->r200Screen->drmSupportsPointSprites)
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
-       ctx->Const.MaxPointSize = 2047.0;
-    else
-       ctx->Const.MaxPointSize = 1.0;
-@@ -439,32 +418,35 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    _math_matrix_set_identity( &rmesa->tmpmat );
- 
-    driInitExtensions( ctx, card_extensions, GL_TRUE );
--   if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     driInitExtensions(ctx, mm_extensions, GL_FALSE);
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
-      /* yuv textures don't work with some chips - R200 / rv280 okay so far
- 	others get the bit ordering right but don't actually do YUV-RGB conversion */
-       _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
-    }
--   if (rmesa->glCtx->Mesa_DXTn) {
-+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-    }
--   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
-+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-    }
- 
--   if (rmesa->r200Screen->drmSupportsCubeMapsR200)
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
-       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-        driInitExtensions( ctx, blend_extensions, GL_FALSE );
-    }
--   if(rmesa->r200Screen->drmSupportsVertexProgram)
-+   if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
-       driInitSingleExtension( ctx, ARB_vp_extension );
--   if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
-+   if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
-       driInitSingleExtension( ctx, NV_vp_extension );
- 
--   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
-+   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
-       driInitSingleExtension( ctx, ATI_fs_extension );
--   if (rmesa->r200Screen->drmSupportsPointSprites)
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
-       driInitExtensions( ctx, point_extensions, GL_FALSE );
- #if 0
-    r200InitDriverFuncs( ctx );
-@@ -474,33 +456,16 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- #endif
-    /* plug in a few more device driver functions */
-    /* XXX these should really go right after _mesa_init_driver_functions() */
-+   radeon_fbo_init(&rmesa->radeon);
-+   radeonInitSpanFuncs( ctx );
-    r200InitPixelFuncs( ctx );
--   r200InitSpanFuncs( ctx );
-    r200InitTnlFuncs( ctx );
-    r200InitState( rmesa );
-    r200InitSwtcl( ctx );
- 
--   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
--   rmesa->iw.irq_seq = -1;
--   rmesa->irqsEmitted = 0;
--   rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
--		     rmesa->r200Screen->irq);
--
--   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
--
--   if (!rmesa->do_irqs)
--      fprintf(stderr,
--	      "IRQ's not enabled, falling back to %s: %d %d\n",
--	      rmesa->do_usleeps ? "usleeps" : "busy waits",
--	      fthrottle_mode,
--	      rmesa->r200Screen->irq);
--
-    rmesa->prefer_gart_client_texturing = 
-       (getenv("R200_GART_CLIENT_TEXTURES") != 0);
- 
--   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
--
--
- #if DO_DEBUG
-    R200_DEBUG  = driParseDebugString( getenv( "R200_DEBUG" ),
- 				      debug_control );
-@@ -508,202 +473,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- 				      debug_control );
- #endif
- 
--   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
--   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
-+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
-+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
-       fprintf(stderr, "disabling 3D acceleration\n");
-       FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
-    }
-    else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
--	    !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
--      if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) {
--	 rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL;
-+	    !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
- 	 fprintf(stderr, "Disabling HW TCL support\n");
-       }
--      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
--   }
--
--   return GL_TRUE;
--}
--
--
--/* Destroy the device specific context.
-- */
--/* Destroy the Mesa and driver specific context data.
-- */
--void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
--{
--   GET_CURRENT_CONTEXT(ctx);
--   r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
--   r200ContextPtr current = ctx ? R200_CONTEXT(ctx) : NULL;
--
--   /* check if we're deleting the currently bound context */
--   if (rmesa == current) {
--      R200_FIREVERTICES( rmesa );
--      _mesa_make_current(NULL, NULL, NULL);
--   }
--
--   /* Free r200 context resources */
--   assert(rmesa); /* should never be null */
--   if ( rmesa ) {
--      GLboolean   release_texture_heaps;
--
--
--      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
--      _swsetup_DestroyContext( rmesa->glCtx );
--      _tnl_DestroyContext( rmesa->glCtx );
--      _vbo_DestroyContext( rmesa->glCtx );
--      _swrast_DestroyContext( rmesa->glCtx );
--
--      r200DestroySwtcl( rmesa->glCtx );
--      r200ReleaseArrays( rmesa->glCtx, ~0 );
--
--      if (rmesa->dma.current.buf) {
--	 r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--	 r200FlushCmdBuf( rmesa, __FUNCTION__ );
--      }
--
--      if (rmesa->state.scissor.pClipRects) {
--	 FREE(rmesa->state.scissor.pClipRects);
--	 rmesa->state.scissor.pClipRects = NULL;
--      }
--
--      if ( release_texture_heaps ) {
--         /* This share group is about to go away, free our private
--          * texture object data.
--          */
--         int i;
--
--         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
--	    rmesa->texture_heaps[ i ] = NULL;
--         }
--
--	 assert( is_empty_list( & rmesa->swapped ) );
--      }
--
--      /* free the Mesa context */
--      rmesa->glCtx->DriverCtx = NULL;
--      _mesa_destroy_context( rmesa->glCtx );
--
--      /* free the option cache */
--      driDestroyOptionCache (&rmesa->optionCache);
--
--      FREE( rmesa );
-+      TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
-    }
--}
--
- 
--
--
--void
--r200SwapBuffers( __DRIdrawablePrivate *dPriv )
--{
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      r200ContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--         if ( rmesa->doPageFlip ) {
--            r200PageFlip( dPriv );
--         }
--         else {
--	     r200CopyBuffer( dPriv, NULL );
--         }
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--void
--r200CopySubBuffer( __DRIdrawablePrivate *dPriv,
--		   int x, int y, int w, int h )
--{
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      r200ContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--	 drm_clip_rect_t rect;
--	 rect.x1 = x + dPriv->x;
--	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	 rect.x2 = rect.x1 + w;
--	 rect.y2 = rect.y1 + h;
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--	 r200CopyBuffer( dPriv, &rect );
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--/* Force the context `c' to be the current context and associate with it
-- * buffer `b'.
-- */
--GLboolean
--r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
--                   __DRIdrawablePrivate *driDrawPriv,
--                   __DRIdrawablePrivate *driReadPriv )
--{
--   if ( driContextPriv ) {
--      r200ContextPtr newCtx = 
--	 (r200ContextPtr) driContextPriv->driverPrivate;
--
--      if (R200_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
--
--      newCtx->dri.readable = driReadPriv;
--
--      if ( newCtx->dri.drawable != driDrawPriv ||
--           newCtx->lastStamp != driDrawPriv->lastStamp ) {
--	 if (driDrawPriv->swap_interval == (unsigned)-1) {
--	    driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
--	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
--	       : VBLANK_FLAG_NO_IRQ;
--
--	    driDrawableInitVBlank( driDrawPriv );
--	 }
--
--	 newCtx->dri.drawable = driDrawPriv;
--
--	 r200SetCliprects(newCtx);
--	 r200UpdateViewportOffset( newCtx->glCtx );
--      }
--
--      _mesa_make_current( newCtx->glCtx,
--			  (GLframebuffer *) driDrawPriv->driverPrivate,
--			  (GLframebuffer *) driReadPriv->driverPrivate );
--
--      _mesa_update_state( newCtx->glCtx );
--      r200ValidateState( newCtx->glCtx );
--
--   } else {
--      if (R200_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--      _mesa_make_current( NULL, NULL, NULL );
--   }
--
--   if (R200_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "End %s\n", __FUNCTION__);
-    return GL_TRUE;
- }
- 
--/* Force the context `c' to be unbound from its buffer.
-- */
--GLboolean
--r200UnbindContext( __DRIcontextPrivate *driContextPriv )
--{
--   r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
--
--   if (R200_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx);
- 
--   return GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
-index 14a1dda..6267293 100644
---- a/src/mesa/drivers/dri/r200/r200_context.h
-+++ b/src/mesa/drivers/dri/r200/r200_context.h
-@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #error This driver requires a newer libdrm to compile
- #endif
- 
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
-+
-+#include "radeon_lock.h"
-+
- struct r200_context;
- typedef struct r200_context r200ContextRec;
- typedef struct r200_context *r200ContextPtr;
- 
--/* This union is used to avoid warnings/miscompilation
--   with float to uint32_t casts due to strict-aliasing */
--typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
--
--#include "r200_lock.h"
--#include "radeon_screen.h"
- #include "main/mm.h"
- 
--/* Flags for software fallback cases */
--/* See correponding strings in r200_swtcl.c */
--#define R200_FALLBACK_TEXTURE           0x01
--#define R200_FALLBACK_DRAW_BUFFER       0x02
--#define R200_FALLBACK_STENCIL           0x04
--#define R200_FALLBACK_RENDER_MODE       0x08
--#define R200_FALLBACK_DISABLE           0x10
--#define R200_FALLBACK_BORDER_MODE       0x20
--
--/* The blit width for texture uploads
-- */
--#define BLIT_WIDTH_BYTES 1024
--
--/* Use the templated vertex format:
-- */
--#define COLOR_IS_RGBA
--#define TAG(x) r200##x
--#include "tnl_dd/t_dd_vertex.h"
--#undef TAG
--
--typedef void (*r200_tri_func)( r200ContextPtr,
--				 r200Vertex *,
--				 r200Vertex *,
--				 r200Vertex * );
--
--typedef void (*r200_line_func)( r200ContextPtr,
--				  r200Vertex *,
--				  r200Vertex * );
--
--typedef void (*r200_point_func)( r200ContextPtr,
--				   r200Vertex * );
--
--
- struct r200_vertex_program {
-         struct gl_vertex_program mesa_program; /* Must be first */
-         int translated;
-@@ -112,93 +78,11 @@ struct r200_vertex_program {
-         int fogmode;
- };
- 
--struct r200_colorbuffer_state {
--   GLuint clear;
--#if 000
--   GLint drawOffset, drawPitch;
--#endif
--   int roundEnable;
--};
--
--
--struct r200_depthbuffer_state {
--   GLuint clear;
--   GLfloat scale;
--};
--
--#if 000
--struct r200_pixel_state {
--   GLint readOffset, readPitch;
--};
--#endif
--
--struct r200_scissor_state {
--   drm_clip_rect_t rect;
--   GLboolean enabled;
--
--   GLuint numClipRects;			/* Cliprects active */
--   GLuint numAllocedClipRects;		/* Cliprects available */
--   drm_clip_rect_t *pClipRects;
--};
--
--struct r200_stencilbuffer_state {
--   GLboolean hwBuffer;
--   GLuint clear;			/* rb3d_stencilrefmask value */
--};
--
--struct r200_stipple_state {
--   GLuint mask[32];
--};
--
--
--
--#define TEX_0   0x1
--#define TEX_1   0x2
--#define TEX_2	0x4
--#define TEX_3	0x8
--#define TEX_4	0x10
--#define TEX_5	0x20
--#define TEX_ALL 0x3f
--
--typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
--
--/* Texture object in locally shared texture space.
-- */
--struct r200_tex_obj {
--   driTextureObject   base;
--
--   GLuint bufAddr;			/* Offset to start of locally
--					   shared texture block */
--
--   GLuint dirty_state;		        /* Flags (1 per texunit) for
--					   whether or not this texobj
--					   has dirty hardware state
--					   (pp_*) that needs to be
--					   brought into the
--					   texunit. */
--
--   drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--					/* Six, for the cube faces */
--   GLboolean image_override;		/* Image overridden by GLX_EXT_tfp */
--
--   GLuint pp_txfilter;		        /* hardware register values */
--   GLuint pp_txformat;
--   GLuint pp_txformat_x;
--   GLuint pp_txoffset;		        /* Image location in texmem.
--					   All cube faces follow. */
--   GLuint pp_txsize;		        /* npot only */
--   GLuint pp_txpitch;		        /* npot only */
--   GLuint pp_border_color;
--   GLuint pp_cubic_faces;	        /* cube face 1,2,3,4 log2 sizes */
--
--   GLboolean  border_fallback;
--
--   GLuint tile_bits;			/* hw texture tile bits used on this texture */
--};
-+#define R200_TEX_ALL 0x3f
- 
- 
- struct r200_texture_env_state {
--   r200TexObjPtr texobj;
-+   radeonTexObjPtr texobj;
-    GLuint outputreg;
-    GLuint unitneeded;
- };
-@@ -210,19 +94,6 @@ struct r200_texture_state {
- };
- 
- 
--struct r200_state_atom {
--   struct r200_state_atom *next, *prev;
--   const char *name;		         /* for debug */
--   int cmd_size;		         /* size in bytes */
--   GLuint idx;
--   int *cmd;			         /* one or more cmd's */
--   int *lastcmd;			 /* one or more cmd's */
--   GLboolean dirty;
--   GLboolean (*check)( GLcontext *, int );    /* is this state active? */
--};
--   
--
--
- /* Trying to keep these relatively short as the variables are becoming
-  * extravagently long.  Drop the driver name prefix off the front of
-  * everything - I think we know which driver we're in by now, and keep the
-@@ -597,181 +468,79 @@ struct r200_state_atom {
- 
- 
- struct r200_hw_state {
--   /* Head of the linked list of state atoms. */
--   struct r200_state_atom atomlist;
--
-    /* Hardware state, stored as cmdbuf commands:  
-     *   -- Need to doublebuffer for
-     *           - reviving state after loss of context
-     *           - eliding noop statechange loops? (except line stipple count)
-     */
--   struct r200_state_atom ctx;
--   struct r200_state_atom set;
--   struct r200_state_atom vte;
--   struct r200_state_atom lin;
--   struct r200_state_atom msk;
--   struct r200_state_atom vpt;
--   struct r200_state_atom vap;
--   struct r200_state_atom vtx;
--   struct r200_state_atom tcl;
--   struct r200_state_atom msl;
--   struct r200_state_atom tcg;
--   struct r200_state_atom msc;
--   struct r200_state_atom cst;
--   struct r200_state_atom tam;
--   struct r200_state_atom tf;
--   struct r200_state_atom tex[6];
--   struct r200_state_atom cube[6];
--   struct r200_state_atom zbs;
--   struct r200_state_atom mtl[2];
--   struct r200_state_atom mat[9];
--   struct r200_state_atom lit[8]; /* includes vec, scl commands */
--   struct r200_state_atom ucp[6];
--   struct r200_state_atom pix[6]; /* pixshader stages */
--   struct r200_state_atom eye; /* eye pos */
--   struct r200_state_atom grd; /* guard band clipping */
--   struct r200_state_atom fog;
--   struct r200_state_atom glt;
--   struct r200_state_atom prf;
--   struct r200_state_atom afs[2];
--   struct r200_state_atom pvs;
--   struct r200_state_atom vpi[2];
--   struct r200_state_atom vpp[2];
--   struct r200_state_atom atf;
--   struct r200_state_atom spr;
--   struct r200_state_atom ptp;
--
--   int max_state_size;	/* Number of bytes necessary for a full state emit. */
--   GLboolean is_dirty, all_dirty;
-+   struct radeon_state_atom ctx;
-+   struct radeon_state_atom set;
-+   struct radeon_state_atom vte;
-+   struct radeon_state_atom lin;
-+   struct radeon_state_atom msk;
-+   struct radeon_state_atom vpt;
-+   struct radeon_state_atom vap;
-+   struct radeon_state_atom vtx;
-+   struct radeon_state_atom tcl;
-+   struct radeon_state_atom msl;
-+   struct radeon_state_atom tcg;
-+   struct radeon_state_atom msc;
-+   struct radeon_state_atom cst;
-+   struct radeon_state_atom tam;
-+   struct radeon_state_atom tf;
-+   struct radeon_state_atom tex[6];
-+   struct radeon_state_atom cube[6];
-+   struct radeon_state_atom zbs;
-+   struct radeon_state_atom mtl[2];
-+   struct radeon_state_atom mat[9];
-+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
-+   struct radeon_state_atom ucp[6];
-+   struct radeon_state_atom pix[6]; /* pixshader stages */
-+   struct radeon_state_atom eye; /* eye pos */
-+   struct radeon_state_atom grd; /* guard band clipping */
-+   struct radeon_state_atom fog;
-+   struct radeon_state_atom glt;
-+   struct radeon_state_atom prf;
-+   struct radeon_state_atom afs[2];
-+   struct radeon_state_atom pvs;
-+   struct radeon_state_atom vpi[2];
-+   struct radeon_state_atom vpp[2];
-+   struct radeon_state_atom atf;
-+   struct radeon_state_atom spr;
-+   struct radeon_state_atom ptp;
- };
- 
- struct r200_state {
-    /* Derived state for internal purposes:
-     */
--   struct r200_colorbuffer_state color;
--   struct r200_depthbuffer_state depth;
--#if 00
--   struct r200_pixel_state pixel;
--#endif
--   struct r200_scissor_state scissor;
--   struct r200_stencilbuffer_state stencil;
--   struct r200_stipple_state stipple;
-+   struct radeon_stipple_state stipple;
-    struct r200_texture_state texture;
-    GLuint envneeded;
- };
- 
--/* Need refcounting on dma buffers:
-- */
--struct r200_dma_buffer {
--   int refcount;		/* the number of retained regions in buf */
--   drmBufPtr buf;
--};
--
--#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r200_dma_region {
--   struct r200_dma_buffer *buf;
--   char *address;		/* == buf->address */
--   int start, end, ptr;		/* offsets from start of buf */
--   int aos_start;
--   int aos_stride;
--   int aos_size;
--};
--
--
--struct r200_dma {
--   /* Active dma region.  Allocations for vertices and retained
--    * regions come from here.  Also used for emitting random vertices,
--    * these may be flushed by calling flush_current();
--    */
--   struct r200_dma_region current;
--   
--   void (*flush)( r200ContextPtr );
--
--   char *buf0_address;		/* start of buf[0], for index calcs */
--   GLuint nr_released_bufs;	/* flush after so many buffers released */
--};
--
--struct r200_dri_mirror {
--   __DRIcontextPrivate	*context;	/* DRI context */
--   __DRIscreenPrivate	*screen;	/* DRI screen */
--   __DRIdrawablePrivate	*drawable;	/* DRI drawable bound to this ctx */
--   __DRIdrawablePrivate	*readable;	/* DRI readable bound to this ctx */
--
--   drm_context_t hwContext;
--   drm_hw_lock_t *hwLock;
--   int fd;
--   int drmMinor;
--};
--
--
- #define R200_CMD_BUF_SZ  (16*1024) 
- 
--struct r200_store {
--   GLuint statenr;
--   GLuint primnr;
--   char cmd_buf[R200_CMD_BUF_SZ];
--   int cmd_used;   
--   int elts_start;
--};
--
--
-+#define R200_ELT_BUF_SZ  (16*1024) 
- /* r200_tcl.c
-  */
- struct r200_tcl_info {
-    GLuint hw_primitive;
- 
--/* hw can handle 12 components max */
--   struct r200_dma_region *aos_components[12];
--   GLuint nr_aos_components;
--
-    GLuint *Elts;
- 
--   struct r200_dma_region indexed_verts;
--   struct r200_dma_region vertex_data[15];
-+   int elt_used;
-+
- };
- 
- 
- /* r200_swtcl.c
-  */
- struct r200_swtcl_info {
--   GLuint RenderIndex;
--   
--   /**
--    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
--    * installed in the Mesa state vector.
--    */
--   GLuint vertex_size;
- 
--   /**
--    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
--    * data in the hardware buffer.
--    */
--   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
- 
--   /**
--    * Number of elements of \c ::vertex_attrs that are actually used.
--    */
--   GLuint vertex_attr_count;
--
--   /**
--    * Cached pointer to the buffer where Mesa will store vertex data.
--    */
--   GLubyte *verts;
--
--   /* Fallback rasterization functions
--    */
--   r200_point_func draw_point;
--   r200_line_func draw_line;
--   r200_tri_func draw_tri;
--
--   GLuint hw_primitive;
--   GLenum render_primitive;
--   GLuint numverts;
-+   radeon_point_func draw_point;
-+   radeon_line_func draw_line;
-+   radeon_tri_func draw_tri;
- 
-    /**
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-@@ -787,27 +556,10 @@ struct r200_swtcl_info {
-     * Should Mesa project vertex data or will the hardware do it?
-     */
-    GLboolean needproj;
--
--   struct r200_dma_region indexed_verts;
--};
--
--
--struct r200_ioctl {
--   GLuint vertex_offset;
--   GLuint vertex_size;
- };
- 
- 
- 
--#define R200_MAX_PRIMS 64
--
--
--
--struct r200_prim {
--   GLuint start;
--   GLuint end;
--   GLuint prim;
--};
- 
-    /* A maximum total of 29 elements per vertex:  3 floats for position, 3
-     * floats for normal, 4 floats for color, 4 bytes for secondary color,
-@@ -822,9 +574,8 @@ struct r200_prim {
- 
- #define R200_MAX_VERTEX_SIZE ((3*6)+11)
- 
--
- struct r200_context {
--   GLcontext *glCtx;			/* Mesa context */
-+   struct radeon_context radeon;
- 
-    /* Driver and hardware state management
-     */
-@@ -832,56 +583,15 @@ struct r200_context {
-    struct r200_state state;
-    struct r200_vertex_program *curr_vp_hw;
- 
--   /* Texture object bookkeeping
--    */
--   unsigned              nr_heaps;
--   driTexHeap          * texture_heaps[ RADEON_NR_TEX_HEAPS ];
--   driTextureObject      swapped;
--   int                   texture_depth;
--   float                 initialMaxAnisotropy;
--
--   /* Rasterization and vertex state:
--    */
--   GLuint TclFallback;
--   GLuint Fallback;
--   GLuint NewGLState;
--   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
--
-    /* Vertex buffers
-     */
--   struct r200_ioctl ioctl;
--   struct r200_dma dma;
--   struct r200_store store;
--   /* A full state emit as of the first state emit in the main store, in case
--    * the context is lost.
--    */
--   struct r200_store backup_store;
--
--   /* Page flipping
--    */
--   GLuint doPageFlip;
--
--   /* Busy waiting
--    */
--   GLuint do_usleeps;
--   GLuint do_irqs;
--   GLuint irqsEmitted;
--   drm_radeon_irq_wait_t iw;
-+   struct radeon_ioctl ioctl;
-+   struct radeon_store store;
- 
-    /* Clientdata textures;
-     */
-    GLuint prefer_gart_client_texturing;
- 
--   /* Drawable, cliprect and scissor information
--    */
--   GLuint numClipRects;			/* Cliprects for the draw buffer */
--   drm_clip_rect_t *pClipRects;
--   unsigned int lastStamp;
--   GLboolean lost_context;
--   GLboolean save_on_next_emit;
--   radeonScreenPtr r200Screen;	/* Screen private DRI data */
--   drm_radeon_sarea_t *sarea;		/* Private SAREA data */
--
-    /* TCL stuff
-     */
-    GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
-@@ -893,15 +603,6 @@ struct r200_context {
-    GLuint TexGenCompSel;
-    GLmatrix tmpmat;
- 
--   /* buffer swap
--    */
--   int64_t swap_ust;
--   int64_t swap_missed_ust;
--
--   GLuint swap_count;
--   GLuint swap_missed_count;
--
--
-    /* r200_tcl.c
-     */
-    struct r200_tcl_info tcl;
-@@ -910,14 +611,6 @@ struct r200_context {
-     */
-    struct r200_swtcl_info swtcl;
- 
--   /* Mirrors of some DRI state
--    */
--   struct r200_dri_mirror dri;
--
--   /* Configuration cache
--    */
--   driOptionCache optionCache;
--
-    GLboolean using_hyperz;
-    GLboolean texmicrotile;
- 
-@@ -927,28 +620,10 @@ struct r200_context {
- #define R200_CONTEXT(ctx)		((r200ContextPtr)(ctx->DriverCtx))
- 
- 
--static INLINE GLuint r200PackColor( GLuint cpp,
--					GLubyte r, GLubyte g,
--					GLubyte b, GLubyte a )
--{
--   switch ( cpp ) {
--   case 2:
--      return PACK_COLOR_565( r, g, b );
--   case 4:
--      return PACK_COLOR_8888( a, r, g, b );
--   default:
--      return 0;
--   }
--}
--
--
- extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
- extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- 				    __DRIcontextPrivate *driContextPriv,
- 				    void *sharedContextPrivate);
--extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
--extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv,
--			       int x, int y, int w, int h );
- extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
- 				  __DRIdrawablePrivate *driDrawPriv,
- 				  __DRIdrawablePrivate *driReadPriv );
-@@ -957,28 +632,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
- /* ================================================================
-  * Debugging:
-  */
--#define DO_DEBUG		1
- 
--#if DO_DEBUG
--extern int R200_DEBUG;
--#else
--#define R200_DEBUG		0
--#endif
-+#define R200_DEBUG RADEON_DEBUG
-+
- 
--#define DEBUG_TEXTURE	0x001
--#define DEBUG_STATE	0x002
--#define DEBUG_IOCTL	0x004
--#define DEBUG_PRIMS	0x008
--#define DEBUG_VERTS	0x010
--#define DEBUG_FALLBACKS	0x020
--#define DEBUG_VFMT	0x040
--#define DEBUG_CODEGEN	0x080
--#define DEBUG_VERBOSE	0x100
--#define DEBUG_DRI       0x200
--#define DEBUG_DMA       0x400
--#define DEBUG_SANITY    0x800
--#define DEBUG_SYNC      0x1000
--#define DEBUG_PIXEL     0x2000
--#define DEBUG_MEMORY    0x4000
- 
- #endif /* __R200_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
-index d514b28..85c1b7b 100644
---- a/src/mesa/drivers/dri/r200/r200_fragshader.c
-+++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
-@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx )
- 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
- 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
-       }
--      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor (
-+      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
- 	 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
-    }
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
-index 0741e57..0262aea 100644
---- a/src/mesa/drivers/dri/r200/r200_ioctl.c
-+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
-@@ -41,6 +41,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "swrast/swrast.h"
- 
-+
-+
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -54,635 +58,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define R200_TIMEOUT             512
- #define R200_IDLE_RETRY           16
- 
--
--static void r200WaitForIdle( r200ContextPtr rmesa );
--
--
--/* At this point we were in FlushCmdBufLocked but we had lost our context, so
-- * we need to unwire our current cmdbuf, hook the one with the saved state in
-- * it, flush it, and then put the current one back.  This is so commands at the
-- * start of a cmdbuf can rely on the state being kept from the previous one.
-- */
--static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
--{
--   GLuint nr_released_bufs;
--   struct r200_store saved_store;
--
--   if (rmesa->backup_store.cmd_used == 0)
--      return;
--
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Emitting backup state on lost context\n");
--
--   rmesa->lost_context = GL_FALSE;
--
--   nr_released_bufs = rmesa->dma.nr_released_bufs;
--   saved_store = rmesa->store;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->store = rmesa->backup_store;
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--   rmesa->dma.nr_released_bufs = nr_released_bufs;
--   rmesa->store = saved_store;
--}
--
--int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
--{
--   int ret, i;
--   drm_radeon_cmd_buffer_t cmd;
--
--   if (rmesa->lost_context)
--      r200BackUpAndEmitLostStateLocked( rmesa );
--
--   if (R200_DEBUG & DEBUG_IOCTL) {
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--
--      if (0 & R200_DEBUG & DEBUG_VERBOSE) 
--	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
--	    fprintf(stderr, "%d: %x\n", i/4, 
--		    *(int *)(&rmesa->store.cmd_buf[i]));
--   }
--
--   if (R200_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
--	      rmesa->dma.nr_released_bufs);
--
--
--   if (R200_DEBUG & DEBUG_SANITY) {
--      if (rmesa->state.scissor.enabled) 
--	 ret = r200SanityCmdBuffer( rmesa, 
--				    rmesa->state.scissor.numClipRects,
--				    rmesa->state.scissor.pClipRects);
--      else
--	 ret = r200SanityCmdBuffer( rmesa, 
--				    rmesa->numClipRects,
--				    rmesa->pClipRects);
--      if (ret) {
--	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
--	 goto out;
--      }
--   }
--
--
--   if (R200_DEBUG & DEBUG_MEMORY) {
--      if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
--				     & rmesa->swapped ) ) {
--	 fprintf( stderr, "%s: texture memory is inconsistent - expect "
--		  "mangled textures\n", __FUNCTION__ );
--      }
--   }
--
--
--   cmd.bufsz = rmesa->store.cmd_used;
--   cmd.buf = rmesa->store.cmd_buf;
--
--   if (rmesa->state.scissor.enabled) {
--      cmd.nbox = rmesa->state.scissor.numClipRects;
--      cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects;
--   } else {
--      cmd.nbox = rmesa->numClipRects;
--      cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects;
--   }
--
--   ret = drmCommandWrite( rmesa->dri.fd,
--			  DRM_RADEON_CMDBUF,
--			  &cmd, sizeof(cmd) );
--
--   if (ret)
--      fprintf(stderr, "drmCommandWrite: %d\n", ret);
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
--      r200WaitForIdleLocked( rmesa );
--   }
--
--
-- out:
--   rmesa->store.primnr = 0;
--   rmesa->store.statenr = 0;
--   rmesa->store.cmd_used = 0;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->save_on_next_emit = 1;
--
--   return ret;
--}
--
--
--/* Note: does not emit any commands to avoid recursion on
-- * r200AllocCmdBuf.
-- */
--void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
--{
--   int ret;
--
--   LOCK_HARDWARE( rmesa );
--
--   ret = r200FlushCmdBufLocked( rmesa, caller );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if (ret) {
--      fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
--      exit(ret);
--   }
--}
--
--
--/* =============================================================
-- * Hardware vertex buffer handling
-- */
--
--
--void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
--{
--   struct r200_dma_buffer *dmabuf;
--   int fd = rmesa->dri.fd;
--   int index = 0;
--   int size = 0;
--   drmDMAReq dma;
--   int ret;
--
--   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--      fprintf(stderr, "%s\n", __FUNCTION__);  
--
--   if (rmesa->dma.flush) {
--      rmesa->dma.flush( rmesa );
--   }
--
--   if (rmesa->dma.current.buf)
--      r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--
--   if (rmesa->dma.nr_released_bufs > 4)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--
--   dma.context = rmesa->dri.hwContext;
--   dma.send_count = 0;
--   dma.send_list = NULL;
--   dma.send_sizes = NULL;
--   dma.flags = 0;
--   dma.request_count = 1;
--   dma.request_size = RADEON_BUFFER_SIZE;
--   dma.request_list = &index;
--   dma.request_sizes = &size;
--   dma.granted_count = 0;
--
--   LOCK_HARDWARE(rmesa);	/* no need to validate */
--
--   while (1) {
--      ret = drmDMA( fd, &dma );
--      if (ret == 0)
--	 break;
--   
--      if (rmesa->dma.nr_released_bufs) {
--	 r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--      }
--
--      if (rmesa->do_usleeps) {
--	 UNLOCK_HARDWARE( rmesa );
--	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
--      }
--   }
--
--   UNLOCK_HARDWARE(rmesa);
--
--   if (R200_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "Allocated buffer %d\n", index);
--
--   dmabuf = CALLOC_STRUCT( r200_dma_buffer );
--   dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
--   dmabuf->refcount = 1;
--
--   rmesa->dma.current.buf = dmabuf;
--   rmesa->dma.current.address = dmabuf->buf->address;
--   rmesa->dma.current.end = dmabuf->buf->total;
--   rmesa->dma.current.start = 0;
--   rmesa->dma.current.ptr = 0;
--}
--
--void r200ReleaseDmaRegion( r200ContextPtr rmesa,
--			     struct r200_dma_region *region,
--			     const char *caller )
--{
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--   
--   if (!region->buf)
--      return;
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (--region->buf->refcount == 0) {
--      drm_radeon_cmd_header_t *cmd;
--
--      if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
--		 region->buf->buf->idx);  
--      
--      cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), 
--						     __FUNCTION__ );
--      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
--      cmd->dma.buf_idx = region->buf->buf->idx;
--      FREE(region->buf);
--      rmesa->dma.nr_released_bufs++;
--   }
--
--   region->buf = NULL;
--   region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r200AllocDmaRegion( r200ContextPtr rmesa, 
--			   struct r200_dma_region *region,
--			   int bytes,
--			   int alignment )
-+static void r200UserClear(GLcontext *ctx, GLuint mask)
- {
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (region->buf)
--      r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
--
--   alignment--;
--   rmesa->dma.current.start = rmesa->dma.current.ptr = 
--      (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      r200RefillCurrentDmaRegion( rmesa );
--
--   region->start = rmesa->dma.current.start;
--   region->ptr = rmesa->dma.current.start;
--   region->end = rmesa->dma.current.start + bytes;
--   region->address = rmesa->dma.current.address;
--   region->buf = rmesa->dma.current.buf;
--   region->buf->refcount++;
--
--   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
--   rmesa->dma.current.start = 
--      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
--
--   assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
-+   radeon_clear_tris(ctx, mask);
- }
- 
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t r200GetLastFrame(r200ContextPtr rmesa)
--{
--   drm_radeon_getparam_t gp;
--   int ret;
--   uint32_t frame;
--
--   gp.param = RADEON_PARAM_LAST_FRAME;
--   gp.value = (int *)&frame;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
--			      &gp, sizeof(gp) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--
--   return frame;
--}
--
--static void r200EmitIrqLocked( r200ContextPtr rmesa )
--{
--   drm_radeon_irq_emit_t ie;
--   int ret;
--
--   ie.irq_seq = &rmesa->iw.irq_seq;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
--			      &ie, sizeof(ie) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void r200WaitIrq( r200ContextPtr rmesa )
--{
--   int ret;
--
--   do {
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
--			     &rmesa->iw, sizeof(rmesa->iw) );
--   } while (ret && (errno == EINTR || errno == EBUSY));
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--
--   if (rmesa->do_irqs) {
--      if (r200GetLastFrame(rmesa) < sarea->last_frame) {
--	 if (!rmesa->irqsEmitted) {
--	    while (r200GetLastFrame (rmesa) < sarea->last_frame)
--	       ;
--	 }
--	 else {
--	    UNLOCK_HARDWARE( rmesa ); 
--	    r200WaitIrq( rmesa );	
--	    LOCK_HARDWARE( rmesa ); 
--	 }
--	 rmesa->irqsEmitted = 10;
--      }
--
--      if (rmesa->irqsEmitted) {
--	 r200EmitIrqLocked( rmesa );
--	 rmesa->irqsEmitted--;
--      }
--   } 
--   else {
--      while (r200GetLastFrame (rmesa) < sarea->last_frame) {
--	 UNLOCK_HARDWARE( rmesa ); 
--	 if (rmesa->do_usleeps) 
--	    DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa ); 
--      }
--   }
--}
--
--
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
--		      const drm_clip_rect_t	 *rect)
--{
--   r200ContextPtr rmesa;
--   GLint nbox, i, ret;
--   GLboolean   missed_target;
--   int64_t ust;
--   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx );
--   }
--
--   R200_FIREVERTICES( rmesa );
--
--   LOCK_HARDWARE( rmesa );
--
--
--   /* Throttle the frame rate -- only allow one pending swap buffers
--    * request at a time.
--    */
--   r200WaitForFrameCompletion( rmesa );
--   if (!rect)
--   {
--       UNLOCK_HARDWARE( rmesa );
--       driWaitForVBlank( dPriv, & missed_target );
--       LOCK_HARDWARE( rmesa );
--   }
--
--   nbox = dPriv->numClipRects; /* must be in locked region */
--
--   for ( i = 0 ; i < nbox ; ) {
--      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      GLint n = 0;
--
--      for ( ; i < nr ; i++ ) {
--
--	  *b = box[i];
--
--	  if (rect)
--	  {
--	     if (rect->x1 > b->x1)
--		 b->x1 = rect->x1;
--	     if (rect->y1 > b->y1)
--		 b->y1 = rect->y1;
--	     if (rect->x2 < b->x2)
--		 b->x2 = rect->x2;
--	     if (rect->y2 < b->y2)
--		 b->y2 = rect->y2;
--
--	     if (b->x1 >= b->x2 || b->y1 >= b->y2)
--		 continue;
--	  }
--
--	  b++;
--	  n++;
--      }
--      rmesa->sarea->nbox = n;
--
--      if (!n)
--	 continue;
--
--      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
--
--      if ( ret ) {
--	 fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
--	 UNLOCK_HARDWARE( rmesa );
--	 exit( 1 );
--      }
--   }
--
--   UNLOCK_HARDWARE( rmesa );
--   if (!rect)
--   {
--       rmesa->hw.all_dirty = GL_TRUE;
--
--       rmesa->swap_count++;
--       (*psp->systemTime->getUST)( & ust );
--       if ( missed_target ) {
--	   rmesa->swap_missed_count++;
--	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
--       }
--
--       rmesa->swap_ust = ust;
--
--       sched_yield();
--   }
--}
--
--void r200PageFlip( __DRIdrawablePrivate *dPriv )
--{
--   r200ContextPtr rmesa;
--   GLint ret;
--   GLboolean   missed_target;
--   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--	      rmesa->sarea->pfCurrentPage);
--   }
--
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   if (!dPriv->numClipRects) {
--      UNLOCK_HARDWARE( rmesa );
--      usleep( 10000 );		/* throttle invisible client 10ms */
--      return;
--   }
--
--   /* Need to do this for the perf box placement:
--    */
--   {
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      b[0] = box[0];
--      rmesa->sarea->nbox = 1;
--   }
--
--   /* Throttle the frame rate -- only allow a few pending swap buffers
--    * request at a time.
--    */
--   r200WaitForFrameCompletion( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   driWaitForVBlank( dPriv, & missed_target );
--   if ( missed_target ) {
--      rmesa->swap_missed_count++;
--      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
--   }
--   LOCK_HARDWARE( rmesa );
--
--   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
--      exit( 1 );
--   }
--
--   rmesa->swap_count++;
--   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
--
--#if 000
--   if ( rmesa->sarea->pfCurrentPage == 1 ) {
--	 rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
--	 rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
--   } else {
--	 rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
--	 rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
--   }
--
--   R200_STATECHANGE( rmesa, ctx );
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
--					   + rmesa->r200Screen->fbLocation;
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
--   }
--#else
--   /* Get ready for drawing next frame.  Update the renderbuffers'
--    * flippedOffset/Pitch fields so we draw into the right place.
--    */
--   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                        rmesa->sarea->pfCurrentPage);
--
--
--   r200UpdateDrawBuffer(rmesa->glCtx);
--#endif
--}
--
--
--/* ================================================================
-- * Buffer clear
-- */
--static void r200Clear( GLcontext *ctx, GLbitfield mask )
-+static void r200KernelClear(GLcontext *ctx, GLuint flags)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLuint flags = 0;
--   GLuint color_mask = 0;
--   GLint ret, i;
--   GLint cx, cy, cw, ch;
--
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "r200Clear\n");
--   }
--
--   {
--      LOCK_HARDWARE( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      if ( dPriv->numClipRects == 0 ) 
--	 return;
--   }
--
--   r200Flush( ctx );
--
--   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
--      flags |= RADEON_FRONT;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_FRONT_LEFT;
--   }
--
--   if ( mask & BUFFER_BIT_BACK_LEFT ) {
--      flags |= RADEON_BACK;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_BACK_LEFT;
--   }
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLint cx, cy, cw, ch, ret;
-+   GLuint i;
- 
--   if ( mask & BUFFER_BIT_DEPTH ) {
--      flags |= RADEON_DEPTH;
--      mask &= ~BUFFER_BIT_DEPTH;
--   }
--
--   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
--      flags |= RADEON_STENCIL;
--      mask &= ~BUFFER_BIT_STENCIL;
--   }
--
--   if ( mask ) {
--      if (R200_DEBUG & DEBUG_FALLBACKS)
--	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
--      _swrast_Clear( ctx, mask );
--   }
--
--   if ( !flags ) 
--      return;
--
--   if (rmesa->using_hyperz) {
--      flags |= RADEON_USE_COMP_ZBUF;
--/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
--	 flags |= RADEON_USE_HIERZ; */
--      if (!(rmesa->state.stencil.hwBuffer) ||
--	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
--	    ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
--	  flags |= RADEON_CLEAR_FASTZ;
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   /* compute region after locking: */
--   cx = ctx->DrawBuffer->_Xmin;
--   cy = ctx->DrawBuffer->_Ymin;
--   cw = ctx->DrawBuffer->_Xmax - cx;
--   ch = ctx->DrawBuffer->_Ymax - cy;
--
--   /* Flip top to bottom */
--   cx += dPriv->x;
--   cy  = dPriv->y + dPriv->h - cy - ch;
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* Throttle the number of clear ioctls we do.
-     */
-@@ -693,7 +81,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 
-       gp.param = RADEON_PARAM_LAST_CLEAR;
-       gp.value = (int *)&clear;
--      ret = drmCommandWriteRead( rmesa->dri.fd,
-+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
- 		      DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
- 
-       if ( ret ) {
-@@ -703,24 +91,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 
-       /* Clear throttling needs more thought.
-        */
--      if ( rmesa->sarea->last_clear - clear <= 25 ) {
-+      if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
- 	 break;
-       }
-       
--      if (rmesa->do_usleeps) {
--	 UNLOCK_HARDWARE( rmesa );
-+      if (rmesa->radeon.do_usleeps) {
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
-+	 LOCK_HARDWARE( &rmesa->radeon );
-       }
-    }
- 
-    /* Send current state to the hardware */
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-+
-+
-+  /* compute region after locking: */
-+   cx = ctx->DrawBuffer->_Xmin;
-+   cy = ctx->DrawBuffer->_Ymin;
-+   cw = ctx->DrawBuffer->_Xmax - cx;
-+   ch = ctx->DrawBuffer->_Ymax - cy;
- 
-+   /* Flip top to bottom */
-+   cx += dPriv->x;
-+   cy  = dPriv->y + dPriv->h - cy - ch;
-    for ( i = 0 ; i < dPriv->numClipRects ; ) {
-       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-       drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
-+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
-       drm_radeon_clear_t clear;
-       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
-       GLint n = 0;
-@@ -755,17 +153,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 	 }
-       }
- 
--      rmesa->sarea->nbox = n;
-+      rmesa->radeon.sarea->nbox = n;
- 
-       clear.flags       = flags;
--      clear.clear_color = rmesa->state.color.clear;
--      clear.clear_depth = rmesa->state.depth.clear;	/* needed for hyperz */
-+      clear.clear_color = rmesa->radeon.state.color.clear;
-+      clear.clear_depth = rmesa->radeon.state.depth.clear;	/* needed for hyperz */
-       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      clear.depth_mask  = rmesa->state.stencil.clear;
-+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
-       clear.depth_boxes = depth_boxes;
- 
-       n--;
--      b = rmesa->sarea->boxes;
-+      b = rmesa->radeon.sarea->boxes;
-       for ( ; n >= 0 ; n-- ) {
- 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
- 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
-@@ -774,84 +172,91 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 	 depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
-       }
- 
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
- 			     &clear, sizeof(clear));
- 
- 
-       if ( ret ) {
--	 UNLOCK_HARDWARE( rmesa );
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
- 	 exit( 1 );
-       }
-    }
--
--   UNLOCK_HARDWARE( rmesa );
--   rmesa->hw.all_dirty = GL_TRUE;
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
-+/* ================================================================
-+ * Buffer clear
-+ */
-+static void r200Clear( GLcontext *ctx, GLbitfield mask )
-+{
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLuint flags = 0;
-+   GLuint color_mask = 0;
-+   GLuint orig_mask = mask;
- 
-+   if ( R200_DEBUG & DEBUG_IOCTL ) {
-+       fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage);
-+   }
- 
--void r200WaitForIdleLocked( r200ContextPtr rmesa )
--{
--    int ret;
--    int i = 0;
--    
--    do {
--       ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
--       if (ret) 
--	  DO_USLEEP( 1 );
--    } while (ret && ++i < 100);
--    
--    if ( ret < 0 ) {
--       UNLOCK_HARDWARE( rmesa );
--       fprintf( stderr, "Error: R200 timed out... exiting\n" );
--       exit( -1 );
--    }
--}
-+   {
-+      LOCK_HARDWARE( &rmesa->radeon );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-+      if ( dPriv->numClipRects == 0 ) 
-+	 return;
-+   }
- 
-+   radeonFlush( ctx );
- 
--static void r200WaitForIdle( r200ContextPtr rmesa )
--{
--   LOCK_HARDWARE(rmesa);
--   r200WaitForIdleLocked( rmesa );
--   UNLOCK_HARDWARE(rmesa);
--}
-+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
-+      flags |= RADEON_FRONT;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_FRONT_LEFT;
-+   }
- 
-+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
-+      flags |= RADEON_BACK;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_BACK_LEFT;
-+   }
- 
--void r200Flush( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
-+   if ( mask & BUFFER_BIT_DEPTH ) {
-+      flags |= RADEON_DEPTH;
-+      mask &= ~BUFFER_BIT_DEPTH;
-+   }
- 
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
-+   if ( (mask & BUFFER_BIT_STENCIL) ) {
-+      flags |= RADEON_STENCIL;
-+      mask &= ~BUFFER_BIT_STENCIL;
-+   }
- 
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
-+   if ( mask ) {
-+      if (R200_DEBUG & DEBUG_FALLBACKS)
-+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
-+      _swrast_Clear( ctx, mask );
-+   }
- 
--   r200EmitState( rmesa );
--   
--   if (rmesa->store.cmd_used)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--}
-+   if ( !flags ) 
-+      return;
- 
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void r200Finish( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   r200Flush( ctx );
-+   if (rmesa->using_hyperz) {
-+      flags |= RADEON_USE_COMP_ZBUF;
-+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
-+	 flags |= RADEON_USE_HIERZ; */
-+      if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-+	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
-+	  flags |= RADEON_CLEAR_FASTZ;
-+      }
-+   }
- 
--   if (rmesa->do_irqs) {
--      LOCK_HARDWARE( rmesa );
--      r200EmitIrqLocked( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      r200WaitIrq( rmesa );
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+      r200UserClear(ctx, orig_mask);
-+   else {
-+      r200KernelClear(ctx, flags);
-+      rmesa->radeon.hw.all_dirty = GL_TRUE;
-    }
--   else 
--      r200WaitForIdle( rmesa );
- }
- 
--
- /* This version of AllocateMemoryMESA allocates only GART memory, and
-  * only does so after the point at which the driver has been
-  * initialized.
-@@ -875,7 +280,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-       fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
- 	      writefreq, priority);
- 
--   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map)
-+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
-       return NULL;
- 
-    if (getenv("R200_NO_ALLOC"))
-@@ -886,7 +291,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-    alloc.size = size;
-    alloc.region_offset = &region_offset;
- 
--   ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
-+   ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
- 			      DRM_RADEON_ALLOC,
- 			      &alloc, sizeof(alloc));
-    
-@@ -896,7 +301,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-    }
-    
-    {
--      char *region_start = (char *)rmesa->r200Screen->gartTextures.map;
-+      char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-       return (void *)(region_start + region_offset);
-    }
- }
-@@ -914,24 +319,24 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
-    if (R200_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
- 
--   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) {
-+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
-       fprintf(stderr, "%s: no context\n", __FUNCTION__);
-       return;
-    }
- 
--   region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- 
-    if (region_offset < 0 || 
--       region_offset > rmesa->r200Screen->gartTextures.size) {
-+       region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
-       fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
--	      rmesa->r200Screen->gartTextures.size);
-+	      rmesa->radeon.radeonScreen->gartTextures.size);
-       return;
-    }
- 
-    memfree.region = RADEON_MEM_REGION_GART;
-    memfree.region_offset = region_offset;
-    
--   ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
-+   ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
- 			  DRM_RADEON_FREE,
- 			  &memfree, sizeof(memfree));
-    
-@@ -956,16 +361,16 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
- 
-    card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
- 
--   return card_offset - rmesa->r200Screen->gart_base;
-+   return card_offset - rmesa->radeon.radeonScreen->gart_base;
- }
- 
- GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
- 			   GLint size )
- {
--   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-    int valid = (size >= 0 &&
- 		offset >= 0 &&
--		offset + size < rmesa->r200Screen->gartTextures.size);
-+		offset + size < rmesa->radeon.radeonScreen->gartTextures.size);
- 
-    if (R200_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
-@@ -976,12 +381,12 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
- 
- GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
- {
--   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- 
--   if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size)
-+   if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size)
-       return ~0;
-    else
--      return rmesa->r200Screen->gart_texture_offset + offset;
-+      return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
- }
- 
- 
-@@ -989,7 +394,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
- void r200InitIoctlFuncs( struct dd_function_table *functions )
- {
-     functions->Clear = r200Clear;
--    functions->Finish = r200Finish;
--    functions->Flush = r200Flush;
-+    functions->Finish = radeonFinish;
-+    functions->Flush = radeonFlush;
- }
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
-index f7458e4..2a4b8a1 100644
---- a/src/mesa/drivers/dri/r200/r200_ioctl.h
-+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
-@@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/simple_list.h"
- #include "radeon_dri.h"
--#include "r200_lock.h"
-+
-+#include "radeon_bocs_wrapper.h"
- 
- #include "xf86drm.h"
- #include "drm.h"
- #include "radeon_drm.h"
- 
--extern void r200EmitState( r200ContextPtr rmesa );
- extern void r200EmitVertexAOS( r200ContextPtr rmesa,
--				 GLuint vertex_size,
--				 GLuint offset );
-+			       GLuint vertex_size,
-+			       struct radeon_bo *bo,
-+			       GLuint offset );
- 
- extern void r200EmitVbufPrim( r200ContextPtr rmesa,
- 				GLuint primitive,
- 				GLuint vertex_nr );
- 
--extern void r200FlushElts( r200ContextPtr rmesa );
-+extern void r200FlushElts(GLcontext *ctx);
- 
- extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 					   GLuint primitive,
- 					   GLuint min_nr );
- 
--extern void r200EmitAOS( r200ContextPtr rmesa,
--			   struct r200_dma_region **regions,
--			   GLuint n,
--			   GLuint offset );
--
--extern void r200EmitBlit( r200ContextPtr rmesa,
--			  GLuint color_fmt,
--			  GLuint src_pitch,
--			  GLuint src_offset,
--			  GLuint dst_pitch,
--			  GLuint dst_offset,
--			  GLint srcx, GLint srcy,
--			  GLint dstx, GLint dsty,
--			  GLuint w, GLuint h );
--
--extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
--
--extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
--extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
--
--extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
--
--extern void r200AllocDmaRegion( r200ContextPtr rmesa,
--				  struct r200_dma_region *region,
--				  int bytes, 
--				  int alignment );
--
--extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
--				    struct r200_dma_region *region,
--				    const char *caller );
--
--extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
--			    const drm_clip_rect_t      *rect);
--extern void r200PageFlip( __DRIdrawablePrivate *drawable );
--extern void r200Flush( GLcontext *ctx );
--extern void r200Finish( GLcontext *ctx );
--extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
--extern void r200WaitForVBlank( r200ContextPtr rmesa );
-+extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
-+
- extern void r200InitIoctlFuncs( struct dd_function_table *functions );
- 
- extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
-@@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa );
-  */
- #define R200_NEWPRIM( rmesa )			\
- do {						\
--   if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );	\
-+   if ( rmesa->radeon.dma.flush )			\
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
- } while (0)
- 
- /* Can accomodate several state changes and primitive changes without
-@@ -130,7 +95,7 @@ do {						\
- do {								\
-    R200_NEWPRIM( rmesa );					\
-    rmesa->hw.ATOM.dirty = GL_TRUE;				\
--   rmesa->hw.is_dirty = GL_TRUE;				\
-+   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
- } while (0)
- 
- #define R200_DB_STATE( ATOM )			        \
-@@ -139,13 +104,13 @@ do {								\
- 
- static INLINE int R200_DB_STATECHANGE( 
-    r200ContextPtr rmesa,
--   struct r200_state_atom *atom )
-+   struct radeon_state_atom *atom )
- {
-    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
--      int *tmp;
-+      GLuint *tmp;
-       R200_NEWPRIM( rmesa );
-       atom->dirty = GL_TRUE;
--      rmesa->hw.is_dirty = GL_TRUE;
-+      rmesa->radeon.hw.is_dirty = GL_TRUE;
-       tmp = atom->cmd; 
-       atom->cmd = atom->lastcmd;
-       atom->lastcmd = tmp;
-@@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE(
- }
- 
- 
--/* Fire the buffered vertices no matter what.
-- */
--#define R200_FIREVERTICES( rmesa )			\
--do {							\
--   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
--      r200Flush( rmesa->glCtx );			\
--   }							\
--} while (0)
--
- /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
-  * are available, you will also be adding an rmesa->state.max_state_size because
-  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
-@@ -174,36 +130,36 @@ do {							\
- #define ELTS_BUFSZ(nr)	(12 + nr * 2)
- #define VBUF_BUFSZ	(3 * sizeof(int))
- 
--/* Ensure that a minimum amount of space is available in the command buffer.
-- * This is used to ensure atomicity of state updates with the rendering requests
-- * that rely on them.
-- *
-- * An alternative would be to implement a "soft lock" such that when the buffer
-- * wraps at an inopportune time, we grab the lock, flush the current buffer,
-- * and hang on to the lock until the critical section is finished and we flush
-- * the buffer again and unlock.
-- */
--static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
-+static inline uint32_t cmdpacket3(int cmd_type)
- {
--   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--   assert( bytes <= R200_CMD_BUF_SZ );
--}
-+  drm_radeon_cmd_header_t cmd;
- 
--/* Alloc space in the command buffer
-- */
--static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
--					 int bytes, const char *where )
--{
--   char * head;
-+  cmd.i = 0;
-+  cmd.header.cmd_type = cmd_type;
- 
--   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
--      r200FlushCmdBuf( rmesa, where );
-+  return (uint32_t)cmd.i;
- 
--   head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--   rmesa->store.cmd_used += bytes;
--   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
--   return head;
- }
- 
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+
- #endif /* __R200_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
-deleted file mode 100644
-index 99661a4..0000000
---- a/src/mesa/drivers/dri/r200/r200_lock.c
-+++ /dev/null
-@@ -1,116 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
-- 
--#include "r200_context.h"
--#include "r200_lock.h"
--#include "r200_tex.h"
--#include "r200_state.h"
--#include "r200_ioctl.h"
--
--#include "drirenderbuffer.h"
--
--
--#if DEBUG_LOCKING
--char *prevLockFile = NULL;
--int prevLockLine = 0;
--#endif
--
--/* Turn on/off page flipping according to the flags in the sarea:
-- */
--static void
--r200UpdatePageFlipping( r200ContextPtr rmesa )
--{
--   rmesa->doPageFlip = rmesa->sarea->pfState;
--   if (rmesa->glCtx->WinSysDrawBuffer) {
--      driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                           rmesa->sarea->pfCurrentPage);
--   }
--}
--
--
--
--/* Update the hardware state.  This is called if another main/context.has
-- * grabbed the hardware lock, which includes the X server.  This
-- * function also updates the driver's window state after the X server
-- * moves, resizes or restacks a window -- the change will be reflected
-- * in the drawable position and clip rects.  Since the X server grabs
-- * the hardware lock when it changes the window state, this routine will
-- * automatically be called after such a change.
-- */
--void r200GetLock( r200ContextPtr rmesa, GLuint flags )
--{
--   __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *readable = rmesa->dri.readable;
--   __DRIscreenPrivate *sPriv = rmesa->dri.screen;
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--   int i;
--
--   drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
--
--   /* The window might have moved, so we might need to get new clip
--    * rects.
--    *
--    * NOTE: This releases and regrabs the hw lock to allow the X server
--    * to respond to the DRI protocol request for new drawable info.
--    * Since the hardware state depends on having the latest drawable
--    * clip rects, all state checking must be done _after_ this call.
--    */
--   DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable );
--   if (drawable != readable) {
--      DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable );
--   }
--
--   if ( rmesa->lastStamp != drawable->lastStamp ) {
--      r200UpdatePageFlipping( rmesa );
--      r200SetCliprects( rmesa );
--      r200UpdateViewportOffset( rmesa->glCtx );
--      driUpdateFramebufferSize(rmesa->glCtx, drawable);
--   }
--
--   R200_STATECHANGE( rmesa, ctx );
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
--   }
--   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
--
--   if ( sarea->ctx_owner != rmesa->dri.hwContext ) {
--      sarea->ctx_owner = rmesa->dri.hwContext;
--   }
--
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
--   }
--
--   rmesa->lost_context = GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h
-deleted file mode 100644
-index 4ff9890..0000000
---- a/src/mesa/drivers/dri/r200/r200_lock.h
-+++ /dev/null
-@@ -1,106 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __R200_LOCK_H__
--#define __R200_LOCK_H__
--
--extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
--
--/* Turn DEBUG_LOCKING on to find locking conflicts.
-- */
--#define DEBUG_LOCKING	0
--
--#if DEBUG_LOCKING
--extern char *prevLockFile;
--extern int prevLockLine;
--
--#define DEBUG_LOCK()							\
--   do {									\
--      prevLockFile = (__FILE__);					\
--      prevLockLine = (__LINE__);					\
--   } while (0)
--
--#define DEBUG_RESET()							\
--   do {									\
--      prevLockFile = 0;							\
--      prevLockLine = 0;							\
--   } while (0)
--
--#define DEBUG_CHECK_LOCK()						\
--   do {									\
--      if ( prevLockFile ) {						\
--	 fprintf( stderr,						\
--		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
--		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
--	 exit( 1 );							\
--      }									\
--   } while (0)
--
--#else
--
--#define DEBUG_LOCK()
--#define DEBUG_RESET()
--#define DEBUG_CHECK_LOCK()
--
--#endif
--
--/*
-- * !!! We may want to separate locks from locks with validation.  This
-- * could be used to improve performance for those things commands that
-- * do not do any drawing !!!
-- */
--
--
--/* Lock the hardware and validate our state.
-- */
--#define LOCK_HARDWARE( rmesa )					\
--   do {								\
--      char __ret = 0;						\
--      DEBUG_CHECK_LOCK();					\
--      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,		\
--	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );	\
--      if ( __ret )						\
--	 r200GetLock( rmesa, 0 );				\
--      DEBUG_LOCK();						\
--   } while (0)
--
--#define UNLOCK_HARDWARE( rmesa )					\
--   do {									\
--      DRM_UNLOCK( rmesa->dri.fd,					\
--		  rmesa->dri.hwLock,					\
--		  rmesa->dri.hwContext );				\
--      DEBUG_RESET();							\
--   } while (0)
--
--#endif /* __R200_LOCK_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_maos.h b/src/mesa/drivers/dri/r200/r200_maos.h
-index d3ed06d..16a7047 100644
---- a/src/mesa/drivers/dri/r200/r200_maos.h
-+++ b/src/mesa/drivers/dri/r200/r200_maos.h
-@@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_context.h"
- 
- extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev );
--extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs );
- 
- #endif
-diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-index 8512b9a..383a0c4 100644
---- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_maos.h"
- #include "r200_tcl.h"
- 
--
--#if 0
--/* Usage:
-- *   - from r200_tcl_render
-- *   - call r200EmitArrays to ensure uptodate arrays in dma
-- *   - emit primitives (new type?) which reference the data
-- *       -- need to use elts for lineloop, quads, quadstrip/flat
-- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
-- *
-- */
--static void emit_ubyte_rgba3( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p\n",
--	      __FUNCTION__, count, stride, (void *)out);
--
--   for (i = 0; i < count; i++) {
--      out->red   = *data;
--      out->green = *(data+1);
--      out->blue  = *(data+2);
--      out->alpha = 0xFF;
--      out++;
--      data += stride;
--   }
--}
--
--static void emit_ubyte_rgba4( GLcontext *ctx,
--			      struct r200_dma_region *rvb,
--			      char *data,
--			      int stride,
--			      int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4) {
--      for (i = 0; i < count; i++)
--	 ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]);
--   } else {
--      for (i = 0; i < count; i++) {
--	 *(int *)out++ = LE32_TO_CPU(*(int *)data);
--	 data += stride;
--      }
--   }
--}
--
--
--static void emit_ubyte_rgba( GLcontext *ctx,
--			     struct r200_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 3:
--      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--}
--#endif
--
--
- #if defined(USE_X86_ASM)
- #define COPY_DWORDS( dst, src, nr )					\
- do {									\
-@@ -174,204 +70,34 @@ do {						\
- } while (0)
- #endif
- 
--
--static void emit_vecfog( GLcontext *ctx,
--			 struct r200_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
-+			     GLvoid *data, int stride, int count)
- {
--   int i;
--   GLfloat *out;
--
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
--
--   /* Emit the data
--    */
--   out = (GLfloat *)(rvb->address + rvb->start);
--   for (i = 0; i < count; i++) {
--      out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
--      out++;
--      data += stride;
--   }
--
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	uint32_t *out;
-+	int i;
-+	int size = 1;
-+
-+	if (stride == 0) {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+		count = 1;
-+		aos->stride = 0;
-+	} else {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+		aos->stride = size;
-+	}
-+
-+	aos->components = size;
-+	aos->count = count;
-+
-+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-+	for (i = 0; i < count; i++) {
-+	  out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
-+	  out++;
-+	  data += stride;
-+	}
- }
- 
--
--static void emit_vec4( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4)
--      COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out++;
--	 data += stride;
--      }
--}
--
--
--static void emit_vec8( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 8)
--      COPY_DWORDS( out, data, count*2 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out += 2;
--	 data += stride;
--      }
--}
--
--static void emit_vec12( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--	      __FUNCTION__, count, stride, (void *)out, (void *)data);
--
--   if (stride == 12)
--      COPY_DWORDS( out, data, count*3 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out += 3;
--	 data += stride;
--      }
--}
--
--static void emit_vec16( GLcontext *ctx,
--			struct r200_dma_region *rvb,
--			char *data,
--			int stride,
--			int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 16)
--      COPY_DWORDS( out, data, count*4 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out[3] = *(int *)(data+12);
--	 out += 4;
--	 data += stride;
--      }
--}
--
--
--static void emit_vector( GLcontext *ctx,
--			 struct r200_dma_region *rvb,
--			 char *data,
--			 int size,
--			 int stride,
--			 int count )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d size %d stride %d\n",
--	      __FUNCTION__, count, size, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, size * 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = size;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = size;
--      rvb->aos_size = size;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 1:
--      emit_vec4( ctx, rvb, data, stride, count );
--      break;
--   case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
--      break;
--   case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_vec16( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--
--}
--
--
--
- /* Emit any changed arrays to new GART memory, re-emit a packet to
-  * update the arrays.  
-  */
-@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- {
-    r200ContextPtr rmesa = R200_CONTEXT( ctx );
-    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
--   struct r200_dma_region **component = rmesa->tcl.aos_components;
-    GLuint nr = 0;
-    GLuint vfmt0 = 0, vfmt1 = 0;
-    GLuint count = VB->Count;
-    GLuint i, emitsize;
- 
-+   //   fprintf(stderr,"emit arrays\n");
-    for ( i = 0; i < 15; i++ ) {
-       GLubyte attrib = vimap_rev[i];
-       if (attrib != 255) {
-@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- 	 case 3:
- 	    /* special handling to fix up fog. Will get us into trouble with vbos...*/
- 	    assert(attrib == VERT_ATTRIB_FOG);
--	    if (!rmesa->tcl.vertex_data[i].buf) {
-+	    if (!rmesa->radeon.tcl.aos[i].bo) {
- 	       if (ctx->VertexProgram._Enabled)
--		  emit_vector( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 1,
--			 VB->AttribPtr[attrib]->stride,
--			 count);
-+		  rcommon_emit_vector( ctx,
-+				       &(rmesa->radeon.tcl.aos[nr]),
-+				       (char *)VB->AttribPtr[attrib]->data,
-+				       1,
-+				       VB->AttribPtr[attrib]->stride,
-+				       count);
- 	       else
--		  emit_vecfog( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 VB->AttribPtr[attrib]->stride,
--			 count);
-+		 r200_emit_vecfog( ctx,
-+				   &(rmesa->radeon.tcl.aos[nr]),
-+				   (char *)VB->AttribPtr[attrib]->data,
-+				   VB->AttribPtr[attrib]->stride,
-+				   count);
- 	    }
- 	    vfmt0 |= R200_VTX_DISCRETE_FOG;
- 	    goto after_emit;
-@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- 	 default:
- 	    assert(0);
- 	 }
--	 if (!rmesa->tcl.vertex_data[i].buf) {
--	    emit_vector( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 emitsize,
--			 VB->AttribPtr[attrib]->stride,
--			 count );
-+	 if (!rmesa->radeon.tcl.aos[nr].bo) {
-+	   rcommon_emit_vector( ctx,
-+				&(rmesa->radeon.tcl.aos[nr]),
-+				(char *)VB->AttribPtr[attrib]->data,
-+				emitsize,
-+				VB->AttribPtr[attrib]->stride,
-+				count );
- 	 }
- after_emit:
- 	 assert(nr < 12);
--	 component[nr++] = &rmesa->tcl.vertex_data[i];
-+	 nr++;
-       }
-    }
- 
-@@ -494,19 +220,6 @@ after_emit:
-       rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1;
-    }
- 
--   rmesa->tcl.nr_aos_components = nr;
-+   rmesa->radeon.tcl.aos_count = nr;
- }
- 
--
--void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
--   /* only do it for changed inputs ? */
--   int i;
--   for (i = 0; i < 15; i++) {
--      if (newinputs & (1 << i))
--	 r200ReleaseDmaRegion( rmesa,
--	    &rmesa->tcl.vertex_data[i], __FUNCTION__ );
--   }
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
-index 2797cbb..354daef 100644
---- a/src/mesa/drivers/dri/r200/r200_pixel.c
-+++ b/src/mesa/drivers/dri/r200/r200_pixel.c
-@@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
- 	     const void *pixels, GLint sz, GLint pitch )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
-@@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx,
-    if (*height <= 0)
-       return GL_FALSE;
- 
--   *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
--	    (*x + *width - 1) * rmesa->r200Screen->cpp);
-+   *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
-+	    (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
- 
-    return GL_TRUE;
- }
-@@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLint pitch = pack->RowLength ? pack->RowLength : width;
-    GLint blit_format;
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
-    GLint size = width * height * cpp;
- 
-+   return GL_FALSE;
-+#if 0
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
-    /* Only accelerate reading to GART buffers.
-     */
-    if ( !r200IsGartMemory(rmesa, pixels, 
--			 pitch * height * rmesa->r200Screen->cpp ) ) {
-+			 pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
-       if (R200_DEBUG & DEBUG_PIXEL)
- 	 fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
--      return GL_FALSE;
-    }
- 
-    /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
-@@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx,
-    if (!check_color(ctx, type, format, pack, pixels, size, pitch))
-       return GL_FALSE;
- 
--   switch ( rmesa->r200Screen->cpp ) {
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 4:
-       blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
-       break;
-@@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx,
-     * a full command buffer expects to be called unlocked.  As a
-     * workaround, immediately flush the buffer on aquiring the lock.
-     */
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    if (rmesa->store.cmd_used)
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
- 		       &size)) {
--      UNLOCK_HARDWARE( rmesa );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-       if (R200_DEBUG & DEBUG_PIXEL)
- 	 fprintf(stderr, "%s totally clipped -- nothing to do\n",
- 		 __FUNCTION__);
-@@ -212,14 +213,14 @@ r200TryReadPixels( GLcontext *ctx,
-    }
- 
-    {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+      __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-       driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
-       int nbox = dPriv->numClipRects;
-       int src_offset = drb->offset
--		     + rmesa->r200Screen->fbLocation;
-+		     + rmesa->radeon.radeonScreen->fbLocation;
-       int src_pitch = drb->pitch * drb->cpp;
-       int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
--      int dst_pitch = pitch * rmesa->r200Screen->cpp;
-+      int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
-       drm_clip_rect_t *box = dPriv->pClipRects;
-       int i;
- 
-@@ -257,12 +258,12 @@ r200TryReadPixels( GLcontext *ctx,
- 		       bw, bh );
-       }
- 
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-    }
--   UNLOCK_HARDWARE( rmesa );
--
--   r200Finish( ctx ); /* required by GL */
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- 
-+   radeonFinish( ctx ); /* required by GL */
-+#endif
-    return GL_TRUE;
- }
- 
-@@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx,
- 			 GLuint planemask)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    drm_clip_rect_t *box = dPriv->pClipRects;
-    struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
-    driRenderbuffer *drb = (driRenderbuffer *) rb;
-@@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx,
-    int blit_format;
-    int size;
-    int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
--   int src_pitch = pitch * rmesa->r200Screen->cpp;
-+   int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
--
--   switch ( rmesa->r200Screen->cpp ) {
-+#if 0
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 2:
-       blit_format = R200_CP_COLOR_FORMAT_RGB565;
-       break;
-@@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx,
-    }
- 
- 
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    if (rmesa->store.cmd_used)
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    y -= height;			/* cope with pixel zoom */
-    
-    if (!clip_pixelrect(ctx, ctx->DrawBuffer,
- 		       &x, &y, &width, &height,
- 		       &size)) {
--      UNLOCK_HARDWARE( rmesa );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-       return;
-    }
- 
-@@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx,
- 		    blit_format,
- 		    src_pitch, src_offset,
- 		    drb->pitch * drb->cpp,
--		    drb->offset + rmesa->r200Screen->fbLocation,
-+		    drb->offset + rmesa->radeon.radeonScreen->fbLocation,
- 		    bx - x, by - y,
- 		    bx, by,
- 		    bw, bh );
-    }
- 
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--   r200WaitForIdleLocked( rmesa ); /* required by GL */
--   UNLOCK_HARDWARE( rmesa );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-+   radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
-+   UNLOCK_HARDWARE( &rmesa->radeon );
-+#endif
- }
- 
- 
-@@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLint pitch = unpack->RowLength ? unpack->RowLength : width;
-    GLuint planemask;
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
-    GLint size = height * pitch * cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-@@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx,
-    case GL_RGB:
-    case GL_RGBA:
-    case GL_BGRA:
--      planemask = r200PackColor(cpp,
-+      planemask = radeonPackColor(cpp,
- 				ctx->Color.ColorMask[RCOMP],
- 				ctx->Color.ColorMask[GCOMP],
- 				ctx->Color.ColorMask[BCOMP],
-@@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx,
-       return GL_FALSE;
-    }
- 
--   if ( r200IsGartMemory(rmesa, pixels, size) )
-+   if (0)// r200IsGartMemory(rmesa, pixels, size) )
-    {
-       do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
-       return GL_TRUE;
-@@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   if (rmesa->Fallback)
-+   if (rmesa->radeon.Fallback)
-       _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
-    else
-       r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
-diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
-index 5ce287f..526a624 100644
---- a/src/mesa/drivers/dri/r200/r200_reg.h
-+++ b/src/mesa/drivers/dri/r200/r200_reg.h
-@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define     R200_VSC_UPDATE_USER_COLOR_1_ENABLE    0x00020000
- /* gap */
- #define R200_SE_TCL_VECTOR_INDX_REG                0x2200
-+#       define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
-+#       define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
- #define R200_SE_TCL_VECTOR_DATA_REG                0x2204
- #define R200_SE_TCL_SCALAR_INDX_REG                0x2208
-+#       define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT  16
- #define R200_SE_TCL_SCALAR_DATA_REG                0x220c
- /* gap */
- #define R200_SE_TCL_MATRIX_SEL_0                   0x2230
-@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define     R200_LOD_BIAS_MASK                        (0xfff80000)
- #define     R200_LOD_BIAS_SHIFT                       19
- #define R200_PP_TXSIZE_0                  0x2c0c /* NPOT only */
-+#define R200_PP_TX_WIDTHMASK_SHIFT 0
-+#define R200_PP_TX_HEIGHTMASK_SHIFT 16
-+
- #define R200_PP_TXPITCH_0                 0x2c10 /* NPOT only */
- #define R200_PP_BORDER_COLOR_0            0x2c14
- #define R200_PP_CUBIC_FACES_0             0x2c18
-diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
-deleted file mode 100644
-index 9783678..0000000
---- a/src/mesa/drivers/dri/r200/r200_span.c
-+++ /dev/null
-@@ -1,307 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/colormac.h"
--#include "swrast/swrast.h"
--
--#include "r200_context.h"
--#include "r200_ioctl.h"
--#include "r200_state.h"
--#include "r200_span.h"
--#include "r200_tex.h"
--
--#define DBG 0
--
--/*
-- * Note that all information needed to access pixels in a renderbuffer
-- * should be obtained through the gl_renderbuffer parameter, not per-context
-- * information.
-- */
--#define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
--   const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
--
--#define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
--
--#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
--
--#define Y_FLIP(Y) (bottom - (Y))
--
--#define HW_LOCK() 
--
--#define HW_UNLOCK()							
--
--
--
--/* ================================================================
-- * Color buffer
-- */
--
--/* 16 bit, RGB565 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_RGB
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
--
--#define TAG(x)    r200##x##_RGB565
--#define TAG2(x,y) r200##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
--#include "spantmp2.h"
--
--/* 32 bit, ARGB8888 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_BGRA
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
--
--#define TAG(x)    r200##x##_ARGB8888
--#define TAG2(x,y) r200##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
--#include "spantmp2.h"
--
--
--/* ================================================================
-- * Depth buffer
-- */
--
--/* The Radeon family has depth tiling on all the time, so we have to convert
-- * the x,y coordinates into the memory bus address (mba) in the same
-- * manner as the engine.  In each case, the linear block address (ba)
-- * is calculated, and then wired with x and y to produce the final
-- * memory address.
-- * The chip will do address translation on its own if the surface registers
-- * are set up correctly. It is not quite enough to get it working with hyperz too...
-- */
--
--/* extract bit 'b' of x, result is zero or one */
--#define BIT(x,b) ((x & (1<<b))>>b)
--
--static GLuint
--r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
--{
--   GLuint pitch = drb->pitch;
--   if (drb->depthHasSurface) {
--      return 4 * (x + y * pitch);
--   }
--   else {
--      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
--      GLuint a = 
--         (BIT(x,0) << 2) |
--         (BIT(y,0) << 3) |
--         (BIT(x,1) << 4) |
--         (BIT(y,1) << 5) |
--         (BIT(x,3) << 6) |
--         (BIT(x,4) << 7) |
--         (BIT(x,2) << 8) |
--         (BIT(y,2) << 9) |
--         (BIT(y,3) << 10) |
--         (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
--         ((b >> 1) << 12);
--      return a;
--   }
--}
--
--static GLuint
--r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
--{
--   GLuint pitch = drb->pitch;
--   if (drb->depthHasSurface) {
--      return 2 * (x + y * pitch);
--   }
--   else {
--      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
--      GLuint a = 
--         (BIT(x,0) << 1) |
--         (BIT(y,0) << 2) |
--         (BIT(x,1) << 3) |
--         (BIT(y,1) << 4) |
--         (BIT(x,2) << 5) |
--         (BIT(x,4) << 6) |
--         (BIT(x,5) << 7) |
--         (BIT(x,3) << 8) |
--         (BIT(y,2) << 9) |
--         (BIT(y,3) << 10) |
--         (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
--         ((b >> 1) << 12);
--      return a;
--   }
--}
--
--
--/* 16-bit depth buffer functions
-- */
--#define VALUE_TYPE GLushort
--
--#define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
--
--#define TAG(x) r200##x##_z16
--#include "depthtmp.h"
--
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#define VALUE_TYPE GLuint
--
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
--
--#define TAG(x) r200##x##_z24_s8
--#include "depthtmp.h"
--
--
--/* ================================================================
-- * Stencil buffer
-- */
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x00ffffff;							\
--   tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   d = tmp >> 24;							\
--} while (0)
--
--#define TAG(x) r200##x##_z24_s8
--#include "stenciltmp.h"
--
--
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
--
--static void r200SpanRenderStart( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--   r200WaitForIdleLocked( rmesa );
--
--   /* Read & rewrite the first pixel in the frame buffer.  This should
--    * be a noop, right?  In fact without this conform fails as reading
--    * from the framebuffer sometimes produces old results -- the
--    * on-card read cache gets mixed up and doesn't notice that the
--    * framebuffer has been updated.
--    *
--    * In the worst case this is buggy too as p might get the wrong
--    * value first time, so really need a hidden pixel somewhere for this.
--    */
--   {
--      int p;
--      driRenderbuffer *drb =
--	 (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
--      volatile int *buf =
--	 (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
--      p = *buf;
--      *buf = p;
--   }
--}
--
--static void r200SpanRenderFinish( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--   _swrast_flush( ctx );
--   UNLOCK_HARDWARE( rmesa );
--}
--
--void r200InitSpanFuncs( GLcontext *ctx )
--{
--   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
--   swdd->SpanRenderStart          = r200SpanRenderStart;
--   swdd->SpanRenderFinish         = r200SpanRenderFinish; 
--}
--
--
--
--/**
-- * Plug in the Get/Put routines for the given driRenderbuffer.
-- */
--void
--radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
--{
--   if (drb->Base.InternalFormat == GL_RGBA) {
--      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
--         r200InitPointers_RGB565(&drb->Base);
--      }
--      else {
--         r200InitPointers_ARGB8888(&drb->Base);
--      }
--   }
--   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--      r200InitDepthPointers_z16(&drb->Base);
--   }
--   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--      r200InitDepthPointers_z24_s8(&drb->Base);
--   }
--   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--      r200InitStencilPointers_z24_s8(&drb->Base);
--   }
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h
-deleted file mode 100644
-index bae5644..0000000
---- a/src/mesa/drivers/dri/r200/r200_span.h
-+++ /dev/null
-@@ -1,45 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __R200_SPAN_H__
--#define __R200_SPAN_H__
--
--#include "drirenderbuffer.h"
--
--extern void r200InitSpanFuncs( GLcontext *ctx );
--
--extern void
--radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
--
--#endif
-diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
-index 2fcc87c..ebf389e 100644
---- a/src/mesa/drivers/dri/r200/r200_state.c
-+++ b/src/mesa/drivers/dri/r200/r200_state.c
-@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_pipeline.h"
- #include "swrast_setup/swrast_setup.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
-@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
-    CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
-    CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
-    CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
--   if (rmesa->r200Screen->drmSupportsBlendColor)
--      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
- }
- 
- /**
-@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx )
- 
-    R200_STATECHANGE( rmesa, ctx );
- 
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       if (ctx->Color.ColorLogicOpEnabled) {
-          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
-          rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
-@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx )
-       return;
-    }
- 
--   if (!rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
-       return;
-    }
-@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
- 
-    switch ( format ) {
-    case R200_DEPTH_FORMAT_16BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x0000ffff;
-+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
-       break;
-    case R200_DEPTH_FORMAT_24BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x00ffffff;
-+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
-       break;
-    }
- }
-@@ -480,7 +482,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    case GL_FOG_COLOR: 
-       R200_STATECHANGE( rmesa, ctx );
-       UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
--      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
-+      i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
-       break;
-@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    }
- }
- 
--
--/* =============================================================
-- * Scissoring
-- */
--
--
--static GLboolean intersect_rect( drm_clip_rect_t *out,
--				 drm_clip_rect_t *a,
--				 drm_clip_rect_t *b )
--{
--   *out = *a;
--   if ( b->x1 > out->x1 ) out->x1 = b->x1;
--   if ( b->y1 > out->y1 ) out->y1 = b->y1;
--   if ( b->x2 < out->x2 ) out->x2 = b->x2;
--   if ( b->y2 < out->y2 ) out->y2 = b->y2;
--   if ( out->x1 >= out->x2 ) return GL_FALSE;
--   if ( out->y1 >= out->y2 ) return GL_FALSE;
--   return GL_TRUE;
--}
--
--
--void r200RecalcScissorRects( r200ContextPtr rmesa )
--{
--   drm_clip_rect_t *out;
--   int i;
--
--   /* Grow cliprect store?
--    */
--   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
--	 rmesa->state.scissor.numAllocedClipRects *= 2;
--      }
--
--      if (rmesa->state.scissor.pClipRects)
--	 FREE(rmesa->state.scissor.pClipRects);
--
--      rmesa->state.scissor.pClipRects = 
--	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
--		 sizeof(drm_clip_rect_t) );
--
--      if ( rmesa->state.scissor.pClipRects == NULL ) {
--	 rmesa->state.scissor.numAllocedClipRects = 0;
--	 return;
--      }
--   }
--   
--   out = rmesa->state.scissor.pClipRects;
--   rmesa->state.scissor.numClipRects = 0;
--
--   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
--      if ( intersect_rect( out, 
--			   &rmesa->pClipRects[i], 
--			   &rmesa->state.scissor.rect ) ) {
--	 rmesa->state.scissor.numClipRects++;
--	 out++;
--      }
--   }
--}
--
--
--static void r200UpdateScissor( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( rmesa->dri.drawable ) {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--
--      int x = ctx->Scissor.X;
--      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
--      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
--      int h = dPriv->h - ctx->Scissor.Y - 1;
--
--      rmesa->state.scissor.rect.x1 = x + dPriv->x;
--      rmesa->state.scissor.rect.y1 = y + dPriv->y;
--      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
--      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
--
--      r200RecalcScissorRects( rmesa );
--   }
--}
--
--
--static void r200Scissor( GLcontext *ctx,
--			   GLint x, GLint y, GLsizei w, GLsizei h )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( ctx->Scissor.Enabled ) {
--      R200_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
--      r200UpdateScissor( ctx );
--   }
--
--}
--
--
- /* =============================================================
-  * Culling
-  */
-@@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx,
- 			   GLboolean b, GLboolean a )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
-+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 				ctx->Color.ColorMask[RCOMP],
- 				ctx->Color.ColorMask[GCOMP],
- 				ctx->Color.ColorMask[BCOMP],
-@@ -834,7 +740,8 @@ static void r200PolygonOffset( GLcontext *ctx,
- 			       GLfloat factor, GLfloat units )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   float_ui32_type constant =  { units * rmesa->state.depth.scale };
-+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
-+   float_ui32_type constant =  { units * depthScale };
-    float_ui32_type factoru = { factor };
- 
- /*    factor *= 2; */
-@@ -861,15 +768,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
- 
-    /* TODO: push this into cmd mechanism
-     */
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* FIXME: Use window x,y offsets into stipple RAM.
-     */
-    stipple.mask = rmesa->state.stipple.mask;
--   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
-                     &stipple, sizeof(stipple) );
--   UNLOCK_HARDWARE( rmesa );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
- 
- static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-@@ -881,7 +788,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-     * cases work. 
-     */
-    TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
--   if (rmesa->TclFallback) {
-+   if (rmesa->radeon.TclFallback) {
-       r200ChooseRenderState( ctx );
-       r200ChooseVertexState( ctx );
-    }
-@@ -958,7 +865,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
- 
-    /* Update vertex/render formats
-     */
--   if (rmesa->TclFallback) { 
-+   if (rmesa->radeon.TclFallback) { 
-       r200ChooseRenderState( ctx );
-       r200ChooseVertexState( ctx );
-    }
-@@ -1430,7 +1337,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
- 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
- 	 else
- 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
--	 if (rmesa->TclFallback) {
-+	 if (rmesa->radeon.TclFallback) {
- 	    r200ChooseRenderState( ctx );
- 	    r200ChooseVertexState( ctx );
- 	 }
-@@ -1675,7 +1582,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   rmesa->state.stencil.clear = 
-+   rmesa->radeon.state.stencil.clear = 
-       ((GLuint) (ctx->Stencil.Clear & 0xff) |
-        (0xff << R200_STENCIL_MASK_SHIFT) |
-        ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
-@@ -1700,19 +1607,29 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
- void r200UpdateWindow( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLfloat xoffset = (GLfloat)dPriv->x;
--   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
-+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
-+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
-+   GLfloat y_scale, y_bias;
-+
-+   if (render_to_fbo) {
-+      y_scale = 1.0;
-+      y_bias = 0;
-+   } else {
-+      y_scale = -1.0;
-+      y_bias = yoffset;
-+   }
- 
-    float_ui32_type sx = { v[MAT_SX] };
-    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
--   float_ui32_type sy = { - v[MAT_SY] };
--   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
--   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
--   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
-+   float_ui32_type sy = { v[MAT_SY] * y_scale };
-+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
-+   float_ui32_type sz = { v[MAT_SZ] * depthScale };
-+   float_ui32_type tz = { v[MAT_TZ] * depthScale };
- 
--   R200_FIREVERTICES( rmesa );
-    R200_STATECHANGE( rmesa, vpt );
- 
-    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
-@@ -1733,6 +1650,8 @@ static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
-     * values, or keep the originals hanging around.
-     */
-    r200UpdateWindow( ctx );
-+
-+   radeon_viewport(ctx, x, y, width, height);
- }
- 
- static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
-@@ -1744,7 +1663,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
- void r200UpdateViewportOffset( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    GLfloat xoffset = (GLfloat)dPriv->x;
-    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-@@ -1774,8 +1693,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
-                 R200_STIPPLE_Y_OFFSET_MASK);
- 
-          /* add magic offsets, then invert */
--         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
--         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
-+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
-+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
-                      & R200_STIPPLE_COORD_MASK);
- 
-          m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
-@@ -1788,7 +1707,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
-       }
-    }
- 
--   r200UpdateScissor( ctx );
-+   radeonUpdateScissor( ctx );
- }
- 
- 
-@@ -1805,7 +1724,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
-    CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
-    CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
-    CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
--   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
-+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
-                                              color[0], color[1],
-                                              color[2], color[3] );
- }
-@@ -1848,96 +1767,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
-    rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[rop];
- }
- 
--
--/*
-- * Set up the cliprects for either front or back-buffer drawing.
-- */
--void r200SetCliprects( r200ContextPtr rmesa )
--{
--   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
--   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
--   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
--
--   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
--      /* Can't ignore 2d windows if we are page flipping.
--       */
--      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
--         rmesa->numClipRects = drawable->numClipRects;
--         rmesa->pClipRects = drawable->pClipRects;
--      }
--      else {
--         rmesa->numClipRects = drawable->numBackClipRects;
--         rmesa->pClipRects = drawable->pBackClipRects;
--      }
--   }
--   else {
--     /* front buffer (or none, or multiple buffers) */
--     rmesa->numClipRects = drawable->numClipRects;
--     rmesa->pClipRects = drawable->pClipRects;
--  }
--
--   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
--      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
--			       drawable->w, drawable->h);
--      draw_fb->Initialized = GL_TRUE;
--   }
--
--   if (drawable != readable) {
--      if ((read_fb->Width != readable->w) ||
--	  (read_fb->Height != readable->h)) {
--	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
--				  readable->w, readable->h);
--	 read_fb->Initialized = GL_TRUE;
--      }
--   }
--
--   if (rmesa->state.scissor.enabled)
--      r200RecalcScissorRects( rmesa );
--
--   rmesa->lastStamp = drawable->lastStamp;
--}
--
--
--static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (R200_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s %s\n", __FUNCTION__,
--	      _mesa_lookup_enum_by_nr( mode ));
--
--   R200_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
--
--   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
--      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
--      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
--      return;
--   }
--
--   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
--   case BUFFER_FRONT_LEFT:
--   case BUFFER_BACK_LEFT:
--      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
--      break;
--   default:
--      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
--      return;
--   }
--
--   r200SetCliprects( rmesa );
--
--   /* We'll set the drawing engine's offset/pitch parameters later
--    * when we update other state.
--    */
--}
--
--
--static void r200ReadBuffer( GLcontext *ctx, GLenum mode )
--{
--   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
--}
--
- /* =============================================================
-  * State enable/disable
-  */
-@@ -2013,10 +1842,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-       R200_STATECHANGE(rmesa, ctx );
-       if ( state ) {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
-       } else {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
-       }
-       break;
- 
-@@ -2031,7 +1860,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
- 	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
-       }
-       r200UpdateSpecular( ctx ); /* for PK_SPEC */
--      if (rmesa->TclFallback) 
-+      if (rmesa->radeon.TclFallback) 
- 	 r200ChooseVertexState( ctx );
-       _mesa_allow_light_in_model( ctx, !state );
-       break;
-@@ -2068,7 +1897,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-    case GL_LIGHTING:
-       r200UpdateSpecular(ctx);
-       /* for reflection map fixup - might set recheck_texgen for all units too */
--      rmesa->NewGLState |= _NEW_TEXTURE;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE;
-       break;
- 
-    case GL_LINE_SMOOTH:
-@@ -2181,21 +2010,30 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-    }
- 
-    case GL_SCISSOR_TEST:
--      R200_FIREVERTICES( rmesa );
--      rmesa->state.scissor.enabled = state;
--      r200UpdateScissor( ctx );
-+      radeon_firevertices(&rmesa->radeon);
-+      rmesa->radeon.state.scissor.enabled = state;
-+      radeonUpdateScissor( ctx );
-       break;
- 
-    case GL_STENCIL_TEST:
--      if ( rmesa->state.stencil.hwBuffer ) {
--	 R200_STATECHANGE( rmesa, ctx );
--	 if ( state ) {
--	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
-+      {
-+	 GLboolean hw_stencil = GL_FALSE;
-+	 if (ctx->DrawBuffer) {
-+	    struct radeon_renderbuffer *rrbStencil
-+	       = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
-+	    hw_stencil = (rrbStencil && rrbStencil->bo);
-+	 }
-+
-+	 if (hw_stencil) {
-+	    R200_STATECHANGE( rmesa, ctx );
-+	    if ( state ) {
-+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
-+	    } else {
-+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
-+	    }
- 	 } else {
--	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
-+	    FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
- 	 }
--      } else {
--	 FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
-       }
-       break;
- 
-@@ -2432,64 +2270,73 @@ static void update_texturematrix( GLcontext *ctx )
-    }
- }
- 
--
--
--/**
-- * Tell the card where to render (offset, pitch).
-- * Effected by glDrawBuffer, etc
-- */
--void
--r200UpdateDrawBuffer(GLcontext *ctx)
-+static GLboolean r200ValidateBuffers(GLcontext *ctx)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_framebuffer *fb = ctx->DrawBuffer;
--   driRenderbuffer *drb;
-+   struct radeon_renderbuffer *rrb;
-+   int i;
- 
--   if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
--      /* draw to front */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
--   }
--   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* draw to back */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
--   }
--   else {
--      /* drawing to multiple buffers, or none */
--      return;
-+   radeon_validate_reset_bos(&rmesa->radeon);
-+   
-+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+     radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+			0, RADEON_GEM_DOMAIN_VRAM);
-    }
- 
--   assert(drb);
--   assert(drb->flippedPitch);
-+   /* depth buffer */
-+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+     radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+			0, RADEON_GEM_DOMAIN_VRAM);
-+   }
- 
--   R200_STATECHANGE( rmesa, ctx );
-+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+      radeonTexObj *t;
-+      
-+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+	 continue;
- 
--   /* Note: we used the (possibly) page-flipped values */
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--     = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
--	& R200_COLOROFFSET_MASK);
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-+      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
-+      if (t->image_override && t->bo)
-+	radeon_validate_bo(&rmesa->radeon, t->bo,
-+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-+      else if (t->mt->bo)
-+	radeon_validate_bo(&rmesa->radeon, t->mt->bo,
-+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-    }
--}
- 
-+   if (rmesa->radeon.dma.current)
-+      radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
- 
-+   return radeon_revalidate_bos(ctx);
-+}
- 
--void r200ValidateState( GLcontext *ctx )
-+GLboolean r200ValidateState( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint new_state = rmesa->NewGLState;
-+   GLuint new_state = rmesa->radeon.NewGLState;
- 
-    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
--     r200UpdateDrawBuffer(ctx);
-+      _mesa_update_framebuffer(ctx);
-+      /* this updates the DrawBuffer's Width/Height if it's a FBO */
-+      _mesa_update_draw_buffer_bounds(ctx);
-+      
-+      R200_STATECHANGE(rmesa, ctx);
-    }
- 
-    if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
-       r200UpdateTextureState( ctx );
--      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
-+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
-       r200UpdateLocalViewer( ctx );
-    }
- 
-+   /* we need to do a space check here */
-+   if (!r200ValidateBuffers(ctx))
-+     return GL_FALSE;
-+
- /* FIXME: don't really need most of these when vertex progs are enabled */
- 
-    /* Need an event driven matrix update?
-@@ -2533,7 +2380,8 @@ void r200ValidateState( GLcontext *ctx )
-       else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
-    }
- 
--   rmesa->NewGLState = 0;
-+   rmesa->radeon.NewGLState = 0;
-+   return GL_TRUE;
- }
- 
- 
-@@ -2544,7 +2392,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
-    _vbo_InvalidateState( ctx, new_state );
-    _tnl_InvalidateState( ctx, new_state );
-    _ae_invalidate_state( ctx, new_state );
--   R200_CONTEXT(ctx)->NewGLState |= new_state;
-+   R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
- }
- 
- /* A hack.  The r200 can actually cope just fine with materials
-@@ -2573,12 +2421,13 @@ static void r200WrapRunPipeline( GLcontext *ctx )
-    GLboolean has_material;
- 
-    if (0)
--      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
-+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
--      r200ValidateState( ctx );
-+   if (rmesa->radeon.NewGLState)
-+      if (!r200ValidateState( ctx ))
-+	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
- 
-    has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
- 
-@@ -2603,8 +2452,8 @@ void r200InitStateFuncs( struct dd_function_table *functions )
-    functions->UpdateState		= r200InvalidateState;
-    functions->LightingSpaceChange	= r200LightingSpaceChange;
- 
--   functions->DrawBuffer		= r200DrawBuffer;
--   functions->ReadBuffer		= r200ReadBuffer;
-+   functions->DrawBuffer		= radeonDrawBuffer;
-+   functions->ReadBuffer		= radeonReadBuffer;
- 
-    functions->AlphaFunc			= r200AlphaFunc;
-    functions->BlendColor		= r200BlendColor;
-@@ -2636,7 +2485,7 @@ void r200InitStateFuncs( struct dd_function_table *functions )
-    functions->PointParameterfv		= r200PointParameter;
-    functions->PointSize			= r200PointSize;
-    functions->RenderMode		= r200RenderMode;
--   functions->Scissor			= r200Scissor;
-+   functions->Scissor			= radeonScissor;
-    functions->ShadeModel		= r200ShadeModel;
-    functions->StencilFuncSeparate	= r200StencilFuncSeparate;
-    functions->StencilMaskSeparate	= r200StencilMaskSeparate;
-diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
-index a917163..23cf8ae 100644
---- a/src/mesa/drivers/dri/r200/r200_state.h
-+++ b/src/mesa/drivers/dri/r200/r200_state.h
-@@ -43,23 +43,17 @@ extern void r200InitTnlFuncs( GLcontext *ctx );
- 
- extern void r200UpdateMaterial( GLcontext *ctx );
- 
--extern void r200SetCliprects( r200ContextPtr rmesa );
--extern void r200RecalcScissorRects( r200ContextPtr rmesa );
- extern void r200UpdateViewportOffset( GLcontext *ctx );
- extern void r200UpdateWindow( GLcontext *ctx );
- extern void r200UpdateDrawBuffer(GLcontext *ctx);
- 
--extern void r200ValidateState( GLcontext *ctx );
--
--extern void r200PrintDirty( r200ContextPtr rmesa,
--			      const char *msg );
--
-+extern GLboolean r200ValidateState( GLcontext *ctx );
- 
- extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
- #define FALLBACK( rmesa, bit, mode ) do {				\
-    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
- 		     __FUNCTION__, bit, mode );				\
--   r200Fallback( rmesa->glCtx, bit, mode );				\
-+   r200Fallback( rmesa->radeon.glCtx, bit, mode );				\
- } while (0)
- 
- extern void r200LightingSpaceChange( GLcontext *ctx );
-diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
-index 9e4677e..75262e4 100644
---- a/src/mesa/drivers/dri/r200/r200_state_init.c
-+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
-@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_pipeline.h"
- #include "swrast_setup/swrast_setup.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
-@@ -52,31 +54,129 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "xmlpool.h"
- 
-+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
-+ * 1.3 cmdbuffers allow all previous state to be updated as well as
-+ * the tcl scalar and vector areas.
-+ */
-+static struct {
-+	int start;
-+	int len;
-+	const char *name;
-+} packet[RADEON_MAX_STATE_PACKETS] = {
-+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
-+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
-+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
-+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
-+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
-+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
-+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
-+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
-+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
-+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
-+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
-+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
-+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
-+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
-+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
-+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
-+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
-+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
-+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
-+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
-+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
-+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
-+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
-+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
-+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
-+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
-+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
-+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
-+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
-+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
-+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
-+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
-+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
-+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
-+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
-+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
-+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
-+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
-+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
-+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
-+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
-+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
-+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
-+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
-+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
-+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
-+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
-+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
-+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
-+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
-+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
-+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
-+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
-+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
-+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
-+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
-+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
-+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
-+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
-+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
-+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
-+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
-+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
-+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
-+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
-+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
-+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
-+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
-+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
-+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
-+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
-+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
-+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
-+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
-+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
-+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
-+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
-+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
-+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
-+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
-+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
-+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
-+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
-+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
-+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
-+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
-+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
-+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
-+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
-+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
-+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
-+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
-+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
-+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
-+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
-+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
-+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
-+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
-+};
-+
- /* =============================================================
-  * State initialization
-  */
--
--void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
-+static int cmdpkt( r200ContextPtr rmesa, int id ) 
- {
--   struct r200_state_atom *l;
--
--   fprintf(stderr, msg);
--   fprintf(stderr, ": ");
-+   drm_radeon_cmd_header_t h;
- 
--   foreach(l, &rmesa->hw.atomlist) {
--      if (l->dirty || rmesa->hw.all_dirty)
--	 fprintf(stderr, "%s, ", l->name);
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     return CP_PACKET0(packet[id].start, packet[id].len - 1);
-+   } else {
-+     h.i = 0;
-+     h.packet.cmd_type = RADEON_CMD_PACKET;
-+     h.packet.packet_id = id;
-    }
--
--   fprintf(stderr, "\n");
--}
--
--static int cmdpkt( int id ) 
--{
--   drm_radeon_cmd_header_t h;
--   h.i = 0;
--   h.packet.cmd_type = RADEON_CMD_PACKET;
--   h.packet.packet_id = id;
-    return h.i;
- }
- 
-@@ -127,150 +227,475 @@ static int cmdscl2( int offset, int stride, int count )
- }
- 
- #define CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
- {							\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
-    (void) rmesa;					\
--   return FLAG;						\
-+   return (FLAG) ? atom->cmd_size : 0;			\
- }
- 
- #define TCL_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
--{							\
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG);	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
-+{									\
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
-+   return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
- }
- 
- #define TCL_OR_VP_CHECK( NM, FLAG )			\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
- {							\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && (FLAG);		\
-+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
- }
- 
- #define VP_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
--{							\
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG);		\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
-+{									\
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
-+   (void) atom;								\
-+   return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
- }
- 
--
- CHECK( always, GL_TRUE )
- CHECK( never, GL_FALSE )
- CHECK( tex_any, ctx->Texture._EnabledUnits )
- CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
--CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
--CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
-+CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) )
-+CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded )
- CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
--CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
-+   CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) )
- CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
- CHECK( afs, ctx->ATIFragmentShader._Enabled )
--CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
-+CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT )
- TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
- TCL_CHECK( tcl, GL_TRUE )
--TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded )
-+TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded )
- TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
--TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
--TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
-+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled )
-+TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) )
- TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
- VP_CHECK( tcl_vp, GL_TRUE )
- VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
- VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
- 
-+#define OUT_VEC(hdr, data) do {			\
-+    drm_radeon_cmd_header_t h;					\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);							\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.vectors.count);				\
-+  } while(0)
-+
-+#define OUT_VECLINEAR(hdr, data) do {					\
-+    drm_radeon_cmd_header_t h;						\
-+    uint32_t _start, _sz;						\
-+    h.i = hdr;								\
-+    _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);		\
-+    _sz = h.veclinear.count * 4;					\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);							\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));	\
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1));	\
-+    OUT_BATCH_TABLE((data), _sz);					\
-+  } while(0)
-+
-+#define OUT_SCL(hdr, data) do {					\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);				\
-+  } while(0)
-+
-+#define OUT_SCL2(hdr, data) do {					\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);				\
-+  } while(0)
-+
-+static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 6;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
-+   OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
-+   END_BATCH();
-+}
- 
--/* Initialize the context's hardware state.
-- */
--void r200InitState( r200ContextPtr rmesa )
-+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
- {
--   GLcontext *ctx = rmesa->glCtx;
--   GLuint color_fmt, depth_fmt, i;
--   GLint drawPitch, drawOffset;
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 8;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
-+   OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
-+   END_BATCH();
-+}
- 
--   switch ( rmesa->r200Screen->cpp ) {
--   case 2:
--      color_fmt = R200_COLOR_FORMAT_RGB565;
--      break;
--   case 4:
--      color_fmt = R200_COLOR_FORMAT_ARGB8888;
--      break;
--   default:
--      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
--      exit( -1 );
-+static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 8;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
-+   OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
-+   END_BATCH();
-+}
-+
-+static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 4;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_SCL(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+
-+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 4;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   struct radeon_renderbuffer *rrb;
-+   uint32_t cbpitch;
-+   uint32_t zbpitch, depth_fmt;
-+   uint32_t dwords = atom->cmd_size;
-+
-+   /* output the first 7 bytes of context */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
-+   OUT_BATCH_TABLE(atom->cmd, 5);
-+
-+   rrb = radeon_get_depthbuffer(&r200->radeon);
-+   if (!rrb) {
-+     OUT_BATCH(0);
-+     OUT_BATCH(0);
-+   } else {
-+     zbpitch = (rrb->pitch / rrb->cpp);
-+     if (r200->using_hyperz)
-+       zbpitch |= RADEON_DEPTH_HYPERZ;
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+     OUT_BATCH(zbpitch);
-+     if (rrb->cpp == 4) 
-+       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; 
-+     else 
-+       depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; 
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; 
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; 
-+   }
-+     
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+
-+   rrb = radeon_get_colorbuffer(&r200->radeon);
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
-+   } else {
-+     atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); 
-+     if (rrb->cpp == 4) 
-+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; 
-+     else 
-+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; 
-+ 
-+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); 
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-    }
- 
--   rmesa->state.color.clear = 0x00000000;
-+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
- 
--   switch ( ctx->Visual.depthBits ) {
--   case 16:
--      rmesa->state.depth.clear = 0x0000ffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
--      depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
--      rmesa->state.stencil.clear = 0x00000000;
--      break;
--   case 24:
--      rmesa->state.depth.clear = 0x00ffffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
--      depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
--      rmesa->state.stencil.clear = 0xffff0000;
--      break;
--   default:
--      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
--	       ctx->Visual.depthBits );
--      exit( -1 );
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
-+   } else {
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+     OUT_BATCH(cbpitch);
-    }
- 
--   /* Only have hw stencil when depth buffer is 24 bits deep */
--   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
--				     ctx->Visual.depthBits == 24 );
-+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
-+     OUT_BATCH_TABLE((atom->cmd + 14), 4);
- 
--   rmesa->Fallback = 0;
-+   END_BATCH();
-+}
- 
--   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      drawOffset = rmesa->r200Screen->backOffset;
--      drawPitch  = rmesa->r200Screen->backPitch;
--   } else {
--      drawOffset = rmesa->r200Screen->frontOffset;
--      drawPitch  = rmesa->r200Screen->frontPitch;
-+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   struct radeon_renderbuffer *rrb, *drb;
-+   uint32_t cbpitch = 0;
-+   uint32_t zbpitch = 0;
-+   uint32_t dwords = atom->cmd_size;
-+   uint32_t depth_fmt;
-+
-+   rrb = radeon_get_colorbuffer(&r200->radeon);
-+   if (!rrb || !rrb->bo) {
-+      return;
-    }
--#if 000
--   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
--      rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
-+
-+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
-+   if (rrb->cpp == 4)
-+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
-+   else
-+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
-+
-+   cbpitch = (rrb->pitch / rrb->cpp);
-+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+
-+   drb = radeon_get_depthbuffer(&r200->radeon);
-+   if (drb) {
-+     zbpitch = (drb->pitch / drb->cpp);
-+     if (drb->cpp == 4)
-+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-+     else
-+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
-+   }
-+
-+   dwords = 10;
-+   if (drb)
-+     dwords += 6;
-+   if (rrb)
-+     dwords += 6;
-+
-+   /* output the first 7 bytes of context */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   /* In the CS case we need to split this up */
-+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
-+
-+   if (drb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
-+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
-+     OUT_BATCH(zbpitch);
-+   }
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+
-+   if (rrb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
-+     OUT_BATCH_TABLE((atom->cmd + 14), 4);
-+   }
-+
-+   END_BATCH();
-+}
-+
-+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+
-+   if (t && t->mt && !t->image_override)
-+     dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_BATCH_TABLE(atom->cmd, 10);
-+
-+   if (t && t->mt && !t->image_override) {
-+     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
-+   	lvl = &t->mt->levels[0];
-+	OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
-+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     } else {
-+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     }
-+   } else if (!t) {
-+     /* workaround for old CS mechanism */
-+     OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
-    } else {
--      rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
--      rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
-+     OUT_BATCH(t->override_offset);
-    }
- 
--   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
--   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
--#endif
-+   END_BATCH();
-+}
-+
-+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+   int hastexture = 1;
-+
-+   if (!t)
-+	hastexture = 0;
-+   else {
-+	if (!t->mt && !t->bo)
-+		hastexture = 0;
-+   }
-+
-+   dwords += 2;
-+   if (hastexture)
-+     dwords += 2;
-+   else
-+     dwords -= 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (24 * i), 7));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 8);
-+
-+   if (hastexture) {
-+     OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
-+     if (t->mt && !t->image_override) {
-+        if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
-+            lvl = &t->mt->levels[0];
-+	    OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
-+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+        } else {
-+           OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+        }
-+      } else {
-+	if (t->bo)
-+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+      }
-+   }
-+   END_BATCH();
-+}
-+
-+
-+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
-+   GLuint size;
-+
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5));
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+
-+   if (t && !t->image_override) {
-+     size = t->mt->totalsize / 6;
-+     OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+   }
-+   END_BATCH();
-+}
-+
-+/* Initialize the context's hardware state.
-+ */
-+void r200InitState( r200ContextPtr rmesa )
-+{
-+   GLcontext *ctx = rmesa->radeon.glCtx;
-+   GLuint i;
- 
--   rmesa->hw.max_state_size = 0;
-+   rmesa->radeon.state.color.clear = 0x00000000;
-+
-+   switch ( ctx->Visual.depthBits ) {
-+   case 16:
-+      rmesa->radeon.state.depth.clear = 0x0000ffff;
-+      rmesa->radeon.state.stencil.clear = 0x00000000;
-+      break;
-+   case 24:
-+   default:
-+      rmesa->radeon.state.depth.clear = 0x00ffffff;
-+      rmesa->radeon.state.stencil.clear = 0xffff0000;
-+      break;
-+   }
-+
-+   rmesa->radeon.Fallback = 0;
-+
-+   rmesa->radeon.hw.max_state_size = 0;
- 
- #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
-    do {								\
-       rmesa->hw.ATOM.cmd_size = SZ;				\
--      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-       rmesa->hw.ATOM.name = NM;					\
-       rmesa->hw.ATOM.idx = IDX;					\
-       rmesa->hw.ATOM.check = check_##CHK;			\
-       rmesa->hw.ATOM.dirty = GL_FALSE;				\
--      rmesa->hw.max_state_size += SZ * sizeof(int);		\
-+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
-    } while (0)
- 
- 
-    /* Allocate state buffers:
-     */
--   if (rmesa->r200Screen->drmSupportsBlendColor)
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
-    else
-       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     rmesa->hw.ctx.emit = ctx_emit_cs;
-+   else
-+     rmesa->hw.ctx.emit = ctx_emit;
-    ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
-    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
-    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
-@@ -282,8 +707,8 @@ void r200InitState( r200ContextPtr rmesa )
-    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
-    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
-    ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
-       /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
- 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
-@@ -303,7 +728,7 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
-    }
-    else {
--      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- 	 ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
-@@ -321,13 +746,21 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
-       ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
-    }
--   if (rmesa->r200Screen->drmSupportsCubeMapsR200) {
-+
-+   for (i = 0; i < 5; i++)
-+      if (rmesa->radeon.radeonScreen->kernel_mm)
-+          rmesa->hw.tex[i].emit = tex_emit_cs;
-+      else
-+          rmesa->hw.tex[i].emit = tex_emit;
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
-       ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
-       ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
-       ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
-       ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
-       ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
-       ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
-+      for (i = 0; i < 5; i++)
-+	rmesa->hw.cube[i].emit = cube_emit;
-    }
-    else {
-       ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
-@@ -337,7 +770,8 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
-       ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
-    }
--   if (rmesa->r200Screen->drmSupportsVertexProgram) {
-+
-+   if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
-       ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
-       ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
-       ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
-@@ -390,13 +824,13 @@ void r200InitState( r200ContextPtr rmesa )
-    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
-    ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
-    ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
--   if (rmesa->r200Screen->drmSupportsTriPerf) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
-       ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
-    }
-    else {
-       ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
-    }
--   if (rmesa->r200Screen->drmSupportsPointSprites) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
-       ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
-       ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
-    }
-@@ -409,87 +843,115 @@ void r200InitState( r200ContextPtr rmesa )
- 
-    /* Fill in the packet headers:
-     */
--   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
--   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
--   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
--   if (rmesa->r200Screen->drmSupportsBlendColor)
--      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR);
--   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
--   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
--   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
--   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
--   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
--   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
--   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
--   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
--   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
--   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
--   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
--   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
--   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
--   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
--   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
--      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
--      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
--      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
--      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
--      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
--      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
--      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
--      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
--      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
--      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
--      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
--      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
-+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
-+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
-+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-+      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
-+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
-+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
-+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
-+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
-+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
-+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
-+   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
-+   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
-+   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
-+   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
-+   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
-+   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
-+   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
-+   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
-+   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
-+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
-+      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
-    } else {
--      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
--      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
--      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
--      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
--      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
--      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
--      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
--      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
--      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
--      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
--      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
--      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
--   }
--   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
--   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
--   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2);
--   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3);
--   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3);
--   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4);
--   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4);
--   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5);
--   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5);
--   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
--   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
--   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2);
--   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3);
--   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4);
--   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5);
--   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
--   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
--   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
--   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
--   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
--   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
--   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
--   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
--   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
--   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
--   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL);
--   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
-+   }
-+   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
-+   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
-+   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
-+   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
-+   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
-+   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
-+   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
-+   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
-+   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
-+   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
-+   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
-+   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
-+   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
-+   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
-+   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
-+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
-+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
-+   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
-+   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
-+   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
-+   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
-+   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
-+   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
-+   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
-+   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
-+   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
-+   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+	rmesa->hw.mtl[0].emit = mtl_emit;
-+	rmesa->hw.mtl[1].emit = mtl_emit;
-+
-+	rmesa->hw.vpi[0].emit = veclinear_emit;
-+	rmesa->hw.vpi[1].emit = veclinear_emit;
-+	rmesa->hw.vpp[0].emit = veclinear_emit;
-+	rmesa->hw.vpp[1].emit = veclinear_emit;
-+
-+	rmesa->hw.grd.emit = scl_emit;
-+	rmesa->hw.fog.emit = vec_emit;
-+	rmesa->hw.glt.emit = vec_emit;
-+	rmesa->hw.eye.emit = vec_emit;
-+
-+	for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
-+	  rmesa->hw.mat[i].emit = vec_emit;
-+
-+	for (i = 0; i < 8; i++)
-+	  rmesa->hw.lit[i].emit = lit_emit;
-+
-+	for (i = 0; i < 6; i++)
-+	  rmesa->hw.ucp[i].emit = vec_emit;
-+
-+	rmesa->hw.ptp.emit = ptp_emit;
-+   }
-+
-+
-+   
-    rmesa->hw.mtl[0].cmd[MTL_CMD_0] = 
-       cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
-    rmesa->hw.mtl[0].cmd[MTL_CMD_1] = 
-@@ -567,7 +1029,7 @@ void r200InitState( r200ContextPtr rmesa )
- 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
- 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
- 
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
-       rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
- 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-@@ -578,18 +1040,17 @@ void r200InitState( r200ContextPtr rmesa )
-    }
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
--      rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation;
-+      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
--      ((rmesa->r200Screen->depthPitch &
-+      ((rmesa->radeon.radeonScreen->depthPitch &
- 	R200_DEPTHPITCH_MASK) |
-        R200_DEPTH_ENDIAN_NO_SWAP);
-    
-    if (rmesa->using_hyperz)
-       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
- 
--   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
--					       R200_Z_TEST_LESS |
-+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
- 					       R200_STENCIL_TEST_ALWAYS |
- 					       R200_STENCIL_FAIL_KEEP |
- 					       R200_STENCIL_ZPASS_KEEP |
-@@ -599,15 +1060,14 @@ void r200InitState( r200ContextPtr rmesa )
-    if (rmesa->using_hyperz) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
- 						  R200_Z_DECOMPRESSION_ENABLE;
--/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
-+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
-    }
- 
-    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
-  				     | R200_TEX_BLEND_0_ENABLE);
- 
--   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
--   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
-+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
-    case DRI_CONF_DITHER_XERRORDIFFRESET:
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
-       break;
-@@ -615,41 +1075,19 @@ void r200InitState( r200ContextPtr rmesa )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
-       break;
-    }
--   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
-+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
- 	DRI_CONF_ROUND_ROUND )
--      rmesa->state.color.roundEnable = R200_ROUND_ENABLE;
-+      rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
-    else
--      rmesa->state.color.roundEnable = 0;
--   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
-+      rmesa->radeon.state.color.roundEnable = 0;
-+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
- 	DRI_CONF_COLOR_REDUCTION_DITHER )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
-    else
--      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
--
--#if 000
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
--					       rmesa->r200Screen->fbLocation)
--					      & R200_COLOROFFSET_MASK);
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
--					      R200_COLORPITCH_MASK) |
--					     R200_COLOR_ENDIAN_NO_SWAP);
--#else
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
--					       rmesa->r200Screen->fbLocation)
--					      & R200_COLOROFFSET_MASK);
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
--					      R200_COLORPITCH_MASK) |
--					     R200_COLOR_ENDIAN_NO_SWAP);
--#endif
--   /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
--   }
-+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
- 
-    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * 
--			driQueryOptionf (&rmesa->optionCache,"texture_blend_quality");
-+			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
-    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
- 
-    rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
-@@ -704,7 +1142,7 @@ void r200InitState( r200ContextPtr rmesa )
- 						R200_VC_NO_SWAP;
- #endif
- 
--   if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-       /* Bypass TCL */
-       rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
-    }
-@@ -743,28 +1181,28 @@ void r200InitState( r200ContextPtr rmesa )
-       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
-          (/* R200_TEXCOORD_PROJ | */
-           0x100000);	/* Small default bias */
--      if (rmesa->r200Screen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
- 	 rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
--	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 	 rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
- 	 rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
-       }
-       else {
- 	  rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
--	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-      }
- 
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 
-       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
-          (R200_TXC_ARG_A_ZERO |
-@@ -967,5 +1405,7 @@ void r200InitState( r200ContextPtr rmesa )
- 
-    r200LightingSpaceChange( ctx );
- 
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
-+
-+   rcommonInitCmdBuf(&rmesa->radeon);
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
-index b25f028..712da98 100644
---- a/src/mesa/drivers/dri/r200/r200_swtcl.c
-+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
-@@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_tcl.h"
- 
- 
--static void flush_last_swtcl_prim( r200ContextPtr rmesa  );
--
--
- /***********************************************************************
-  *                         Initialization 
-  ***********************************************************************/
- 
- #define EMIT_ATTR( ATTR, STYLE, F0 )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
-    fmt_0 |= F0;								\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- static void r200SetVertexFormat( GLcontext *ctx )
-@@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
-    }
- 
-    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--   rmesa->swtcl.vertex_attr_count = 0;
-+   rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
-    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-     * build up a hardware vertex.
-@@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
-    }
- 
--   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
-+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
- 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
- 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
-       R200_NEWPRIM(rmesa);
-@@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx )
-       rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
-       rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
- 
--      rmesa->swtcl.vertex_size =
-+      rmesa->radeon.swtcl.vertex_size =
- 	  _tnl_install_attrs( ctx,
--			      rmesa->swtcl.vertex_attrs, 
--			      rmesa->swtcl.vertex_attr_count,
-+			      rmesa->radeon.swtcl.vertex_attrs, 
-+			      rmesa->radeon.swtcl.vertex_attr_count,
- 			      NULL, 0 );
--      rmesa->swtcl.vertex_size /= 4;
--      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-+      rmesa->radeon.swtcl.vertex_size /= 4;
-+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
-    }
- }
- 
- 
- static void r200RenderStart( GLcontext *ctx )
- {
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
-    r200SetVertexFormat( ctx );
--
--   if (rmesa->dma.flush != 0 && 
--       rmesa->dma.flush != flush_last_swtcl_prim)
--      rmesa->dma.flush( rmesa );
- }
- 
- 
-@@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx )
-     * rasterization fallback.  As this function will be called again when we
-     * leave a rasterization fallback, we can just skip it for now.
-     */
--   if (rmesa->Fallback != 0)
-+   if (rmesa->radeon.Fallback != 0)
-       return;
- 
-    vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
-@@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx )
-    }
- }
- 
--
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( r200ContextPtr rmesa  )
-+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
- {
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->dma.flush = NULL;
--
--   if (rmesa->dma.current.buf) {
--      struct r200_dma_region *current = &rmesa->dma.current;
--      GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset +
--			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
--			       current->start);
--
--      assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
--
--      assert (current->start + 
--	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	      current->ptr);
--
--      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--	 r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
--			        rmesa->hw.max_state_size + VBUF_BUFSZ );
--	 r200EmitVertexAOS( rmesa,
--			      rmesa->swtcl.vertex_size,
--			      current_offset);
--
--	 r200EmitVbufPrim( rmesa,
--			   rmesa->swtcl.hw_primitive,
--			   rmesa->swtcl.numverts);
--      }
--
--      rmesa->swtcl.numverts = 0;
--      current->start = current->ptr;
--   }
--}
--
--
--/* Alloc space in the current dma region.
-- */
--static INLINE void *
--r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize )
--{
--   GLuint bytes = vsize * nverts;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      r200RefillCurrentDmaRegion( rmesa );
--
--   if (!rmesa->dma.flush) {
--      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--      rmesa->dma.flush = flush_last_swtcl_prim;
--   }
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			    __FUNCTION__);
- 
--   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
--   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--   ASSERT( rmesa->dma.current.start + 
--	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	   rmesa->dma.current.ptr );
- 
-+   radeonEmitState(&rmesa->radeon);
-+   r200EmitVertexAOS( rmesa,
-+		      rmesa->radeon.swtcl.vertex_size,
-+		      rmesa->radeon.dma.current,
-+		      current_offset);
- 
--   {
--      GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--      rmesa->dma.current.ptr += bytes;
--      rmesa->swtcl.numverts += nverts;
--      return head;
--   }
-+		      
-+   r200EmitVbufPrim( rmesa,
-+		     rmesa->radeon.swtcl.hw_primitive,
-+		     rmesa->radeon.swtcl.numverts);
- 
- }
- 
--
- /**************************************************************************/
- 
- 
-@@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx );
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
- #define CTX_ARG r200ContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
- #define LOCAL_VARS						\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   const char *r200verts = (char *)rmesa->swtcl.verts;
--#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int)))
--#define VERTEX r200Vertex 
-+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
-+#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
-+#define VERTEX radeonVertex 
- #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
- 
- #undef TAG
-@@ -456,11 +396,11 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
- 
- #define VERT_SET_RGBA( v, c )  					\
- do {								\
--   r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]);	\
-+   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
-@@ -472,7 +412,7 @@ do {								\
- #define VERT_SET_SPEC( v, c )					\
- do {								\
-    if (specoffset) {						\
--      r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]);	\
-+      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
-@@ -481,8 +421,8 @@ do {								\
- #define VERT_COPY_SPEC( v0, v1 )			\
- do {							\
-    if (specoffset) {					\
--      r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]);	\
--      r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]);	\
-+      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
-+      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
-       spec0->red   = spec1->red;	\
-       spec0->green = spec1->green;	\
-       spec0->blue  = spec1->blue; 	\
-@@ -513,7 +453,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -569,8 +509,8 @@ static void init_rast_tab( void )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *r200verts = (char *)rmesa->swtcl.verts;		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx )
-    GLuint index = 0;
-    GLuint flags = ctx->_TriangleCaps;
- 
--   if (!rmesa->TclFallback || rmesa->Fallback) 
-+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
-       return;
- 
-    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
-    if (flags & DD_TRI_UNFILLED)      index |= R200_UNFILLED_BIT;
- 
--   if (index != rmesa->swtcl.RenderIndex) {
-+   if (index != rmesa->radeon.swtcl.RenderIndex) {
-       tnl->Driver.Render.Points = rast_tab[index].points;
-       tnl->Driver.Render.Line = rast_tab[index].line;
-       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx )
- 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-       }
- 
--      rmesa->swtcl.RenderIndex = index;
-+      rmesa->radeon.swtcl.RenderIndex = index;
-    }
- }
- 
-@@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   if (rmesa->swtcl.hw_primitive != hwprim) {
-+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-       /* need to disable perspective-correct texturing for point sprites */
-       if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
- 	 if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
-@@ -649,14 +589,14 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- 	 rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
-       }
-       R200_NEWPRIM( rmesa );
--      rmesa->swtcl.hw_primitive = hwprim;
-+      rmesa->radeon.swtcl.hw_primitive = hwprim;
-    }
- }
- 
- static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   rmesa->swtcl.render_primitive = prim;
-+   rmesa->radeon.swtcl.render_primitive = prim;
-    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-       r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
- }
-@@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->Fallback;
-+   GLuint oldfallback = rmesa->radeon.Fallback;
- 
-    if (mode) {
--      rmesa->Fallback |= bit;
-+      rmesa->radeon.Fallback |= bit;
-       if (oldfallback == 0) {
--	 R200_FIREVERTICES( rmesa );
-+	 radeon_firevertices(&rmesa->radeon);
- 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
- 	 _swsetup_Wakeup( ctx );
--	 rmesa->swtcl.RenderIndex = ~0;
-+	 rmesa->radeon.swtcl.RenderIndex = ~0;
-          if (R200_DEBUG & DEBUG_FALLBACKS) {
-             fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
-                     bit, getFallbackString(bit));
-@@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->Fallback &= ~bit;
-+      rmesa->radeon.Fallback &= ~bit;
-       if (oldfallback == bit) {
- 
- 	 _swrast_flush( ctx );
-@@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- 
- 	 tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
- 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
--	 if (rmesa->TclFallback) {
--	    /* These are already done if rmesa->TclFallback goes to
-+	 if (rmesa->radeon.TclFallback) {
-+	    /* These are already done if rmesa->radeon.TclFallback goes to
- 	     * zero above. But not if it doesn't (R200_NO_TCL for
- 	     * example?)
- 	     */
- 	    _tnl_invalidate_vertex_state( ctx, ~0 );
- 	    _tnl_invalidate_vertices( ctx, ~0 );
--	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
-+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
- 	    r200ChooseVertexState( ctx );
- 	    r200ChooseRenderState( ctx );
- 	 }
-@@ -772,7 +712,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    const GLfloat *rc = ctx->Current.RasterColor; 
-    GLint row, col;
--   r200Vertex vert;
-+   radeonVertex vert;
-    GLuint orig_vte;
-    GLuint h;
- 
-@@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
-       vte |= R200_VTX_W0_FMT;
-       vap &= ~R200_VAP_FORCE_W_TO_ONE;
- 
--      rmesa->swtcl.vertex_size = 5;
-+      rmesa->radeon.swtcl.vertex_size = 5;
- 
-       if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
- 	   || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
-@@ -871,10 +811,10 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 
-    /* Update window height
-     */
--   LOCK_HARDWARE( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
--   px += rmesa->dri.drawable->x;
-+   LOCK_HARDWARE( &rmesa->radeon );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
-+   h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y;
-+   px += rmesa->radeon.dri.drawable->x;
- 
-    /* Clipping handled by existing mechansims in r200_ioctl.c?
-     */
-@@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 
-    /* Need to restore vertexformat?
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       r200ChooseVertexState( ctx );
- }
- 
-@@ -962,17 +902,9 @@ void r200InitSwtcl( GLcontext *ctx )
-    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
- 		       36 * sizeof(GLfloat) );
-    
--   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->swtcl.render_primitive = GL_TRIANGLES;
--   rmesa->swtcl.hw_primitive = 0;
-+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+   rmesa->radeon.swtcl.hw_primitive = 0;
- }
- 
--
--void r200DestroySwtcl( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (rmesa->swtcl.indexed_verts.buf) 
--      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
-index 8c29fd0..b090587 100644
---- a/src/mesa/drivers/dri/r200/r200_swtcl.h
-+++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
-@@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_context.h"
- 
- extern void r200InitSwtcl( GLcontext *ctx );
--extern void r200DestroySwtcl( GLcontext *ctx );
- 
- extern void r200ChooseRenderState( GLcontext *ctx );
- extern void r200ChooseVertexState( GLcontext *ctx );
-@@ -52,15 +51,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
- extern void r200PrintSetupFlags(char *msg, GLuint flags );
- 
- 
--extern void r200_emit_indexed_verts( GLcontext *ctx,
--				       GLuint start,
--				       GLuint count );
--
- extern void r200_translate_vertex( GLcontext *ctx, 
--				     const r200Vertex *src, 
-+				     const radeonVertex *src, 
- 				     SWvertex *dst );
- 
--extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
-+extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
- 
- extern void r200_import_float_colors( GLcontext *ctx );
- extern void r200_import_float_spec_colors( GLcontext *ctx );
-@@ -70,5 +65,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 			      const struct gl_pixelstore_attrib *unpack,
- 			      const GLubyte *bitmap );
- 
--
-+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
-index 99aecfe..5803709 100644
---- a/src/mesa/drivers/dri/r200/r200_tcl.c
-+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
-@@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = {
- 
- #define RESET_STIPPLE() do {			\
-    R200_STATECHANGE( rmesa, lin );		\
--   r200EmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- #define AUTO_STIPPLE( mode )  do {		\
-@@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = {
-    else						\
-       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
- 	 ~R200_LINE_PATTERN_AUTO_RESET;	\
--   r200EmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- 
-@@ -142,26 +142,24 @@ static GLboolean discrete_prim[0x10] = {
- 
- static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) 
- {
--   if (rmesa->dma.flush == r200FlushElts &&
--       rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {
-+   if (rmesa->radeon.dma.flush == r200FlushElts &&
-+       rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
- 
--      GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
--				    rmesa->store.cmd_used);
-+      GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr +
-+				    rmesa->tcl.elt_used);
- 
--      rmesa->store.cmd_used += nr*2;
-+      rmesa->tcl.elt_used += nr*2;
- 
-       return dest;
-    }
-    else {
--      if (rmesa->dma.flush)
--	 rmesa->dma.flush( rmesa );
-+      if (rmesa->radeon.dma.flush)
-+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- 
--      r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			     rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );
-+      rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__);
- 
-       r200EmitAOS( rmesa,
--		   rmesa->tcl.aos_components,
--		   rmesa->tcl.nr_aos_components, 0 );
-+		   rmesa->radeon.tcl.aos_count, 0 );
- 
-       return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
-    }
-@@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT( ctx );
-    r200TclPrimitive( ctx, prim, hwprim );
-    
--   r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			  rmesa->hw.max_state_size + VBUF_BUFSZ );
-+   //   fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
-+   rcommonEnsureCmdBufSpace( &rmesa->radeon,
-+			     AOS_BUFSZ(rmesa->radeon.tcl.aos_count) +
-+			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
- 
-    r200EmitAOS( rmesa,
--		  rmesa->tcl.aos_components,
--		  rmesa->tcl.nr_aos_components,
--		  start );
-+		rmesa->radeon.tcl.aos_count,
-+		start );
-    
-    /* Why couldn't this packet have taken an offset param?
-     */
-@@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
- 
-    /* TODO: separate this from the swtnl pipeline 
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       return GL_TRUE;	/* fallback to software t&l */
- 
-    if (R200_DEBUG & DEBUG_PRIMS)
-@@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
--      r200ValidateState( ctx );
-+   if (rmesa->radeon.NewGLState)
-+      if (!r200ValidateState( ctx ))
-+         return GL_TRUE; /* fallback to sw t&l */
- 
-    if (!ctx->VertexProgram._Enabled) {
-    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
-@@ -481,7 +481,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
- 
-    /* Do the actual work:
-     */
--   r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
-+   radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
-    r200EmitArrays( ctx, vimap_rev );
- 
-    rmesa->tcl.Elts = VB->Elts;
-@@ -545,7 +545,7 @@ static void transition_to_swtnl( GLcontext *ctx )
-    tnl->Driver.NotifyMaterialChange = 
-       _mesa_validate_all_lighting_tables;
- 
--   r200ReleaseArrays( ctx, ~0 );
-+   radeonReleaseArrays( ctx, ~0 );
- 
-    /* Still using the D3D based hardware-rasterizer from the radeon;
-     * need to put the card into D3D mode to make it work:
-@@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx )
- 
-    tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
- 
--   if ( rmesa->dma.flush )			
--      rmesa->dma.flush( rmesa );	
-+   if ( rmesa->radeon.dma.flush )			
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
- 
--   rmesa->dma.flush = NULL;
-+   rmesa->radeon.dma.flush = NULL;
-    
--   if (rmesa->swtcl.indexed_verts.buf) 
--      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
--
-    R200_STATECHANGE( rmesa, vap );
-    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
-    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
-@@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit)
- void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->TclFallback;
-+   GLuint oldfallback = rmesa->radeon.TclFallback;
- 
-    if (mode) {
--      rmesa->TclFallback |= bit;
-+      rmesa->radeon.TclFallback |= bit;
-       if (oldfallback == 0) {
- 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "R200 begin tcl fallback %s\n",
-@@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->TclFallback &= ~bit;
-+      rmesa->radeon.TclFallback &= ~bit;
-       if (oldfallback == bit) {
- 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "R200 end tcl fallback %s\n",
-diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
-index 259f35a..9f79157 100644
---- a/src/mesa/drivers/dri/r200/r200_tex.c
-+++ b/src/mesa/drivers/dri/r200/r200_tex.c
-@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/teximage.h"
- #include "main/texobj.h"
- 
--#include "texmem.h"
--
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * \param twrap Wrap mode for the \a t texture coordinate
-  */
- 
--static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
-+static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
- {
-    GLboolean  is_clamp = GL_FALSE;
-    GLboolean  is_clamp_to_border = GL_FALSE;
-+   struct gl_texture_object *tObj = &t->base;
- 
-    t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
- 
-@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
-       _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
-    }
- 
--   if (t->base.tObj->Target != GL_TEXTURE_1D) {
-+   if (tObj->Target != GL_TEXTURE_1D) {
-       switch ( twrap ) {
-       case GL_REPEAT:
-          t->pp_txfilter |= R200_CLAMP_T_WRAP;
-@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
-    t->border_fallback = (is_clamp && is_clamp_to_border);
- }
- 
--static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
-+static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
- {
-    t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
- 
-@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
-  * \param magf Texture magnification mode
-  */
- 
--static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
-+static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
- {
-    GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
- 
-+   /* Force revalidation to account for switches from/to mipmapping. */
-+   t->validated = GL_FALSE;
-+
-    t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
-    t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
- 
-@@ -267,701 +270,16 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
-    }
- }
- 
--static void r200SetTexBorderColor( r200TexObjPtr t, const GLfloat color[4] )
-+static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
- {
-    GLubyte c[4];
-    CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
-    CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
-    CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
-    CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
--   t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] );
--}
--
--
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
--{
--   r200TexObjPtr t;
--
--   t = CALLOC_STRUCT( r200_tex_obj );
--   texObj->DriverData = t;
--   if ( t != NULL ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE ) {
--	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, 
--		  (void *)t );
--      }
--
--      /* Initialize non-image-dependent parts of the state:
--       */
--      t->base.tObj = texObj;
--      t->border_fallback = GL_FALSE;
--
--      make_empty_list( & t->base );
--
--      r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
--      r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
--      r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
--      r200SetTexBorderColor( t, texObj->BorderColor );
--   }
--
--   return t;
--}
--
--/* try to find a format which will only need a memcopy */
--static const struct gl_texture_format *
--r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType )
--{
--   const GLuint ui = 1;
--   const GLubyte littleEndian = *((const GLubyte *) &ui);
--
--   if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
--      return &_mesa_texformat_rgba8888;
--   }
--   else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
--      return &_mesa_texformat_rgba8888_rev;
--   }
--   else return _dri_texformat_argb8888;
--}
--
--static const struct gl_texture_format *
--r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
--                           GLenum format, GLenum type )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   const GLboolean do32bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
--   const GLboolean force16bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
--   (void) format;
--
--   switch ( internalFormat ) {
--   case 4:
--   case GL_RGBA:
--   case GL_COMPRESSED_RGBA:
--      switch ( type ) {
--      case GL_UNSIGNED_INT_10_10_10_2:
--      case GL_UNSIGNED_INT_2_10_10_10_REV:
--	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      default:
--         return do32bpt ?
--	    r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
--      }
--
--   case 3:
--   case GL_RGB:
--   case GL_COMPRESSED_RGB:
--      switch ( type ) {
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_5_6_5:
--      case GL_UNSIGNED_SHORT_5_6_5_REV:
--	 return _dri_texformat_rgb565;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--      }
--
--   case GL_RGBA8:
--   case GL_RGB10_A2:
--   case GL_RGBA12:
--   case GL_RGBA16:
--      return !force16bpt ?
--	  r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
--
--   case GL_RGBA4:
--   case GL_RGBA2:
--      return _dri_texformat_argb4444;
--
--   case GL_RGB5_A1:
--      return _dri_texformat_argb1555;
--
--   case GL_RGB8:
--   case GL_RGB10:
--   case GL_RGB12:
--   case GL_RGB16:
--      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--
--   case GL_RGB5:
--   case GL_RGB4:
--   case GL_R3_G3_B2:
--      return _dri_texformat_rgb565;
--
--   case GL_ALPHA:
--   case GL_ALPHA4:
--   case GL_ALPHA8:
--   case GL_ALPHA12:
--   case GL_ALPHA16:
--   case GL_COMPRESSED_ALPHA:
--   /* can't use a8 format since interpreting hw I8 as a8 would result
--      in wrong rgb values (same as alpha value instead of 0). */
--      return _dri_texformat_al88;
--
--   case 1:
--   case GL_LUMINANCE:
--   case GL_LUMINANCE4:
--   case GL_LUMINANCE8:
--   case GL_LUMINANCE12:
--   case GL_LUMINANCE16:
--   case GL_COMPRESSED_LUMINANCE:
--      return _dri_texformat_l8;
--
--   case 2:
--   case GL_LUMINANCE_ALPHA:
--   case GL_LUMINANCE4_ALPHA4:
--   case GL_LUMINANCE6_ALPHA2:
--   case GL_LUMINANCE8_ALPHA8:
--   case GL_LUMINANCE12_ALPHA4:
--   case GL_LUMINANCE12_ALPHA12:
--   case GL_LUMINANCE16_ALPHA16:
--   case GL_COMPRESSED_LUMINANCE_ALPHA:
--      return _dri_texformat_al88;
--
--   case GL_INTENSITY:
--   case GL_INTENSITY4:
--   case GL_INTENSITY8:
--   case GL_INTENSITY12:
--   case GL_INTENSITY16:
--   case GL_COMPRESSED_INTENSITY:
--       return _dri_texformat_i8;
--
--   case GL_YCBCR_MESA:
--      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--          type == GL_UNSIGNED_BYTE)
--         return &_mesa_texformat_ycbcr;
--      else
--         return &_mesa_texformat_ycbcr_rev;
--
--   case GL_RGB_S3TC:
--   case GL_RGB4_S3TC:
--   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgb_dxt1;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgba_dxt1;
--
--   case GL_RGBA_S3TC:
--   case GL_RGBA4_S3TC:
--   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--      return &_mesa_texformat_rgba_dxt3;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--      return &_mesa_texformat_rgba_dxt5;
--
--   default:
--      _mesa_problem(ctx,
--         "unexpected internalFormat 0x%x in r200ChooseTextureFormat",
--         (int) internalFormat);
--      return NULL;
--   }
--
--   return NULL; /* never get here */
--}
--
--
--static GLboolean
--r200ValidateClientStorage( GLcontext *ctx, GLenum target,
--			   GLint internalFormat,
--			   GLint srcWidth, GLint srcHeight, 
--                           GLenum format, GLenum type,  const void *pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( R200_DEBUG & DEBUG_TEXTURE )
--      fprintf(stderr, "intformat %s format %s type %s\n",
--	      _mesa_lookup_enum_by_nr( internalFormat ),
--	      _mesa_lookup_enum_by_nr( format ),
--	      _mesa_lookup_enum_by_nr( type ));
--
--   if (!ctx->Unpack.ClientStorage)
--      return 0;
--
--   if (ctx->_ImageTransferState ||
--       texImage->IsCompressed ||
--       texObj->GenerateMipmap)
--      return 0;
--
--
--   /* This list is incomplete, may be different on ppc???
--    */
--   switch ( internalFormat ) {
--   case GL_RGBA:
--      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
--	 texImage->TexFormat = _dri_texformat_argb8888;
--      }
--      else
--	 return 0;
--      break;
--
--   case GL_RGB:
--      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
--	 texImage->TexFormat = _dri_texformat_rgb565;
--      }
--      else
--	 return 0;
--      break;
--
--   case GL_YCBCR_MESA:
--      if ( format == GL_YCBCR_MESA && 
--	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
--	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--      }
--      else if ( format == GL_YCBCR_MESA && 
--		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
--		 type == GL_UNSIGNED_BYTE)) {
--	 texImage->TexFormat = &_mesa_texformat_ycbcr;
--      }
--      else
--	 return 0;
--      break;
--
--   default:
--      return 0;
--   }
--
--   /* Could deal with these packing issues, but currently don't:
--    */
--   if (packing->SkipPixels || 
--       packing->SkipRows || 
--       packing->SwapBytes ||
--       packing->LsbFirst) {
--      return 0;
--   }
--
--   {      
--      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						  format, type);
--
--      
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
--		 __FUNCTION__, srcRowStride, srcRowStride);
--
--      /* Could check this later in upload, pitch restrictions could be
--       * relaxed, but would need to store the image pitch somewhere,
--       * as packing details might change before image is uploaded:
--       */
--      if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
--	  (srcRowStride & 63))
--	 return 0;
--
--
--      /* Have validated that _mesa_transfer_teximage would be a straight
--       * memcpy at this point.  NOTE: future calls to TexSubImage will
--       * overwrite the client data.  This is explicitly mentioned in the
--       * extension spec.
--       */
--      texImage->Data = (void *)pixels;
--      texImage->IsClientData = GL_TRUE;
--      texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
--
--      return 1;
--   }
--}
--
--
--static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage1d(ctx, target, level, internalFormat,
--                          width, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset,
--                                 GLsizei width,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--			     format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat, 
--				  width, height, 
--				  format, type, pixels, 
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
--   }
--   else {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_teximage2d(ctx, target, level, internalFormat,
--			     width, height, border, format, type, pixels,
--			     &ctx->Unpack, texObj, texImage);
--      
--      t->dirty_images[face] |= (1 << level);
--   }
--}
--
--
--static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--			     height, format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLsizei imageSize, const GLvoid *data,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--/* can't call this, different parameters. Would never evaluate to true anyway currently
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat,
--				  width, height,
--				  format, type, pixels,
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
--   }
--   else */{
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
--                                 height, border, imageSize, data, texObj, texImage);
--
--      t->dirty_images[face] |= (1 << level);
--   }
-+   t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
- }
- 
--
--static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format,
--                                 GLsizei imageSize, const GLvoid *data,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--                            height, format, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--#if ENABLE_HW_3D_TEXTURE
--static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
--                            GLint internalFormat,
--                            GLint width, GLint height, GLint depth,
--                            GLint border,
--                            GLenum format, GLenum type, const GLvoid *pixels,
--                            const struct gl_pixelstore_attrib *packing,
--                            struct gl_texture_object *texObj,
--                            struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--
--#if 0
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat, 
--				  width, height, 
--				  format, type, pixels, 
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
--   }
--   else
--#endif
--   {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_teximage3d(ctx, target, level, internalFormat,
--			     width, height, depth, border,
--                             format, type, pixels,
--			     &ctx->Unpack, texObj, texImage);
--      
--      t->dirty_images[0] |= (1 << level);
--   }
--}
--#endif
--
--
--#if ENABLE_HW_3D_TEXTURE
--static void
--r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
--                   GLint xoffset, GLint yoffset, GLint zoffset,
--                   GLsizei width, GLsizei height, GLsizei depth,
--                   GLenum format, GLenum type,
--                   const GLvoid *pixels,
--                   const struct gl_pixelstore_attrib *packing,
--                   struct gl_texture_object *texObj,
--                   struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--         return;
--      }
--      texObj->DriverData = t;
--   }
--
--   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
--                             width, height, depth,
--                             format, type, pixels, packing, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--#endif
--
--
--
- static void r200TexEnv( GLcontext *ctx, GLenum target,
- 			  GLenum pname, const GLfloat *param )
- {
-@@ -983,7 +301,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
-       GLubyte c[4];
-       GLuint envColor;
-       UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
--      envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
-+      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
-       if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
- 	 R200_STATECHANGE( rmesa, tf );
- 	 rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
-@@ -1002,7 +320,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
-        * NOTE: Add a small bias to the bias for conform mipsel.c test.
-        */
-       bias = *param + .01;
--      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
-+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
- 	  0.0 : -16.0;
-       bias = CLAMP( bias, min, 16.0 );
-       b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
-@@ -1039,7 +357,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
- 				struct gl_texture_object *texObj,
- 				GLenum pname, const GLfloat *params )
- {
--   r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
-+   radeonTexObj* t = radeon_tex_obj(texObj);
- 
-    if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
-@@ -1073,59 +391,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
-        * we just have to rely on loading the right subset of mipmap levels
-        * to simulate a clamped LOD.
-        */
--      driSwapOutTextureObject( (driTextureObject *) t );
-+      if (t->mt) {
-+         radeon_miptree_unreference(t->mt);
-+	 t->mt = 0;
-+	 t->validated = GL_FALSE;
-+      }
-       break;
- 
-    default:
-       return;
-    }
--
--   /* Mark this texobj as dirty (one bit per tex unit)
--    */
--   t->dirty_state = TEX_ALL;
- }
- 
- 
--
--static void r200BindTexture( GLcontext *ctx, GLenum target,
--			       struct gl_texture_object *texObj )
--{
--   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
--	       ctx->Texture.CurrentUnit );
--   }
--
--   if ( (target == GL_TEXTURE_1D)
--	|| (target == GL_TEXTURE_2D) 
--#if ENABLE_HW_3D_TEXTURE
--	|| (target == GL_TEXTURE_3D)
--#endif
--	|| (target == GL_TEXTURE_CUBE_MAP)
--	|| (target == GL_TEXTURE_RECTANGLE_NV) ) {
--      assert( texObj->DriverData != NULL );
--   }
--}
--
--
--static void r200DeleteTexture( GLcontext *ctx,
--				 struct gl_texture_object *texObj )
-+static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
--	       _mesa_lookup_enum_by_nr( texObj->Target ) );
-+   radeonTexObj* t = radeon_tex_obj(texObj);
-+
-+   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+      fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+	      (void *)texObj,
-+	      _mesa_lookup_enum_by_nr(texObj->Target));
-+   }
-+   
-+   if (rmesa) {
-+      int i;
-+      radeon_firevertices(&rmesa->radeon);
-+      for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
-+	 if ( t == rmesa->state.texture.unit[i].texobj ) {
-+	    rmesa->state.texture.unit[i].texobj = NULL;
-+	    rmesa->hw.tex[i].dirty = GL_FALSE;
-+	    rmesa->hw.cube[i].dirty = GL_FALSE;
-+	 }
-+      }      
-    }
--
--   if ( t != NULL ) {
--      if ( rmesa ) {
--         R200_FIREVERTICES( rmesa );
--      }
--
--      driDestroyTextureObject( t );
-+   
-+   if (t->mt) {
-+      radeon_miptree_unreference(t->mt);
-+      t->mt = 0;
-    }
--   /* Free mipmap images and the texture object itself */
-    _mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1155,46 +460,59 @@ static void r200TexGen( GLcontext *ctx,
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
--static struct gl_texture_object *
--r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
-+static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
-+						      GLuint name,
-+						      GLenum target)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_object *obj;
--   obj = _mesa_new_texture_object(ctx, name, target);
--   if (!obj)
--      return NULL;
--   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
--   r200AllocTexObj( obj );
--   return obj;
-+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
-+
-+   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+     fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+	     t, _mesa_lookup_enum_by_nr(target));
-+   }
-+
-+   _mesa_initialize_texture_object(&t->base, name, target);
-+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+   /* Initialize hardware state */
-+   r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
-+   r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
-+   r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
-+   r200SetTexBorderColor(t, t->base.BorderColor);
-+
-+   return &t->base;
- }
- 
- 
-+
- void r200InitTextureFuncs( struct dd_function_table *functions )
- {
-    /* Note: we only plug in the functions we implement in the driver
-     * since _mesa_init_driver_functions() was already called.
-     */
--   functions->ChooseTextureFormat	= r200ChooseTextureFormat;
--   functions->TexImage1D		= r200TexImage1D;
--   functions->TexImage2D		= r200TexImage2D;
-+   functions->ChooseTextureFormat	= radeonChooseTextureFormat_mesa;
-+   functions->TexImage1D		= radeonTexImage1D;
-+   functions->TexImage2D		= radeonTexImage2D;
- #if ENABLE_HW_3D_TEXTURE
--   functions->TexImage3D		= r200TexImage3D;
-+   functions->TexImage3D		= radeonTexImage3D;
- #else
-    functions->TexImage3D		= _mesa_store_teximage3d;
- #endif
--   functions->TexSubImage1D		= r200TexSubImage1D;
--   functions->TexSubImage2D		= r200TexSubImage2D;
-+   functions->TexSubImage1D		= radeonTexSubImage1D;
-+   functions->TexSubImage2D		= radeonTexSubImage2D;
- #if ENABLE_HW_3D_TEXTURE
--   functions->TexSubImage3D		= r200TexSubImage3D;
-+   functions->TexSubImage3D		= radeonTexSubImage3D;
- #else
-    functions->TexSubImage3D		= _mesa_store_texsubimage3d;
- #endif
-+   functions->GetTexImage               = radeonGetTexImage;
-+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
-    functions->NewTextureObject		= r200NewTextureObject;
--   functions->BindTexture		= r200BindTexture;
-+   //   functions->BindTexture		= r200BindTexture;
-    functions->DeleteTexture		= r200DeleteTexture;
-    functions->IsTextureResident		= driIsTextureResident;
- 
-@@ -1202,22 +520,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
-    functions->TexParameter		= r200TexParameter;
-    functions->TexGen			= r200TexGen;
- 
--   functions->CompressedTexImage2D	= r200CompressedTexImage2D;
--   functions->CompressedTexSubImage2D	= r200CompressedTexSubImage2D;
-+   functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
-+   functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
- 
--   driInitTextureFormats();
-+   functions->GenerateMipmap = radeonGenerateMipmap;
- 
--#if 000
--   /* moved or obsolete code */
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   driInitTextureObjects( ctx, & rmesa->swapped,
--			  DRI_TEXMGR_DO_TEXTURE_1D
--			  | DRI_TEXMGR_DO_TEXTURE_2D );
-+   functions->NewTextureImage = radeonNewTextureImage;
-+   functions->FreeTexImageData = radeonFreeTexImageData;
-+   functions->MapTexture = radeonMapTexture;
-+   functions->UnmapTexture = radeonUnmapTexture;
-+
-+   driInitTextureFormats();
- 
--   /* Hack: r200NewTextureObject is not yet installed when the
--    * default textures are created. Therefore set MaxAnisotropy of the
--    * default 2D texture now. */
--   ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache,
--							    "def_max_anisotropy");
--#endif
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
-index 10ff8e8..e122de6 100644
---- a/src/mesa/drivers/dri/r200/r200_tex.h
-+++ b/src/mesa/drivers/dri/r200/r200_tex.h
-@@ -35,15 +35,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #ifndef __R200_TEX_H__
- #define __R200_TEX_H__
- 
-+extern void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
-+extern void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
-+			      __DRIdrawable *dPriv);
- extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 			     unsigned long long offset, GLint depth,
- 			     GLuint pitch);
- 
- extern void r200UpdateTextureState( GLcontext *ctx );
- 
--extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
-+extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
- 
--extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
-+extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
- 
- extern void r200InitTextureFuncs( struct dd_function_table *functions );
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
-deleted file mode 100644
-index 3b81ac0..0000000
---- a/src/mesa/drivers/dri/r200/r200_texmem.c
-+++ /dev/null
-@@ -1,530 +0,0 @@
--/**************************************************************************
--
--Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.  
--The Weather Channel, Inc. funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86
--license. This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *
-- */
-- 
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/colormac.h"
--#include "main/macros.h"
--#include "r200_context.h"
--#include "r200_ioctl.h"
--#include "r200_tex.h"
--#include "radeon_reg.h"
--
--#include <unistd.h>  /* for usleep() */
--
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void
--r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
--{
--   if ( R200_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, 
--	       (void *)t, (void *)t->base.tObj );
--   }
--
--   if ( rmesa != NULL ) {
--      unsigned   i;
--
--
--      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
--	 if ( t == rmesa->state.texture.unit[i].texobj ) {
--	    rmesa->state.texture.unit[i].texobj = NULL;
--	    rmesa->hw.tex[i].dirty = GL_FALSE;
--	    rmesa->hw.cube[i].dirty = GL_FALSE;
--	 }
--      }
--   }
--}
--
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--
--static void r200UploadGARTClientSubImage( r200ContextPtr rmesa,
--					  r200TexObjPtr t, 
--					  struct gl_texture_image *texImage,
--					  GLint hwlevel,
--					  GLint x, GLint y, 
--					  GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   GLuint srcPitch, dstPitch;
--   int blit_format;
--   int srcOffset;
--
--   /*
--    * XXX it appears that we always upload the full image, not a subimage.
--    * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--    * changed, the src pitch will have to change.
--    */
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = R200_CP_COLOR_FORMAT_CI8;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   case 2:
--      blit_format = R200_CP_COLOR_FORMAT_RGB565;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   case 4:
--      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   default:
--      return;
--   }
--
--   t->image[0][hwlevel].data = texImage->Data;
--   srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
--
--   assert( srcOffset != ~0 );
--
--   /* Don't currently need to cope with small pitches?
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--
--   r200EmitWait( rmesa, RADEON_WAIT_3D );
--
--   r200EmitBlit( rmesa, blit_format, 
--		 srcPitch,  
--		 srcOffset,   
--		 dstPitch,
--		 t->bufAddr,
--		 x, 
--		 y, 
--		 t->image[0][hwlevel].x + x,
--		 t->image[0][hwlevel].y + y, 
--		 width,
--		 height );
--
--   r200EmitWait( rmesa, RADEON_WAIT_2D );
--}
--
--static void r200UploadRectSubImage( r200ContextPtr rmesa,
--				    r200TexObjPtr t, 
--				    struct gl_texture_image *texImage,
--				    GLint x, GLint y, 
--				    GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   int blit_format, dstPitch, done;
--
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = R200_CP_COLOR_FORMAT_CI8;
--      break;
--   case 2:
--      blit_format = R200_CP_COLOR_FORMAT_RGB565;
--      break;
--   case 4:
--      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
--      break;
--   default:
--      return;
--   }
--
--   t->image[0][0].data = texImage->Data;
--
--   /* Currently don't need to cope with small pitches.
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--   dstPitch = t->pp_txpitch + 32;
--
--   if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--      /* In this case, could also use GART texturing.  This is
--       * currently disabled, but has been tested & works.
--       */
--      if ( !t->image_override )
--         t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
--      t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, 
--		 "Using GART texturing for rectangular client texture\n");
--
--      /* Release FB memory allocated for this image:
--       */
--      /* FIXME This may not be correct as driSwapOutTextureObject sets
--       * FIXME dirty_images.  It may be fine, though.
--       */
--      if ( t->base.memBlock ) {
--	 driSwapOutTextureObject( (driTextureObject *) t );
--      }
--   }
--   else if (texImage->IsClientData) {
--      /* Data already in GART memory, with usable pitch.
--       */
--      GLuint srcPitch;
--      srcPitch = texImage->RowStride * texFormat->TexelBytes;
--      r200EmitBlit( rmesa, 
--		    blit_format, 
--		    srcPitch,
--		    r200GartOffsetFromVirtual( rmesa, texImage->Data ),   
--		    dstPitch, t->bufAddr,
--		    0, 0, 
--		    0, 0, 
--		    width, height );
--   }
--   else {
--      /* Data not in GART memory, or bad pitch.
--       */
--      for (done = 0; done < height ; ) {
--	 struct r200_dma_region region;
--	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
--	 int src_pitch;
--	 char *tex;
--
--         src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--	 tex = (char *)texImage->Data + done * src_pitch;
--
--	 memset(&region, 0, sizeof(region));
--	 r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
--
--	 /* Copy texdata to dma:
--	  */
--	 if (0)
--	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
--		    __FUNCTION__, src_pitch, dstPitch);
--
--	 if (src_pitch == dstPitch) {
--	    memcpy( region.address + region.start, tex, lines * src_pitch );
--	 } 
--	 else {
--	    char *buf = region.address + region.start;
--	    int i;
--	    for (i = 0 ; i < lines ; i++) {
--	       memcpy( buf, tex, src_pitch );
--	       buf += dstPitch;
--	       tex += src_pitch;
--	    }
--	 }
--
--	 r200EmitWait( rmesa, RADEON_WAIT_3D );
--
--	 /* Blit to framebuffer
--	  */
--	 r200EmitBlit( rmesa,
--		       blit_format,
--		       dstPitch, GET_START( &region ),
--		       dstPitch | (t->tile_bits >> 16),
--		       t->bufAddr,
--		       0, 0,
--		       0, done,
--		       width, lines );
--	 
--	 r200EmitWait( rmesa, RADEON_WAIT_2D );
--
--	 r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
--	 done += lines;
--      }
--   }
--}
--
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, 
--			    GLint hwlevel,
--			    GLint x, GLint y, GLint width, GLint height,
--			    GLuint face )
--{
--   struct gl_texture_image *texImage = NULL;
--   GLuint offset;
--   GLint imageWidth, imageHeight;
--   GLint ret;
--   drm_radeon_texture_t tex;
--   drm_radeon_tex_image_t tmp;
--   const int level = hwlevel + t->base.firstLevel;
--
--   if ( R200_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
--	       __FUNCTION__, (void *)t, (void *)t->base.tObj,
--	       level, width, height, face );
--   }
--
--   ASSERT(face < 6);
--
--   /* Ensure we have a valid texture to upload */
--   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
--      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--      return;
--   }
--
--   texImage = t->base.tObj->Image[face][level];
--
--   if ( !texImage ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
--      return;
--   }
--   if ( !texImage->Data ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
--      return;
--   }
--
--
--   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--      assert(level == 0);
--      assert(hwlevel == 0);
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
--      r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
--      return;
--   }
--   else if (texImage->IsClientData) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is in GART client storage\n",
--		  __FUNCTION__);
--      r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
--				   x, y, width, height );
--      return;
--   }
--   else if ( R200_DEBUG & DEBUG_TEXTURE )
--      fprintf( stderr, "%s: image data is in normal memory\n",
--	       __FUNCTION__);
--      
--
--   imageWidth = texImage->Width;
--   imageHeight = texImage->Height;
--
--   offset = t->bufAddr + t->base.totalSize / 6 * face;
--
--   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      GLint imageX = 0;
--      GLint imageY = 0;
--      GLint blitX = t->image[face][hwlevel].x;
--      GLint blitY = t->image[face][hwlevel].y;
--      GLint blitWidth = t->image[face][hwlevel].width;
--      GLint blitHeight = t->image[face][hwlevel].height;
--      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
--	       imageWidth, imageHeight, imageX, imageY );
--      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
--	       blitWidth, blitHeight, blitX, blitY );
--      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--	       (GLuint)offset, hwlevel, level );
--   }
--
--   t->image[face][hwlevel].data = texImage->Data;
--
--   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--    * We used to use 1, 2 and 4-byte texels and used to use the texture
--    * width to dictate the blit width - but that won't work for compressed
--    * textures. (Brian)
--    * NOTE: can't do that with texture tiling. (sroland)
--    */
--   tex.offset = offset;
--   tex.image = &tmp;
--   /* copy (x,y,width,height,data) */
--   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
--   
--   if (texImage->TexFormat->TexelBytes) {
--      /* use multi-byte upload scheme */
--      tex.height = imageHeight;
--      tex.width = imageWidth;
--      tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
--      if (tex.format == R200_TXFORMAT_ABGR8888) {
--	 /* drm will refuse abgr8888 textures. */
--	 tex.format = R200_TXFORMAT_ARGB8888;
--      }
--      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
--      tex.offset += tmp.x & ~1023;
--      tmp.x = tmp.x % 1024;
--      if (t->tile_bits & R200_TXO_MICRO_TILE) {
--	 /* need something like "tiled coordinates" ? */
--	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
--	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
--	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--      }
--      else {
--	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--      }
--      if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
--	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
--	 ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
--	    (texImage->Height >= 16))) {
--	 /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--	    OR if height is smaller than 8 automatically, but if micro tiling is active
--	    the limit is height 16 instead ? */
--	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--      }
--   }
--   else {
--      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
--         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
--         so the kernel module reads the right amount of data. */
--      tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
--      tex.pitch = (BLIT_WIDTH_BYTES / 64);
--      tex.height = (imageHeight + 3) / 4;
--      tex.width = (imageWidth + 3) / 4;
--      switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
--      case R200_TXFORMAT_DXT1:
--           tex.width *= 8;
--           break;
--      case R200_TXFORMAT_DXT23:
--      case R200_TXFORMAT_DXT45:
--           tex.width *= 16;
--           break;
--      default:
--          fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--   do {
--      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
--                                 &tex, sizeof(drm_radeon_texture_t) );
--      if (ret) {
--	 if (R200_DEBUG & DEBUG_IOCTL)
--	    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
--	 usleep(1);
--      }
--   } while ( ret == -EAGAIN );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
--      fprintf( stderr, "   offset=0x%08x\n",
--	       offset );
--      fprintf( stderr, "   image width=%d height=%d\n",
--	       imageWidth, imageHeight );
--      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
--	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
--	       t->image[face][hwlevel].data );
--      exit( 1 );
--   }
--}
--
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- * 
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
--{
--   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
--	       t->base.firstLevel, t->base.lastLevel );
--   }
--
--   if ( !t || t->base.totalSize == 0 || t->image_override )
--      return 0;
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      r200Finish( rmesa->glCtx );
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   if ( t->base.memBlock == NULL ) {
--      int heap;
--
--      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
--				 (driTextureObject *) t );
--      if ( heap == -1 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 return -1;
--      }
--
--      /* Set the base offset of the texture image */
--      t->bufAddr = rmesa->r200Screen->texOffset[heap] 
--	   + t->base.memBlock->ofs;
--      t->pp_txoffset = t->bufAddr;
--       
--      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--	 /* hope it's safe to add that here... */
--	 t->pp_txoffset |= t->tile_bits;
--      }
--
--      /* Mark this texobj as dirty on all units:
--       */
--      t->dirty_state = TEX_ALL;
--   }
--
--   /* Let the world know we've used this memory recently.
--    */
--   driUpdateTextureLRU( (driTextureObject *) t );
--   UNLOCK_HARDWARE( rmesa );
--
--   /* Upload any images that are new */
--   if (t->base.dirty_images[face]) {
--      int i;
--      for ( i = 0 ; i < numLevels ; i++ ) {
--         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
--            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
--			    t->image[face][i].height, face );
--         }
--      }
--      t->base.dirty_images[face] = 0;
--   }
--
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      r200Finish( rmesa->glCtx );
--   }
--
--   return 0;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
-index 0ad5651..eee54cd 100644
---- a/src/mesa/drivers/dri/r200/r200_texstate.c
-+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
-@@ -37,9 +37,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "main/macros.h"
- #include "main/texformat.h"
-+#include "main/teximage.h"
- #include "main/texobj.h"
- #include "main/enums.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -139,257 +142,6 @@ static const struct tx_table tx_table_le[] =
- #undef _ALPHA
- #undef _INVALID
- 
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
-- * too.
-- * 
-- * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-- */
--static void r200SetTexImages( r200ContextPtr rmesa,
--			      struct gl_texture_object *tObj )
--{
--   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
--   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
--   GLint curOffset, blitWidth;
--   GLint i, texelBytes;
--   GLint numLevels;
--   GLint log2Width, log2Height, log2Depth;
--
--   /* Set the hardware texture format
--    */
--   if ( !t->image_override ) {
--      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
--	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
--								tx_table_be;
--
--         t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
--                             R200_TXFORMAT_ALPHA_IN_MAP);
--         t->pp_txfilter &= ~R200_YUV_TO_RGB;
--
--	 t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
--	 t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
--      }
--      else {
--         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
--         return;
--      }
--   }
--
--   texelBytes = baseImage->TexFormat->TexelBytes;
--
--   /* Compute which mipmap levels we really want to send to the hardware.
--    */
--
--   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
--   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--   /* Calculate mipmap offsets and dimensions for blitting (uploading)
--    * The idea is that we lay out the mipmap levels within a block of
--    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--    */
--   curOffset = 0;
--   blitWidth = BLIT_WIDTH_BYTES;
--   t->tile_bits = 0;
--
--   /* figure out if this texture is suitable for tiling. */
--   if (texelBytes) {
--      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--      /* texrect might be able to use micro tiling too in theory? */
--	 (baseImage->Height > 1)) {
--	 /* allow 32 (bytes) x 1 mip (which will use two times the space
--	 the non-tiled version would use) max if base texture is large enough */
--	 if ((numLevels == 1) ||
--	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
--	       (baseImage->Width * texelBytes > 64)) ||
--	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
--	    t->tile_bits |= R200_TXO_MICRO_TILE;
--	 }
--      }
--      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--	 /* we can set macro tiling even for small textures, they will be untiled anyway */
--	 t->tile_bits |= R200_TXO_MACRO_TILE;
--      }
--   }
--
--   for (i = 0; i < numLevels; i++) {
--      const struct gl_texture_image *texImage;
--      GLuint size;
--
--      texImage = tObj->Image[0][i + t->base.firstLevel];
--      if ( !texImage )
--	 break;
--
--      /* find image size in bytes */
--      if (texImage->IsCompressed) {
--      /* need to calculate the size AFTER padding even though the texture is
--         submitted without padding.
--         Only handle pot textures currently - don't know if npot is even possible,
--         size calculation would certainly need (trivial) adjustments.
--         Align (and later pad) to 32byte, not sure what that 64byte blit width is
--         good for? */
--         if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
--            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
--            if ((texImage->Width + 3) < 8) /* width one block */
--               size = texImage->CompressedSize * 4;
--            else if ((texImage->Width + 3) < 16)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--         }
--         else /* DXT3/5, 16 bytes per block */
--            if ((texImage->Width + 3) < 8)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--      }
--      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
--      }
--      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
--	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--	    though the actual offset may be different (if texture is less than
--	    32 bytes width) to the untiled case */
--	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      else {
--	 int w = (texImage->Width * texelBytes + 31) & ~31;
--	 size = w * texImage->Height * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      assert(size > 0);
--
--      /* Align to 32-byte offset.  It is faster to do this unconditionally
--       * (no branch penalty).
--       */
--
--      curOffset = (curOffset + 0x1f) & ~0x1f;
--
--      if (texelBytes) {
--	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
--	 t->image[0][i].y = 0;
--	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
--	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
--      }
--      else {
--         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
--         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
--         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
--         t->image[0][i].height = size / t->image[0][i].width;     
--      }
--
--#if 0
--      /* for debugging only and only  applicable to non-rectangle targets */
--      assert(size % t->image[0][i].width == 0);
--      assert(t->image[0][i].x == 0
--             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
--#endif
--
--      if (0)
--         fprintf(stderr,
--                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--                 i, texImage->Width, texImage->Height,
--                 t->image[0][i].x, t->image[0][i].y,
--                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
--
--      curOffset += size;
--
--   }
--
--   /* Align the total size of texture memory block.
--    */
--   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--   /* Setup remaining cube face blits, if needed */
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      const GLuint faceSize = t->base.totalSize;
--      GLuint face;
--      /* reuse face 0 x/y/width/height - just update the offset when uploading */
--      for (face = 1; face < 6; face++) {
--         for (i = 0; i < numLevels; i++) {
--            t->image[face][i].x =  t->image[0][i].x;
--            t->image[face][i].y =  t->image[0][i].y;
--            t->image[face][i].width  = t->image[0][i].width;
--            t->image[face][i].height = t->image[0][i].height;
--         }
--      }
--      t->base.totalSize = 6 * faceSize; /* total texmem needed */
--   }
--
--
--   /* Hardware state:
--    */
--   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
--   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
--
--   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
--		       R200_TXFORMAT_HEIGHT_MASK |
--                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
--                       R200_TXFORMAT_F5_WIDTH_MASK |
--                       R200_TXFORMAT_F5_HEIGHT_MASK);
--   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
--		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
--
--   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
--   if (tObj->Target == GL_TEXTURE_3D) {
--      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
--      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
--   }
--   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      ASSERT(log2Width == log2Height);
--      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
--                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
--/* don't think we need this bit, if it exists at all - fglrx does not set it */
--                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
--      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
--      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
--   }
--   else {
--      /* If we don't in fact send enough texture coordinates, q will be 1,
--       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
--       */
--      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
--   }
--
--   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
--                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
--
--   /* Only need to round to nearest 32 for textures, but the blitter
--    * requires 64-byte aligned pitches, and we may/may not need the
--    * blitter.   NPOT only!
--    */
--   if ( !t->image_override ) {
--      if (baseImage->IsCompressed)
--         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--      else
--         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
--      t->pp_txpitch -= 32;
--   }
--
--   t->dirty_state = TEX_ALL;
--
--   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
--}
--
--
--
- /* ================================================================
-  * Texture combine functions
-  */
-@@ -981,20 +733,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- {
- 	r200ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
--	    _mesa_lookup_texture(rmesa->glCtx, texname);
--	r200TexObjPtr t;
-+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r200TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
- 
--	t->pp_txoffset = offset;
-+	t->bo = NULL;
-+	t->override_offset = offset;
- 	t->pp_txpitch = pitch - 32;
- 
- 	switch (depth) {
-@@ -1014,6 +765,122 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	}
- }
- 
-+void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
-+		       __DRIdrawable *dPriv)
-+{
-+	struct gl_texture_unit *texUnit;
-+	struct gl_texture_object *texObj;
-+	struct gl_texture_image *texImage;
-+	struct radeon_renderbuffer *rb;
-+	radeon_texture_image *rImage;
-+	radeonContextPtr radeon;
-+	r200ContextPtr rmesa;
-+	struct radeon_framebuffer *rfb;
-+	radeonTexObjPtr t;
-+	uint32_t pitch_val;
-+	uint32_t internalFormat, type, format;
-+
-+	type = GL_BGRA;
-+	format = GL_UNSIGNED_BYTE;
-+	internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
-+
-+	radeon = pDRICtx->driverPrivate;
-+	rmesa = pDRICtx->driverPrivate;
-+
-+	rfb = dPriv->driverPrivate;
-+        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
-+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
-+        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
-+
-+	rImage = get_radeon_texture_image(texImage);
-+	t = radeon_tex_obj(texObj);
-+        if (t == NULL) {
-+    	    return;
-+    	}
-+
-+	radeon_update_renderbuffers(pDRICtx, dPriv);
-+	/* back & depth buffer are useless free them right away */
-+	rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+	}
-+	rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+	rb = rfb->color_rb[0];
-+	if (rb->bo == NULL) {
-+		/* Failed to BO for the buffer */
-+		return;
-+	}
-+	
-+	_mesa_lock_texture(radeon->glCtx, texObj);
-+	if (t->bo) {
-+		radeon_bo_unref(t->bo);
-+		t->bo = NULL;
-+	}
-+	if (rImage->bo) {
-+		radeon_bo_unref(rImage->bo);
-+		rImage->bo = NULL;
-+	}
-+	if (t->mt) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = NULL;
-+	}
-+	if (rImage->mt) {
-+		radeon_miptree_unreference(rImage->mt);
-+		rImage->mt = NULL;
-+	}
-+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
-+				   rb->width, rb->height, 1, 0, rb->cpp);
-+	texImage->RowStride = rb->pitch / rb->cpp;
-+	texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
-+							internalFormat,
-+							type, format, 0);
-+	rImage->bo = rb->bo;
-+	radeon_bo_ref(rImage->bo);
-+	t->bo = rb->bo;
-+	radeon_bo_ref(t->bo);
-+	t->tile_bits = 0;
-+	t->image_override = GL_TRUE;
-+	t->override_offset = 0;
-+	t->pp_txpitch &= (1 << 13) -1;
-+	pitch_val = rb->pitch;
-+	switch (rb->cpp) {
-+	case 4:
-+		t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format;
-+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter;
-+		break;
-+	case 3:
-+	default:
-+		t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
-+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter;
-+		break;
-+	case 2:
-+		t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format;
-+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter;
-+		break;
-+	}
-+        t->pp_txsize = ((rb->width - 1) << RADEON_TEX_USIZE_SHIFT)
-+		   | ((rb->height - 1) << RADEON_TEX_VSIZE_SHIFT);
-+        t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
-+	t->pp_txpitch = pitch_val;
-+        t->pp_txpitch -= 32;
-+
-+	t->validated = GL_TRUE;
-+	_mesa_unlock_texture(radeon->glCtx, texObj);
-+	return;
-+}
-+
-+
-+void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
-+{
-+        r200SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
-+}
-+
-+
- #define REF_COLOR 1
- #define REF_ALPHA 2
- 
-@@ -1207,12 +1074,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
-                                 R200_VOLUME_FILTER_MASK)
- 
- 
-+static void disable_tex_obj_state( r200ContextPtr rmesa, 
-+				   int unit )
-+{
-+   
-+   R200_STATECHANGE( rmesa, vtx );
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-+
-+   if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
-+      TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-+   }
-+
-+   /* Actually want to keep all units less than max active texture
-+    * enabled, right?  Fix this for >2 texunits.
-+    */
-+
-+   {
-+      GLuint tmp = rmesa->TexGenEnabled;
-+
-+      rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenNeedNormals[unit] = GL_FALSE;
-+      rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
-+
-+      if (tmp != rmesa->TexGenEnabled) {
-+	 rmesa->recheck_texgen[unit] = GL_TRUE;
-+	 rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-+      }
-+   }
-+}
- static void import_tex_obj_state( r200ContextPtr rmesa,
- 				  int unit,
--				  r200TexObjPtr texobj )
-+				  radeonTexObjPtr texobj )
- {
- /* do not use RADEON_DB_STATE to avoid stale texture caches */
--   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-+   GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
- 
-    R200_STATECHANGE( rmesa, tex[unit] );
- 
-@@ -1225,36 +1121,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
-    cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
-    cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
-    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
--   }
--   else {
--      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
--   }
- 
--   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
--      GLuint bytesPerFace = texobj->base.totalSize / 6;
--      ASSERT(texobj->base.totalSize % 6 == 0);
-+   if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- 
-       R200_STATECHANGE( rmesa, cube[unit] );
-       cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
--      if (rmesa->r200Screen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
- 	 /* that value is submitted twice. could change cube atom
- 	    to not include that command when new drm is used */
- 	 cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
-       }
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
-    }
- 
--   texobj->dirty_state &= ~(1<<unit);
- }
- 
--
- static void set_texgen_matrix( r200ContextPtr rmesa, 
- 			       GLuint unit,
- 			       const GLfloat *s_plane,
-@@ -1377,7 +1258,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
-    } else {
-       tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
-    }
--
-    if (texUnit->TexGenEnabled & R_BIT) {
-       if (texUnit->GenR.Mode != mode)
- 	 mixed_fallback = GL_TRUE;
-@@ -1517,52 +1397,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
-    return GL_TRUE;
- }
- 
--
--static void disable_tex( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
--      /* Texture unit disabled */
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
--	 rmesa->state.texture.unit[unit].texobj = NULL;
--      }
--
--      R200_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
--	 
--      R200_STATECHANGE( rmesa, vtx );
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
--	 
--      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
--	 TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
--      }
--
--      /* Actually want to keep all units less than max active texture
--       * enabled, right?  Fix this for >2 texunits.
--       */
--
--      {
--	 GLuint tmp = rmesa->TexGenEnabled;
--
--	 rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenNeedNormals[unit] = GL_FALSE;
--	 rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
--
--	 if (tmp != rmesa->TexGenEnabled) {
--	    rmesa->recheck_texgen[unit] = GL_TRUE;
--	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--	 }
--      }
--   }
--}
--
- void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-@@ -1579,237 +1413,169 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
-    }
- }
- 
--static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
-+/**
-+ * Compute the cached hardware register values for the given texture object.
-+ *
-+ * \param rmesa Context pointer
-+ * \param t the r300 texture object
-+ */
-+static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
- {
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-+   int firstlevel = t->mt ? t->mt->firstLevel : 0;
-+   const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel];
-+   GLint log2Width, log2Height, log2Depth, texelBytes;
-+   
-+   if ( t->bo ) {
-+       return;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
-+   log2Width  = firstImage->WidthLog2;
-+   log2Height = firstImage->HeightLog2;
-+   log2Depth  = firstImage->DepthLog2;
-+   texelBytes = firstImage->TexFormat->TexelBytes;
- 
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock && !t->image_override ) 
--	 return GL_FALSE;
--   }
- 
--   set_re_cntl_d3d( ctx, unit, GL_FALSE );
--
--   return GL_TRUE;
--}
--
--#if ENABLE_HW_3D_TEXTURE
--static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Need to load the 3d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-+   if (!t->image_override) {
-+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
-+	    tx_table_be;
-+	 
-+	 t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
-+			     R200_TXFORMAT_ALPHA_IN_MAP);
-+	 t->pp_txfilter &= ~R200_YUV_TO_RGB;
-+	 
-+	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
-+	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
-+      } else {
-+	 _mesa_problem(NULL, "unexpected texture format in %s",
-+		       __FUNCTION__);
-+	 return;
-+      }
-    }
-+   
-+   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
-+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
-+	
-+   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
-+		       R200_TXFORMAT_HEIGHT_MASK |
-+		       R200_TXFORMAT_CUBIC_MAP_ENABLE |
-+		       R200_TXFORMAT_F5_WIDTH_MASK |
-+		       R200_TXFORMAT_F5_HEIGHT_MASK);
-+   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
-+		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
-+   
-+   t->tile_bits = 0;
-+   
-+   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
-+   if (t->base.Target == GL_TEXTURE_3D) {
-+      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
-+      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
- 
--   ASSERT(tObj->Target == GL_TEXTURE_3D);
--
--   /* R100 & R200 do not support mipmaps for 3D textures.
--    */
--   if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
--      return GL_FALSE;
-    }
--
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock ) 
--	 return GL_FALSE;
-+   else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      ASSERT(log2Width == log2Height);
-+      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
-+			 (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
-+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
-+			 (R200_TXFORMAT_CUBIC_MAP_ENABLE));
-+      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
-+      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
-    }
--
--   set_re_cntl_d3d( ctx, unit, GL_TRUE );
--
--   return GL_TRUE;
--}
--#endif
--
--static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--   GLuint face;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      for (face = 0; face < 6; face++)
--         t->base.dirty_images[face] = ~0;
-+   else {
-+      /* If we don't in fact send enough texture coordinates, q will be 1,
-+       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
-+       */
-+      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
-+   t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
-+		   | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
- 
--   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
--        t->base.dirty_images[2] || t->base.dirty_images[3] ||
--        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
--      /* flush */
--      R200_FIREVERTICES( rmesa );
--      /* layout memory space, once for all faces */
--      r200SetTexImages( rmesa, tObj );
--   }
--
--   /* upload (per face) */
--   for (face = 0; face < 6; face++) {
--      if (t->base.dirty_images[face]) {
--         r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
--      }
--   }
--      
--   if ( !t->base.memBlock ) {
--      /* texmem alloc failed, use s/w fallback */
--      return GL_FALSE;
-+   if ( !t->image_override ) {
-+      if (firstImage->IsCompressed)
-+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
-+      else
-+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
-+      t->pp_txpitch -= 32;
-    }
- 
--   set_re_cntl_d3d( ctx, unit, GL_TRUE );
--
--   return GL_TRUE;
--}
--
--static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
-+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-       t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
--   }
--
--   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock &&
--           !t->image_override &&
--           !rmesa->prefer_gart_client_texturing ) 
--	 return GL_FALSE;
-    }
- 
--   set_re_cntl_d3d( ctx, unit, GL_FALSE );
--
--   return GL_TRUE;
- }
- 
--
--static GLboolean update_tex_common( GLcontext *ctx, int unit )
-+static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Fallback if there's a texture border */
--   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
--       return GL_FALSE;
--
--   /* Update state if this is a different texture object to last
--    * time.
--    */
--   if ( rmesa->state.texture.unit[unit].texobj != t ) {
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
-+   radeonTexObj *t = radeon_tex_obj(texObj);
- 
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
--	     ~(1UL << unit);
--      }
--
--      rmesa->state.texture.unit[unit].texobj = t;
--      t->base.bound |= (1UL << unit);
--      t->dirty_state |= 1<<unit;
--      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
--   }
--
--
--   /* Newly enabled?
--    */
--   if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
--      R200_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
--
--      R200_STATECHANGE( rmesa, vtx );
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
-+   if (!radeon_validate_texture_miptree(ctx, texObj))
-+      return GL_FALSE;
- 
--      rmesa->recheck_texgen[unit] = GL_TRUE;
--   }
-+   r200_validate_texgen(ctx, unit);
-+   /* Configure the hardware registers (more precisely, the cached version
-+    * of the hardware registers). */
-+   setup_hardware_state(rmesa, t);
-+
-+   if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
-+       texObj->Target == GL_TEXTURE_2D ||
-+       texObj->Target == GL_TEXTURE_1D)
-+      set_re_cntl_d3d( ctx, unit, GL_FALSE );
-+   else
-+      set_re_cntl_d3d( ctx, unit, GL_TRUE );
-+   R200_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
-+   
-+   R200_STATECHANGE( rmesa, vtx );
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
- 
--   if (t->dirty_state & (1<<unit)) {
--      import_tex_obj_state( rmesa, unit, t );
--   }
-+   rmesa->recheck_texgen[unit] = GL_TRUE;
-+   import_tex_obj_state( rmesa, unit, t );
- 
-    if (rmesa->recheck_texgen[unit]) {
-       GLboolean fallback = !r200_validate_texgen( ctx, unit );
-       TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
-       rmesa->recheck_texgen[unit] = 0;
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
--   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
--   return !t->border_fallback;
--}
-+   t->validated = GL_TRUE;
- 
-+   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
- 
-+   return !t->border_fallback;
-+}
- 
--static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
-+static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
- 
--   if ( unitneeded & (TEXTURE_RECT_BIT) ) {
--      return (enable_tex_rect( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
--      return (enable_tex_2d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--#if ENABLE_HW_3D_TEXTURE
--   else if ( unitneeded & (TEXTURE_3D_BIT) ) {
--      return (enable_tex_3d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--#endif
--   else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
--      return (enable_tex_cube( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( unitneeded ) {
--      return GL_FALSE;
--   }
--   else {
--      disable_tex( ctx, unit );
--      return GL_TRUE;
-+   if (!unitneeded) {
-+      /* disable the unit */
-+     disable_tex_obj_state(rmesa, unit);
-+     return GL_TRUE;
-    }
-+
-+   if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
-+    _mesa_warning(ctx,
-+		  "failed to validate texture for unit %d.\n",
-+		  unit);
-+    rmesa->state.texture.unit[unit].texobj = NULL;
-+    return GL_FALSE;
-+  }
-+
-+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
-+  return GL_TRUE;
- }
- 
- 
-@@ -1850,11 +1616,11 @@ void r200UpdateTextureState( GLcontext *ctx )
- 
-    FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
- 
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       r200ChooseVertexState( ctx );
- 
- 
--   if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+   if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- 
-       /*
-        * T0 hang workaround -------------
-@@ -1867,7 +1633,7 @@ void r200UpdateTextureState( GLcontext *ctx )
- 	 R200_STATECHANGE(rmesa, tex[1]);
- 	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
- 	 if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
--	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-+	   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
- 	 rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
-       }
-       else if (!ctx->ATIFragmentShader._Enabled) {
-diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
-index 4ce93b5..620f29b 100644
---- a/src/mesa/drivers/dri/r200/r200_vertprog.c
-+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
-@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) {
-    }
-    /* could optimize setting up vertex progs away for non-tcl hw */
-    fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
--      rmesa->r200Screen->drmSupportsVertexProgram);
-+      rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
-    TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
--   if (rmesa->TclFallback) return;
-+   if (rmesa->radeon.TclFallback) return;
- 
-    R200_STATECHANGE( rmesa, vap );
-    /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
-diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
-index 6ca9342..62715e3 100644
---- a/src/mesa/drivers/dri/r300/Makefile
-+++ b/src/mesa/drivers/dri/r300/Makefile
-@@ -3,6 +3,8 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = r300_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c
-@@ -20,20 +22,25 @@ COMMON_SOURCES = \
- 	../common/xmlconfig.c \
- 	../common/dri_util.c
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c \
-+	radeon_fbo.c
-+
- DRIVER_SOURCES = \
- 		 radeon_screen.c \
--		 radeon_context.c \
--		 radeon_ioctl.c \
--		 radeon_lock.c \
--		 radeon_span.c \
--		 radeon_state.c \
--		 r300_mem.c \
- 		 r300_context.c \
- 		 r300_ioctl.c \
- 		 r300_cmdbuf.c \
- 		 r300_state.c \
- 		 r300_render.c \
--		 r300_texmem.c \
- 		 r300_tex.c \
- 		 r300_texstate.c \
- 		 radeon_program.c \
-@@ -41,6 +48,7 @@ DRIVER_SOURCES = \
- 		 radeon_program_pair.c \
- 		 radeon_nqssadce.c \
- 		 r300_vertprog.c \
-+		 r300_fragprog_common.c \
- 		 r300_fragprog.c \
- 		 r300_fragprog_swizzle.c \
- 		 r300_fragprog_emit.c \
-@@ -49,12 +57,15 @@ DRIVER_SOURCES = \
- 		 r300_shader.c \
- 		 r300_emit.c \
- 		 r300_swtcl.c \
-+		 $(RADEON_COMMON_SOURCES) \
- 		 $(EGL_SOURCES)
- 
- C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
- 
- DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
--	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
-+	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
-+#	-DRADEON_BO_TRACK \
-+	-Wall
- 
- SYMLINKS = \
- 	server/radeon_dri.c \
-@@ -68,7 +79,29 @@ COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
- 	radeon_screen.h \
--	radeon_span.h
-+	radeon_span.h \
-+	radeon_span.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_bo_legacy.h \
-+	radeon_cs_legacy.h \
-+	radeon_bocs_wrapper.h \
-+	radeon_lock.c \
-+	radeon_lock.h \
-+	radeon_common.c \
-+	radeon_common.h \
-+	radeon_common_context.c \
-+	radeon_common_context.h \
-+	radeon_cmdbuf.h \
-+	radeon_dma.c \
-+	radeon_dma.h \
-+	radeon_mipmap_tree.c \
-+	radeon_mipmap_tree.h \
-+	radeon_texture.c \
-+	radeon_texture.h \
-+	radeon_fbo.c
-+
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-index f447275..afca0e2 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-@@ -44,245 +44,336 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "radeon_drm.h"
- 
--#include "radeon_ioctl.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "radeon_reg.h"
- #include "r300_reg.h"
- #include "r300_cmdbuf.h"
- #include "r300_emit.h"
-+#include "radeon_bocs_wrapper.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_state.h"
-+#include "radeon_reg.h"
- 
--// Set this to 1 for extremely verbose debugging of command buffers
--#define DEBUG_CMDBUF		0
-+#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
-+#   define RADEON_ONE_REG_WR        (1 << 15)
- 
--/**
-- * Send the current command buffer via ioctl to the hardware.
-+/** # of dwords reserved for additional instructions that may need to be written
-+ * during flushing.
-  */
--int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
-+#define SPACE_FOR_FLUSHING	4
-+
-+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
- {
--	int ret;
--	int i;
--	drm_radeon_cmd_buffer_t cmd;
--	int start;
--
--	if (r300->radeon.lost_context) {
--		start = 0;
--		r300->radeon.lost_context = GL_FALSE;
--	} else
--		start = r300->cmdbuf.count_reemit;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "%s from %s - %i cliprects\n",
--			__FUNCTION__, caller, r300->radeon.numClipRects);
--
--		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
--			for (i = start; i < r300->cmdbuf.count_used; ++i)
--				fprintf(stderr, "%d: %08x\n", i,
--					r300->cmdbuf.cmd_buf[i]);
--	}
-+    if (r300->radeon.radeonScreen->kernel_mm) {
-+        return ((((*pkt) >> 16) & 0x3FFF) + 1);
-+    } else {
-+        drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
-+        return t->packet0.count;
-+    }
-+    return 0;
-+}
- 
--	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
--	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
-+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
-+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
- 
--	if (r300->radeon.state.scissor.enabled) {
--		cmd.nbox = r300->radeon.state.scissor.numClipRects;
--		cmd.boxes =
--		    (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
--	} else {
--		cmd.nbox = r300->radeon.numClipRects;
--		cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
-+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	drm_r300_cmd_header_t cmd;
-+	uint32_t addr, ndw, i;
-+
-+	if (!r300->radeon.radeonScreen->kernel_mm) {
-+		uint32_t dwords;
-+		dwords = (*atom->check) (ctx, atom);
-+		BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+		OUT_BATCH_TABLE(atom->cmd, dwords);
-+		END_BATCH();
-+		return;
- 	}
- 
--	ret = drmCommandWrite(r300->radeon.dri.fd,
--			      DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "Syncing in %s (from %s)\n\n",
--			__FUNCTION__, caller);
--		radeonWaitForIdleLocked(&r300->radeon);
-+	cmd.u = atom->cmd[0];
-+	addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
-+	ndw = cmd.vpu.count * 4;
-+	if (ndw) {
-+
-+		if (r300->vap_flush_needed) {
-+			BEGIN_BATCH_NO_AUTOSTATE(15 + ndw);
-+
-+			/* flush processing vertices */
-+			OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
-+			OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+			OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
-+			OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
-+			OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+			r300->vap_flush_needed = GL_FALSE;
-+		} else {
-+			BEGIN_BATCH_NO_AUTOSTATE(5 + ndw);
-+		}
-+		OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
-+		OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+		for (i = 0; i < ndw; i++) {
-+			OUT_BATCH(atom->cmd[i+1]);
-+		}
-+		OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+		END_BATCH();
- 	}
--
--	r300->dma.nr_released_bufs = 0;
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
--
--	return ret;
- }
- 
--int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
-+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	int ret;
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	drm_r300_cmd_header_t cmd;
-+	uint32_t addr, ndw, i, sz;
-+	int type, clamp, stride;
-+
-+	if (!r300->radeon.radeonScreen->kernel_mm) {
-+		uint32_t dwords;
-+		dwords = (*atom->check) (ctx, atom);
-+		BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+		OUT_BATCH_TABLE(atom->cmd, dwords);
-+		END_BATCH();
-+		return;
-+	}
- 
--	LOCK_HARDWARE(&r300->radeon);
-+	cmd.u = atom->cmd[0];
-+	sz = cmd.r500fp.count;
-+	addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
-+	type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
-+	clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
- 
--	ret = r300FlushCmdBufLocked(r300, caller);
-+	addr |= (type << 16);
-+	addr |= (clamp << 17);
- 
--	UNLOCK_HARDWARE(&r300->radeon);
-+	stride = type ? 4 : 6;
- 
--	if (ret) {
--		fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
--		_mesa_exit(ret);
--	}
-+	ndw = sz * stride;
-+	if (ndw) {
- 
--	return ret;
-+		BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
-+		OUT_BATCH(addr);
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+		for (i = 0; i < ndw; i++) {
-+			OUT_BATCH(atom->cmd[i+1]);
-+		}
-+		END_BATCH();
-+	}
- }
- 
--static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
-+static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	int i, j, reg;
--	int dwords = (*state->check) (r300, state);
--	drm_r300_cmd_header_t cmd;
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
-+	int notexture = 0;
- 
--	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords,
--		state->cmd_size);
--
--	if (RADEON_DEBUG & DEBUG_VERBOSE) {
--		for (i = 0; i < dwords;) {
--			cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
--			reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
--			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
--					state->name, i, reg, cmd.packet0.count);
--			++i;
--			for (j = 0; j < cmd.packet0.count; j++) {
--				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
--					state->name, i, reg, state->cmd[i]);
--				reg += 4;
--				++i;
--			}
-+	if (numtmus) {
-+		int i;
-+
-+		for(i = 0; i < numtmus; ++i) {
-+		    radeonTexObj *t = r300->hw.textures[i];
-+
-+		    if (!t)
-+			notexture = 1;
-+		}
-+
-+		if (r300->radeon.radeonScreen->kernel_mm && notexture) {
-+			return;
- 		}
-+		BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus);
-+		for(i = 0; i < numtmus; ++i) {
-+		    radeonTexObj *t = r300->hw.textures[i];
-+		    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+		    if (t && !t->image_override) {
-+			    OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+					    RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+		    } else if (!t) {
-+			    OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
-+		    } else { /* override cases */
-+			    if (t->bo) {
-+				    OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-+						    RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+			    } else if (!r300->radeon.radeonScreen->kernel_mm) {
-+				    OUT_BATCH(t->override_offset);
-+			    }
-+			    else
-+			    	OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
-+		    }
-+		}
-+		END_BATCH();
- 	}
- }
- 
--/**
-- * Emit all atoms with a dirty field equal to dirty.
-- *
-- * The caller must have ensured that there is enough space in the command
-- * buffer.
-- */
--static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
-+static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	struct r300_state_atom *atom;
--	uint32_t *dest;
--	int dwords;
--
--	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
--
--	/* Emit WAIT */
--	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit cache flush */
--	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	*dest = R300_TX_FLUSH;
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit END3D */
--	*dest = cmdpacify();
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit actual atoms */
--
--	foreach(atom, &r300->hw.atomlist) {
--		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
--			dwords = (*atom->check) (r300, atom);
--			if (dwords) {
--				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
--					r300PrintStateAtom(r300, atom);
--				}
--				memcpy(dest, atom->cmd, dwords * 4);
--				dest += dwords;
--				r300->cmdbuf.count_used += dwords;
--				atom->dirty = GL_FALSE;
--			} else {
--				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
--					fprintf(stderr, "  skip state %s\n",
--						atom->name);
--				}
--			}
--		}
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t cbpitch;
-+	uint32_t offset = r300->radeon.state.color.draw_offset;
-+	uint32_t dw = 6;
-+    int i;
-+
-+	rrb = radeon_get_colorbuffer(&r300->radeon);
-+	if (!rrb || !rrb->bo) {
-+		fprintf(stderr, "no rrb\n");
-+		return;
- 	}
-+
-+	cbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->cpp == 4)
-+		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+	else
-+		cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+		cbpitch |= R300_COLOR_TILE_ENABLE;
-+
-+    	if (r300->radeon.radeonScreen->kernel_mm)
-+		dw += 2;
-+	BEGIN_BATCH_NO_AUTOSTATE(dw);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+	OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
-+    	if (!r300->radeon.radeonScreen->kernel_mm)
-+		OUT_BATCH(cbpitch);
-+	else
-+		OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	END_BATCH();
-+    if (r300->radeon.radeonScreen->driScreen->dri2.enabled) {
-+        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-+            BEGIN_BATCH_NO_AUTOSTATE(3);
-+            OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
-+            OUT_BATCH(0);
-+            OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) |
-+                    ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT));
-+            END_BATCH();
-+            BEGIN_BATCH_NO_AUTOSTATE(16);
-+            for (i = 0; i < 4; i++) {
-+                OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
-+                OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
-+                OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT));
-+            }
-+            OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
-+            OUT_BATCH(0xAAAA);
-+            OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
-+            OUT_BATCH(0xffffff);
-+            END_BATCH();
-+        } else {
-+            BEGIN_BATCH_NO_AUTOSTATE(3);
-+            OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
-+            OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) |
-+                    (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT));
-+            OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) |
-+                    ((rrb->height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT));
-+            END_BATCH();
-+            BEGIN_BATCH_NO_AUTOSTATE(16);
-+            for (i = 0; i < 4; i++) {
-+                OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
-+                OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT));
-+                OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->width - 1) << R300_CLIPRECT_X_SHIFT) |
-+                          ((R300_SCISSORS_OFFSET + rrb->height - 1) << R300_CLIPRECT_Y_SHIFT));
-+            }
-+            OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
-+            OUT_BATCH(0xAAAA);
-+            OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
-+            OUT_BATCH(0xffffff);
-+            END_BATCH();
-+        }
-+    }
- }
- 
--/**
-- * Copy dirty hardware state atoms into the command buffer.
-- *
-- * We also copy out clean state if we're at the start of a buffer. That makes
-- * it easy to recover from lost contexts.
-- */
--void r300EmitState(r300ContextPtr r300)
-+static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
--		fprintf(stderr, "%s\n", __FUNCTION__);
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t zbpitch;
- 
--	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
--	    && !r300->hw.all_dirty)
-+	rrb = radeon_get_depthbuffer(&r300->radeon);
-+	if (!rrb)
- 		return;
- 
--	/* To avoid going across the entire set of states multiple times, just check
--	 * for enough space for the case of emitting all state, and inline the
--	 * r300AllocCmdBuf code here without all the checks.
--	 */
--	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
--
--	if (!r300->cmdbuf.count_used) {
--		if (RADEON_DEBUG & DEBUG_STATE)
--			fprintf(stderr, "Begin reemit state\n");
--
--		r300EmitAtoms(r300, GL_FALSE);
--		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
-+	zbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+		zbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+	}
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+		zbpitch |= R300_DEPTHMICROTILE_TILED;
- 	}
- 
--	if (RADEON_DEBUG & DEBUG_STATE)
--		fprintf(stderr, "Begin dirty state\n");
--
--	r300EmitAtoms(r300, GL_TRUE);
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
-+	END_BATCH();
-+}
- 
--	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
-+static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t format = 0;
-+
-+	rrb = radeon_get_depthbuffer(&r300->radeon);
-+	if (!rrb)
-+	  format = 0;
-+	else {
-+	  if (rrb->cpp == 2)
-+	    format = R300_DEPTHFORMAT_16BIT_INT_Z;
-+	  else if (rrb->cpp == 4)
-+	    format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
-+	}
- 
--	r300->hw.is_dirty = GL_FALSE;
--	r300->hw.all_dirty = GL_FALSE;
-+	OUT_BATCH(atom->cmd[0]);
-+	atom->cmd[1] &= ~0xf;
-+	atom->cmd[1] |= format;
-+	OUT_BATCH(atom->cmd[1]);
-+	OUT_BATCH(atom->cmd[2]);
-+	OUT_BATCH(atom->cmd[3]);
-+	OUT_BATCH(atom->cmd[4]);
- }
- 
--#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
--#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
--#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
--
--static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
-+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	return atom->cmd_size;
- }
- 
--static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
-+static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
- {
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	int cnt;
--	cnt = packet0_count(atom->cmd);
-+	if (atom->cmd[0] == CP_PACKET2) {
-+		return 0;
-+	}
-+	cnt = packet0_count(r300, atom->cmd);
- 	return cnt ? cnt + 1 : 0;
- }
- 
--static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = vpu_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
- 
--static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 6) + 1 : 0;
- }
- 
--static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
-@@ -295,8 +386,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-       r300->hw.ATOM.idx = (IDX);					\
-       r300->hw.ATOM.check = check_##CHK;				\
-       r300->hw.ATOM.dirty = GL_FALSE;					\
--      r300->hw.max_state_size += (SZ);					\
--      insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM);		\
-+      r300->radeon.hw.max_state_size += (SZ);					\
-+      insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM);		\
-    } while (0)
- /**
-  * Allocate memory for the command buffer and initialize the state atom
-@@ -304,7 +395,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-  */
- void r300InitCmdBuf(r300ContextPtr r300)
- {
--	int size, mtu;
-+	int mtu;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	int i;
-@@ -315,7 +406,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- 		is_r500 = 1;
- 
--	r300->hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
-+	r300->radeon.hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
- 
- 	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
- 	if (RADEON_DEBUG & DEBUG_TEXTURE) {
-@@ -323,97 +414,97 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	}
- 
- 	/* Setup the atom linked list */
--	make_empty_list(&r300->hw.atomlist);
--	r300->hw.atomlist.name = "atom-list";
-+	make_empty_list(&r300->radeon.hw.atomlist);
-+	r300->radeon.hw.atomlist.name = "atom-list";
- 
- 	/* Initialize state atoms */
- 	ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
--	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
-+	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
- 	ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
- 	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
- 	if (is_r500) {
- 	    ALLOC_STATE(vap_index_offset, always, 2, 0);
--	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
-+	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
- 	    r300->hw.vap_index_offset.cmd[1] = 0;
- 	}
- 	ALLOC_STATE(vte, always, 3, 0);
--	r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
-+	r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
- 	ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
--	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
-+	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
- 	ALLOC_STATE(vap_cntl_status, always, 2, 0);
--	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
-+	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
- 	ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
- 	r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
- 	r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
- 	ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
--	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
-+	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
- 	ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
--	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
-+	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(vap_clip_cntl, always, 2, 0);
--		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
-+		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
- 		ALLOC_STATE(vap_clip, always, 5, 0);
--		r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
-+		r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
- 		ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
--		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
-+		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
- 	}
- 
- 	ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
- 	r300->hw.vof.cmd[R300_VOF_CMD_0] =
--	    cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
- 		r300->hw.pvs.cmd[R300_PVS_CMD_0] =
--		    cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
- 	}
- 
- 	ALLOC_STATE(gb_enable, always, 2, 0);
--	r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
-+	r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
- 	ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
--	r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
-+	r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
- 	ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
--	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
-+	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
- 	ALLOC_STATE(ga_point_s0, always, 5, 0);
--	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
-+	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
- 	ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
--	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
-+	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
- 	ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
--	r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
-+	r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
- 	ALLOC_STATE(ga_point_minmax, always, 4, 0);
--	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
-+	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
- 	ALLOC_STATE(lcntl, always, 2, 0);
--	r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
-+	r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
- 	ALLOC_STATE(ga_line_stipple, always, 4, 0);
--	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
-+	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
- 	ALLOC_STATE(shade, always, 5, 0);
--	r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
-+	r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
- 	ALLOC_STATE(polygon_mode, always, 4, 0);
--	r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
-+	r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
- 	ALLOC_STATE(fogp, always, 3, 0);
--	r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
-+	r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
- 	ALLOC_STATE(zbias_cntl, always, 2, 0);
--	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
-+	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
- 	ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
- 	r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
--	    cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
- 	ALLOC_STATE(occlusion_cntl, always, 2, 0);
--	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
-+	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
- 	ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
--	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
-+	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
- 	ALLOC_STATE(su_depth_scale, always, 3, 0);
--	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
-+	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
- 	ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
--	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
-+	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
- 	if (is_r500) {
- 		ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
- 		for (i = 0; i < 8; i++) {
- 			r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
- 			  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-@@ -422,133 +513,149 @@ void r300InitCmdBuf(r300ContextPtr r300)
-                           (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
- 		}
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
- 	} else {
- 		ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
- 	}
- 	ALLOC_STATE(sc_hyperz, always, 3, 0);
--	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
-+	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
- 	ALLOC_STATE(sc_screendoor, always, 2, 0);
--	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
-+	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
- 	ALLOC_STATE(us_out_fmt, always, 6, 0);
--	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
-+	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
- 
- 	if (is_r500) {
- 		ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
-+		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
- 		r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
--		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
--		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
-+		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
-+		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
- 		r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
- 
- 		ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
--		r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
-+		r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
-+		r300->hw.r500fp.emit = emit_r500fp;
- 		ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
--		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
-+		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
-+		r300->hw.r500fp_const.emit = emit_r500fp;
- 	} else {
- 		ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
--		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
-+		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
-+		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
-+
- 		ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
--		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
-+		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
- 
- 		ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
--		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
-+		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
- 		ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
--		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
-+		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
- 		ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
--		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
-+		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
- 		ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
--		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
-+		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
- 		ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
--		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
-+		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
- 	}
- 	ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
--	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
-+	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
- 	ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
--	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
-+	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
- 	ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
--	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
-+	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
- 	ALLOC_STATE(fg_depth_src, always, 2, 0);
--	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
-+	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
- 	ALLOC_STATE(rb3d_cctl, always, 2, 0);
--	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
-+	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
- 	ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
--	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
-+	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
- 	ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
--	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
-+	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
- 	if (is_r500) {
- 		ALLOC_STATE(blend_color, always, 3, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
- 	} else {
- 		ALLOC_STATE(blend_color, always, 2, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
- 	}
- 	ALLOC_STATE(rop, always, 2, 0);
--	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
-+	r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
- 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
--	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
--	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
-+	r300->hw.cb.emit = &emit_cb_offset;
- 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
--	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
-+	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
- 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
--	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
-+	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- 	ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
--	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
-+	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
- 	ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
- 	r300->hw.zs.cmd[R300_ZS_CMD_0] =
--	    cmdpacket0(R300_ZB_CNTL, 3);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
-+
- 	ALLOC_STATE(zstencil_format, always, 5, 0);
- 	r300->hw.zstencil_format.cmd[0] =
--	    cmdpacket0(R300_ZB_FORMAT, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
-+	r300->hw.zstencil_format.emit = emit_zstencil_format;
-+
- 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
--	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
-+	r300->hw.zb.emit = emit_zb_offset;
- 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
--	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
--	ALLOC_STATE(unk4F30, always, 3, 0);
--	r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
-+	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
-+	ALLOC_STATE(zb_zmask, always, 3, 0);
-+	r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2);
- 	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
--	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
-+	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
- 	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
--	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
-+	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
- 
- 	/* VPU only on TCL */
- 	if (has_tcl) {
-    	        int i;
- 		ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
--		r300->hw.vpi.cmd[R300_VPI_CMD_0] =
--		    cmdvpu(R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.cmd[0] =
-+		    cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.emit = emit_vpu;
- 
- 		if (is_r500) {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
--				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R500_PVS_UCP_START + i, 1);
-+			  ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
-+			  r300->hw.vpucp[i].cmd[0] =
-+				  cmdvpu(r300->radeon.radeonScreen,
-+                           R500_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		} else {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
- 				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].cmd[0] =
-+					cmdvpu(r300->radeon.radeonScreen,
-+					       R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		}
- 	}
-@@ -556,130 +663,37 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	/* Textures */
- 	ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
- 
- 	ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
- 
- 	ALLOC_STATE(tex.size, variable, mtu + 1, 0);
--	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
-+	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
- 
- 	ALLOC_STATE(tex.format, variable, mtu + 1, 0);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
- 
- 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
--	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
-+	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
- 
--	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
-+	ALLOC_STATE(tex.offset, variable, 1, 0);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
-+	r300->hw.tex.offset.emit = &emit_tex_offsets;
- 
- 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
- 
- 	ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
--
--	r300->hw.is_dirty = GL_TRUE;
--	r300->hw.all_dirty = GL_TRUE;
--
--	/* Initialize command buffer */
--	size =
--	    256 * driQueryOptioni(&r300->radeon.optionCache,
--				  "command_buffer_size");
--	if (size < 2 * r300->hw.max_state_size) {
--		size = 2 * r300->hw.max_state_size + 65535;
--	}
--	if (size > 64 * 256)
--		size = 64 * 256;
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
--		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
--			sizeof(drm_r300_cmd_header_t));
--		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
--			sizeof(drm_radeon_cmd_buffer_t));
--		fprintf(stderr,
--			"Allocating %d bytes command buffer (max state is %d bytes)\n",
--			size * 4, r300->hw.max_state_size * 4);
--	}
--
--	r300->cmdbuf.size = size;
--	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
--}
--
--/**
-- * Destroy the command buffer and state atoms.
-- */
--void r300DestroyCmdBuf(r300ContextPtr r300)
--{
--	struct r300_state_atom *atom;
--
--	FREE(r300->cmdbuf.cmd_buf);
--
--	foreach(atom, &r300->hw.atomlist) {
--		FREE(atom->cmd);
--	}
--}
--
--void r300EmitBlit(r300ContextPtr rmesa,
--		  GLuint color_fmt,
--		  GLuint src_pitch,
--		  GLuint src_offset,
--		  GLuint dst_pitch,
--		  GLuint dst_offset,
--		  GLint srcx, GLint srcy,
--		  GLint dstx, GLint dsty, GLuint w, GLuint h)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr,
--			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
--			dst_pitch, dst_offset, dstx, dsty, w, h);
--
--	assert((src_pitch & 63) == 0);
--	assert((dst_pitch & 63) == 0);
--	assert((src_offset & 1023) == 0);
--	assert((dst_offset & 1023) == 0);
--	assert(w < (1 << 16));
--	assert(h < (1 << 16));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
--
--	cmd[0].header.cmd_type = R300_CMD_PACKET3;
--	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
--	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
--	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_BRUSH_NONE |
--		    (color_fmt << 8) |
--		    RADEON_GMC_SRC_DATATYPE_COLOR |
--		    RADEON_ROP3_S |
--		    RADEON_DP_SRC_SOURCE_MEMORY |
--		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
--
--	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
--	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
--	cmd[5].u = (srcx << 16) | srcy;
--	cmd[6].u = (dstx << 16) | dsty;	/* dst */
--	cmd[7].u = (w << 16) | h;
--}
--
--void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
--{
--	drm_r300_cmd_header_t *cmd;
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
- 
--	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
-+	r300->radeon.hw.is_dirty = GL_TRUE;
-+	r300->radeon.hw.all_dirty = GL_TRUE;
- 
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].u = 0;
--	cmd[0].wait.cmd_type = R300_CMD_WAIT;
--	cmd[0].wait.flags = flags;
-+	rcommonInitCmdBuf(&r300->radeon);
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-index a8eaa58..3786813 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-@@ -38,79 +38,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "r300_context.h"
- 
--extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
--extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
--
--extern void r300EmitState(r300ContextPtr r300);
--
- extern void r300InitCmdBuf(r300ContextPtr r300);
--extern void r300DestroyCmdBuf(r300ContextPtr r300);
--
--/**
-- * Make sure that enough space is available in the command buffer
-- * by flushing if necessary.
-- *
-- * \param dwords The number of dwords we need to be free on the command buffer
-- */
--static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
--					     int dwords, const char *caller)
--{
--	assert(dwords < r300->cmdbuf.size);
--
--	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
--		r300FlushCmdBuf(r300, caller);
--}
--
--/**
-- * Allocate the given number of dwords in the command buffer and return
-- * a pointer to the allocated area.
-- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
-- * causes state reemission after a flush. This is necessary to ensure
-- * correct hardware state after an unlock.
-- */
--static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
--					       int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
--
--static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
--					    int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	if (!r300->cmdbuf.count_used) {
--		if (RADEON_DEBUG & DEBUG_IOCTL)
--			fprintf(stderr,
--				"Reemit state after flush (from %s)\n", caller);
--		r300EmitState(r300);
--	}
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
- 
--extern void r300EmitBlit(r300ContextPtr rmesa,
--			 GLuint color_fmt,
--			 GLuint src_pitch,
--			 GLuint src_offset,
--			 GLuint dst_pitch,
--			 GLuint dst_offset,
--			 GLint srcx, GLint srcy,
--			 GLint dstx, GLint dsty, GLuint w, GLuint h);
-+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
-+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
- 
--extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
--extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
--extern void r300EmitVertexShader(r300ContextPtr rmesa);
--extern void r300EmitPixelShader(r300ContextPtr rmesa);
-+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
-+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
-+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
- 
- #endif				/* __R300_CMDBUF_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
-index 12bee1a..70c7730 100644
---- a/src/mesa/drivers/dri/r300/r300_context.c
-+++ b/src/mesa/drivers/dri/r300/r300_context.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/extensions.h"
- #include "main/state.h"
- #include "main/bufferobj.h"
-+#include "main/texobj.h"
- 
- #include "swrast/swrast.h"
- #include "swrast_setup/swrast_setup.h"
-@@ -55,19 +56,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drivers/common/driverfuncs.h"
- 
--#include "radeon_ioctl.h"
--#include "radeon_span.h"
- #include "r300_context.h"
-+#include "radeon_context.h"
-+#include "radeon_span.h"
- #include "r300_cmdbuf.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_tex.h"
- #include "r300_emit.h"
- #include "r300_swtcl.h"
-+#include "radeon_bocs_wrapper.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #include "vblank.h"
- #include "utils.h"
-@@ -83,14 +82,17 @@ int hw_tcl_on = 1;
- #define need_GL_EXT_blend_equation_separate
- #define need_GL_EXT_blend_func_separate
- #define need_GL_EXT_blend_minmax
-+#define need_GL_EXT_framebuffer_object
- #define need_GL_EXT_fog_coord
- #define need_GL_EXT_gpu_program_parameters
- #define need_GL_EXT_secondary_color
- #define need_GL_EXT_stencil_two_side
- #define need_GL_ATI_separate_stencil
- #define need_GL_NV_vertex_program
-+
- #include "extension_helper.h"
- 
-+
- const struct dri_extension card_extensions[] = {
-   /* *INDENT-OFF* */
-   {"GL_ARB_depth_texture",		NULL},
-@@ -111,6 +113,7 @@ const struct dri_extension card_extensions[] = {
-   {"GL_EXT_blend_func_separate",	GL_EXT_blend_func_separate_functions},
-   {"GL_EXT_blend_minmax",		GL_EXT_blend_minmax_functions},
-   {"GL_EXT_blend_subtract",		NULL},
-+  {"GL_EXT_packed_depth_stencil",	NULL},
-   {"GL_EXT_fog_coord",			GL_EXT_fog_coord_functions },
-   {"GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions},
-   {"GL_EXT_secondary_color", 		GL_EXT_secondary_color_functions},
-@@ -138,6 +141,11 @@ const struct dri_extension card_extensions[] = {
- };
- 
- 
-+const struct dri_extension mm_extensions[] = {
-+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
-+  { NULL, NULL }
-+};
-+
- /**
-  * The GL 2.0 functions are needed to make display lists work with
-  * functions added by GL_ATI_separate_stencil.
-@@ -164,6 +172,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
- 	&_tnl_fog_coordinate_stage,
- 	&_tnl_texgen_stage,
- 	&_tnl_texture_transform_stage,
-+	&_tnl_point_attenuation_stage,
- 	&_tnl_vertex_program_stage,
- 
- 	/* Try again to go to tcl?
-@@ -183,6 +192,143 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
- 	0,
- };
- 
-+static void r300_get_lock(radeonContextPtr rmesa)
-+{
-+	drm_radeon_sarea_t *sarea = rmesa->sarea;
-+
-+	if (sarea->ctx_owner != rmesa->dri.hwContext) {
-+		sarea->ctx_owner = rmesa->dri.hwContext;
-+		if (!rmesa->radeonScreen->kernel_mm)
-+			radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
-+	}
-+}
-+
-+static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+    /* please flush pipe do all pending work */
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    radeon_cs_write_dword(cs, 0x00FFFFFF);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_HYPERZ, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_US_CONFIG, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_ZB_CNTL, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_RB3D_DSTCACHE_CTLSTAT, 1));
-+    radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_ZB_ZCACHE_CTLSTAT, 1));
-+    radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
-+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
-+                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
-+}
-+
-+static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
-+{
-+	r300ContextPtr r300 = (r300ContextPtr)radeon;
-+	BATCH_LOCALS(radeon);
-+
-+	r300->vap_flush_needed = GL_TRUE;
-+
-+	cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
-+	END_BATCH();
-+	end_3d(radeon);
-+}
-+
-+static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	if (mode)
-+		r300->radeon.Fallback |= bit;
-+	else
-+		r300->radeon.Fallback &= ~bit;
-+}
-+
-+static void r300_init_vtbl(radeonContextPtr radeon)
-+{
-+	radeon->vtbl.get_lock = r300_get_lock;
-+	radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
-+	radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
-+	radeon->vtbl.swtcl_flush = r300_swtcl_flush;
-+	radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
-+	radeon->vtbl.fallback = r300_fallback;
-+}
-+
-+static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+
-+	ctx->Const.MaxTextureImageUnits =
-+	    driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
-+	ctx->Const.MaxTextureCoordUnits =
-+	    driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
-+	ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits,
-+		 ctx->Const.MaxTextureCoordUnits);
-+	ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-+	ctx->Const.MaxTextureLodBias = 16.0;
-+
-+	if (screen->chip_family >= CHIP_FAMILY_RV515)
-+		ctx->Const.MaxTextureLevels = 13;
-+	else
-+		ctx->Const.MaxTextureLevels = 12;
-+
-+	ctx->Const.MinPointSize = 1.0;
-+	ctx->Const.MinPointSizeAA = 1.0;
-+	ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
-+	ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
-+
-+	ctx->Const.MinLineWidth = 1.0;
-+	ctx->Const.MinLineWidthAA = 1.0;
-+	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
-+	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
-+
-+	ctx->Const.MaxDrawBuffers = 1;
-+
-+	/* currently bogus data */
-+	if (screen->chip_flags & RADEON_CHIPSET_TCL) {
-+		ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
-+		ctx->Const.VertexProgram.MaxNativeInstructions =
-+		  VSF_MAX_FRAGMENT_LENGTH / 4;
-+		ctx->Const.VertexProgram.MaxNativeAttribs = 16;	/* r420 */
-+		ctx->Const.VertexProgram.MaxTemps = 32;
-+		ctx->Const.VertexProgram.MaxNativeTemps =
-+		  /*VSF_MAX_FRAGMENT_TEMPS */ 32;
-+		ctx->Const.VertexProgram.MaxNativeParameters = 256;	/* r420 */
-+		ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
-+	}
-+
-+	if (screen->chip_family >= CHIP_FAMILY_RV515) {
-+		ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
-+		ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
-+		ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS;
-+		ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
-+	} else {
-+		ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
-+		ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
-+		ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS;
-+		ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST;
-+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST;
-+		ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT;
-+		ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
-+	}
-+}
-+
- /* Create the device specific rendering context.
-  */
- GLboolean r300CreateContext(const __GLcontextModes * glVisual,
-@@ -194,13 +340,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	struct dd_function_table functions;
- 	r300ContextPtr r300;
- 	GLcontext *ctx;
--	int tcl_mode, i;
-+	int tcl_mode;
- 
- 	assert(glVisual);
- 	assert(driContextPriv);
- 	assert(screen);
- 
--	/* Allocate the R300 context */
- 	r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
- 	if (!r300)
- 		return GL_FALSE;
-@@ -208,28 +353,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- 		hw_tcl_on = future_hw_tcl_on = 0;
- 
--	/* Parse configuration files.
--	 * Do this here so that initialMaxAnisotropy is set before we create
--	 * the default textures.
--	 */
- 	driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
- 			    screen->driScreen->myNum, "r300");
--	r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
--						     "def_max_anisotropy");
- 
--	/* Init default driver functions then plug in our R300-specific functions
--	 * (the texture functions are especially important)
--	 */
-+	r300_init_vtbl(&r300->radeon);
-+
- 	_mesa_init_driver_functions(&functions);
- 	r300InitIoctlFuncs(&functions);
- 	r300InitStateFuncs(&functions);
- 	r300InitTextureFuncs(&functions);
- 	r300InitShaderFuncs(&functions);
- 
--#ifdef USER_BUFFERS
--	r300_mem_init(r300);
--#endif
--
- 	if (!radeonInitContext(&r300->radeon, &functions,
- 			       glVisual, driContextPriv,
- 			       sharedContextPrivate)) {
-@@ -237,94 +371,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 		return GL_FALSE;
- 	}
- 
--	/* Init r300 context data */
--	r300->dma.buf0_address =
--	    r300->radeon.radeonScreen->buffers->list[0].address;
--
--	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
--	make_empty_list(&r300->swapped);
--
--	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
--	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
--	for (i = 0; i < r300->nr_heaps; i++) {
--		/* *INDENT-OFF* */
--		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
--							       screen->
--							       texSize[i], 12,
--							       RADEON_NR_TEX_REGIONS,
--							       (drmTextureRegionPtr)
--							       r300->radeon.sarea->
--							       tex_list[i],
--							       &r300->radeon.sarea->
--							       tex_age[i],
--							       &r300->swapped,
--							       sizeof
--							       (r300TexObj),
--							       (destroy_texture_object_t
--								*)
--							       r300DestroyTexObj);
--		/* *INDENT-ON* */
--	}
--	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
--					      "texture_depth");
--	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--		r300->texture_depth = (screen->cpp == 4) ?
--		    DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
--
--	/* Set the maximum texture size small enough that we can guarentee that
--	 * all texture units can bind a maximal texture and have them both in
--	 * texturable memory at once.
--	 */
--
- 	ctx = r300->radeon.glCtx;
-+	r300InitConstValues(ctx, screen);
- 
--	ctx->Const.MaxTextureImageUnits =
--	    driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
--	ctx->Const.MaxTextureCoordUnits =
--	    driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
--	ctx->Const.MaxTextureUnits =
--	    MIN2(ctx->Const.MaxTextureImageUnits,
--		 ctx->Const.MaxTextureCoordUnits);
--	ctx->Const.MaxTextureMaxAnisotropy = 16.0;
--	ctx->Const.MaxTextureLodBias = 16.0;
--
--	if (screen->chip_family >= CHIP_FAMILY_RV515)
--	    ctx->Const.MaxTextureLevels = 13;
--	else
--	    ctx->Const.MaxTextureLevels = 12;
--
--        driCalculateMaxTextureLevels( r300->texture_heaps,
--                                      r300->nr_heaps,
--                                      & ctx->Const,
--                                      4,
--                                      ctx->Const.MaxTextureLevels - 1,
--                                      MIN2(ctx->Const.MaxTextureLevels,
--                                           MAX_3D_TEXTURE_LEVELS) - 1,
--                                      ctx->Const.MaxTextureLevels - 1,
--                                      ctx->Const.MaxTextureLevels - 1,
--                                      ctx->Const.MaxTextureLevels - 1,
--                                      GL_FALSE,
--                                      2 );
-+	if (hw_tcl_on)
-+		ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- 
--	ctx->Const.MinPointSize = 1.0;
--	ctx->Const.MinPointSizeAA = 1.0;
--	ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
--	ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
--
--	ctx->Const.MinLineWidth = 1.0;
--	ctx->Const.MinLineWidthAA = 1.0;
--	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
--	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
--
--#ifdef USER_BUFFERS
--	/* Needs further modifications */
--#if 0
--	ctx->Const.MaxArrayLockSize =
--	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
--#endif
--#endif
--
--	ctx->Const.MaxDrawBuffers = 1;
-+	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- 
- 	/* Initialize the software rasterizer and helper modules.
- 	 */
-@@ -333,16 +386,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	_tnl_CreateContext(ctx);
- 	_swsetup_CreateContext(ctx);
- 	_swsetup_Wakeup(ctx);
--	_ae_create_context(ctx);
- 
- 	/* Install the customized pipeline:
- 	 */
- 	_tnl_destroy_pipeline(ctx);
- 	_tnl_install_pipeline(ctx, r300_pipeline);
--
--	/* Try and keep materials and vertices separate:
--	 */
--/* 	_tnl_isolate_materials(ctx, GL_TRUE); */
-+	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
- 
- 	/* Configure swrast and TNL to match hardware characteristics:
- 	 */
-@@ -351,59 +400,38 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	_tnl_allow_pixel_fog(ctx, GL_FALSE);
- 	_tnl_allow_vertex_fog(ctx, GL_TRUE);
- 
--	/* currently bogus data */
--	if (screen->chip_flags & RADEON_CHIPSET_TCL) {
--	        ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
--		ctx->Const.VertexProgram.MaxNativeInstructions =
--		  VSF_MAX_FRAGMENT_LENGTH / 4;
--		ctx->Const.VertexProgram.MaxNativeAttribs = 16;	/* r420 */
--		ctx->Const.VertexProgram.MaxTemps = 32;
--		ctx->Const.VertexProgram.MaxNativeTemps =
--		  /*VSF_MAX_FRAGMENT_TEMPS */ 32;
--		ctx->Const.VertexProgram.MaxNativeParameters = 256;	/* r420 */
--		ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
--	}
--
--	ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
--	ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
--	ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS;
--	ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST;
--	ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST;
--	ctx->Const.FragmentProgram.MaxNativeInstructions =
--	    PFS_MAX_ALU_INST + PFS_MAX_TEX_INST;
--	ctx->Const.FragmentProgram.MaxNativeTexIndirections =
--	    PFS_MAX_TEX_INDIRECT;
--	ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;	/* and these are?? */
--	ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
--	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
-+	radeon_fbo_init(&r300->radeon);
-+   	radeonInitSpanFuncs( ctx );
-+	r300InitCmdBuf(r300);
-+	r300InitState(r300);
-+	r300InitShaderFunctions(r300);
-+	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
-+		r300InitSwtcl(ctx);
- 
- 	driInitExtensions(ctx, card_extensions, GL_TRUE);
-+	if (r300->radeon.radeonScreen->kernel_mm)
-+	  driInitExtensions(ctx, mm_extensions, GL_FALSE);
-+
-+	if (screen->chip_family == CHIP_FAMILY_RS600 ||	screen->chip_family == CHIP_FAMILY_RS690 ||
-+		screen->chip_family == CHIP_FAMILY_RS740) {
-+		r300->radeon.texture_row_align = 64;
-+	}
- 
--	if (driQueryOptionb
--	    (&r300->radeon.optionCache, "disable_stencil_two_side"))
-+	r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
-+						     "def_max_anisotropy");
-+
-+	if (driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side"))
- 		_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
- 
--	if (r300->radeon.glCtx->Mesa_DXTn
--	    && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) {
-+	if (ctx->Mesa_DXTn && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) {
- 		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- 		_mesa_enable_extension(ctx, "GL_S3_s3tc");
--	} else
--	    if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"))
--	{
-+	} else if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) {
- 		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
- 	}
- 
- 	r300->disable_lowimpact_fallback =
--	    driQueryOptionb(&r300->radeon.optionCache,
--			    "disable_lowimpact_fallback");
--
--	radeonInitSpanFuncs(ctx);
--	r300InitCmdBuf(r300);
--	r300InitState(r300);
--	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
--	        r300InitSwtcl(ctx);
--
--	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-+		 driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback");
- 
- 	tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
- 	if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
-@@ -426,145 +454,3 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	return GL_TRUE;
- }
- 
--static void r300FreeGartAllocations(r300ContextPtr r300)
--{
--	int i, ret, tries = 0, done_age, in_use = 0;
--	drm_radeon_mem_free_t memfree;
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--#ifdef USER_BUFFERS
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (r300->rmm->u_list[i].pending) {
--			in_use++;
--		}
--	}
--	/* Cannot flush/lock if no context exists. */
--	if (in_use)
--		r300FlushCmdBuf(r300, __FUNCTION__);
--
--	done_age = radeonGetAge((radeonContextPtr) r300);
--
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (!r300->rmm->u_list[i].pending) {
--			continue;
--		}
--
--		assert(r300->rmm->u_list[i].h_pending == 0);
--
--		tries = 0;
--		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
--			usleep(10);
--			done_age = radeonGetAge((radeonContextPtr) r300);
--		}
--		if (tries >= 1000) {
--			WARN_ONCE("Failed to idle region!");
--		}
--
--		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
--		    (char *)r300->radeon.radeonScreen->gartTextures.map;
--
--		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
--				      DRM_RADEON_FREE, &memfree,
--				      sizeof(memfree));
--		if (ret) {
--			fprintf(stderr, "Failed to free at %p\nret = %s\n",
--				r300->rmm->u_list[i].ptr, strerror(-ret));
--		} else {
--			if (i == r300->rmm->u_last)
--				r300->rmm->u_last--;
--
--			r300->rmm->u_list[i].pending = 0;
--			r300->rmm->u_list[i].ptr = NULL;
--		}
--	}
--	r300->rmm->u_head = i;
--#endif				/* USER_BUFFERS */
--}
--
--/* Destroy the device specific context.
-- */
--void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
--{
--	GET_CURRENT_CONTEXT(ctx);
--	r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate;
--	radeonContextPtr radeon = (radeonContextPtr) r300;
--	radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
--
--	if (RADEON_DEBUG & DEBUG_DRI) {
--		fprintf(stderr, "Destroying context !\n");
--	}
--
--	/* check if we're deleting the currently bound context */
--	if (&r300->radeon == current) {
--		radeonFlush(r300->radeon.glCtx);
--		_mesa_make_current(NULL, NULL, NULL);
--	}
--
--	/* Free r300 context resources */
--	assert(r300);		/* should never be null */
--
--	if (r300) {
--		GLboolean release_texture_heaps;
--
--		release_texture_heaps =
--		    (r300->radeon.glCtx->Shared->RefCount == 1);
--		_swsetup_DestroyContext(r300->radeon.glCtx);
--		_tnl_DestroyContext(r300->radeon.glCtx);
--		_vbo_DestroyContext(r300->radeon.glCtx);
--		_swrast_DestroyContext(r300->radeon.glCtx);
--
--		if (r300->dma.current.buf) {
--			r300ReleaseDmaRegion(r300, &r300->dma.current,
--					     __FUNCTION__);
--#ifndef USER_BUFFERS
--			r300FlushCmdBuf(r300, __FUNCTION__);
--#endif
--		}
--		r300FreeGartAllocations(r300);
--		r300DestroyCmdBuf(r300);
--
--		if (radeon->state.scissor.pClipRects) {
--			FREE(radeon->state.scissor.pClipRects);
--			radeon->state.scissor.pClipRects = NULL;
--		}
--
--		if (release_texture_heaps) {
--			/* This share group is about to go away, free our private
--			 * texture object data.
--			 */
--			int i;
--
--			for (i = 0; i < r300->nr_heaps; i++) {
--				driDestroyTextureHeap(r300->texture_heaps[i]);
--				r300->texture_heaps[i] = NULL;
--			}
--
--			assert(is_empty_list(&r300->swapped));
--		}
--
--		radeonCleanupContext(&r300->radeon);
--
--#ifdef USER_BUFFERS
--		/* the memory manager might be accessed when Mesa frees the shared
--		 * state, so don't destroy it earlier
--		 */
--		r300_mem_destroy(r300);
--#endif
--
--		/* free the option cache */
--		driDestroyOptionCache(&r300->radeon.optionCache);
--
--		FREE(r300);
--	}
--}
-diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
-index 9c49586..379977b 100644
---- a/src/mesa/drivers/dri/r300/r300_context.h
-+++ b/src/mesa/drivers/dri/r300/r300_context.h
-@@ -37,26 +37,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #ifndef __R300_CONTEXT_H__
- #define __R300_CONTEXT_H__
- 
--#include "tnl/t_vertex.h"
- #include "drm.h"
- #include "radeon_drm.h"
- #include "dri_util.h"
--#include "texmem.h"
-+#include "radeon_common.h"
- 
--#include "main/macros.h"
- #include "main/mtypes.h"
--#include "main/colormac.h"
--
--#define USER_BUFFERS
-+#include "shader/prog_instruction.h"
- 
- struct r300_context;
- typedef struct r300_context r300ContextRec;
- typedef struct r300_context *r300ContextPtr;
- 
--#include "radeon_lock.h"
--#include "main/mm.h"
- 
--/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
-+/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
-    I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
-    with other compilers ... GLUE!
- */
-@@ -73,180 +67,14 @@ typedef struct r300_context *r300ContextPtr;
- 	}
- 
- #include "r300_vertprog.h"
--#include "r500_fragprog.h"
--
--/**
-- * This function takes a float and packs it into a uint32_t
-- */
--static INLINE uint32_t r300PackFloat32(float fl)
--{
--	union {
--		float fl;
--		uint32_t u;
--	} u;
--
--	u.fl = fl;
--	return u.u;
--}
--
--/* This is probably wrong for some values, I need to test this
-- * some more.  Range checking would be a good idea also..
-- *
-- * But it works for most things.  I'll fix it later if someone
-- * else with a better clue doesn't
-- */
--static INLINE uint32_t r300PackFloat24(float f)
--{
--	float mantissa;
--	int exponent;
--	uint32_t float24 = 0;
--
--	if (f == 0.0)
--		return 0;
--
--	mantissa = frexpf(f, &exponent);
--
--	/* Handle -ve */
--	if (mantissa < 0) {
--		float24 |= (1 << 23);
--		mantissa = mantissa * -1.0;
--	}
--	/* Handle exponent, bias of 63 */
--	exponent += 62;
--	float24 |= (exponent << 16);
--	/* Kill 7 LSB of mantissa */
--	float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
--
--	return float24;
--}
--
--/************ DMA BUFFERS **************/
--
--/* Need refcounting on dma buffers:
-- */
--struct r300_dma_buffer {
--	int refcount;		/**< the number of retained regions in buf */
--	drmBufPtr buf;
--	int id;
--};
--#undef GET_START
--#ifdef USER_BUFFERS
--#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
--#else
--#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--#endif
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r300_dma_region {
--	struct r300_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--
--	int aos_offset;		/* address in GART memory */
--	int aos_stride;		/* distance between elements, in dwords */
--	int aos_size;		/* number of components (1-4) */
--};
--
--struct r300_dma {
--	/* Active dma region.  Allocations for vertices and retained
--	 * regions come from here.  Also used for emitting random vertices,
--	 * these may be flushed by calling flush_current();
--	 */
--	struct r300_dma_region current;
- 
--	void (*flush) (r300ContextPtr);
--
--	char *buf0_address;	/* start of buf[0], for index calcs */
--
--	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
--	 * for which a DISCARD command is currently queued in the command buffer.
--	 */
--	GLuint nr_released_bufs;
--};
--
--       /* Texture related */
--
--typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
--
--/* Maximum number of mipmap levels supported by any supported GPU
-- */
--#define R300_MAX_TEXTURE_LEVELS 13
--
--/* Texture object in locally shared texture space.
-- */
--struct r300_tex_obj {
--	driTextureObject base;
--
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	drm_radeon_tex_image_t image[6][R300_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
--
--	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
--
--	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
--	/* hardware register values */
--	/* Note that R200 has 8 registers per texture and R300 only 7 */
--	GLuint filter;
--	GLuint filter_1;
--	GLuint pitch_reg;
--	GLuint size;		/* npot only */
--	GLuint format;
--	GLuint offset;		/* Image location in the card's address space.
--				   All cube faces follow. */
--	GLuint unknown4;
--	GLuint unknown5;
--	/* end hardware registers */
--
--	/* registers computed by r200 code - keep them here to
--	   compare against what is actually written.
--
--	   to be removed later.. */
--	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--	GLuint format_x;
--
--	GLboolean border_fallback;
--
--	GLuint tile_bits;	/* hw texture tile bits used on this texture */
--};
--
--struct r300_texture_env_state {
--	r300TexObjPtr texobj;
--	GLenum format;
--	GLenum envMode;
--};
- 
- /* The blit width for texture uploads
-  */
- #define R300_BLIT_WIDTH_BYTES 1024
- #define R300_MAX_TEXTURE_UNITS 8
- 
--struct r300_texture_state {
--	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
--	int tc_count;		/* number of incoming texture coordinates from VAP */
--};
- 
--/**
-- * A block of hardware state.
-- *
-- * When check returns non-zero, the returned number of dwords must be
-- * copied verbatim into the command buffer in order to update a state atom
-- * when it is dirty.
-- */
--struct r300_state_atom {
--	struct r300_state_atom *next, *prev;
--	const char *name;	/* for debug */
--	int cmd_size;		/* maximum size in dwords */
--	GLuint idx;		/* index in an array (e.g. textures) */
--	uint32_t *cmd;
--	GLboolean dirty;
--
--	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
--};
- 
- #define R300_VPT_CMD_0		0
- #define R300_VPT_XSCALE		1
-@@ -463,124 +291,98 @@ struct r300_state_atom {
-  * Cache for hardware register state.
-  */
- struct r300_hw_state {
--	struct r300_state_atom atomlist;
--
--	GLboolean is_dirty;
--	GLboolean all_dirty;
--	int max_state_size;	/* in dwords */
--
--	struct r300_state_atom vpt;	/* viewport (1D98) */
--	struct r300_state_atom vap_cntl;
--        struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
--	struct r300_state_atom vof;	/* VAP output format register 0x2090 */
--	struct r300_state_atom vte;	/* (20B0) */
--	struct r300_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
--	struct r300_state_atom vap_cntl_status;
--	struct r300_state_atom vir[2];	/* vap input route (2150/21E0) */
--	struct r300_state_atom vic;	/* vap input control (2180) */
--	struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
--	struct r300_state_atom vap_clip_cntl;
--	struct r300_state_atom vap_clip;
--	struct r300_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
--	struct r300_state_atom pvs;	/* pvs_cntl (22D0) */
--	struct r300_state_atom gb_enable;	/* (4008) */
--	struct r300_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
--	struct r300_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
--	struct r300_state_atom ga_triangle_stipple;	/* (4214) */
--	struct r300_state_atom ps;	/* pointsize (421C) */
--	struct r300_state_atom ga_point_minmax;	/* (4230) */
--	struct r300_state_atom lcntl;	/* line control */
--	struct r300_state_atom ga_line_stipple;	/* (4260) */
--	struct r300_state_atom shade;
--	struct r300_state_atom polygon_mode;
--	struct r300_state_atom fogp;	/* fog parameters (4294) */
--	struct r300_state_atom ga_soft_reset;	/* (429C) */
--	struct r300_state_atom zbias_cntl;
--	struct r300_state_atom zbs;	/* zbias (42A4) */
--	struct r300_state_atom occlusion_cntl;
--	struct r300_state_atom cul;	/* cull cntl (42B8) */
--	struct r300_state_atom su_depth_scale;	/* (42C0) */
--	struct r300_state_atom rc;	/* rs control (4300) */
--	struct r300_state_atom ri;	/* rs interpolators (4310) */
--	struct r300_state_atom rr;	/* rs route (4330) */
--	struct r300_state_atom sc_hyperz;	/* (43A4) */
--	struct r300_state_atom sc_screendoor;	/* (43E8) */
--	struct r300_state_atom fp;	/* fragment program cntl + nodes (4600) */
--	struct r300_state_atom fpt;	/* texi - (4620) */
--	struct r300_state_atom us_out_fmt;	/* (46A4) */
--	struct r300_state_atom r500fp;	/* r500 fp instructions */
--	struct r300_state_atom r500fp_const;	/* r500 fp constants */
--	struct r300_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
--	struct r300_state_atom fogs;	/* fog state (4BC0) */
--	struct r300_state_atom fogc;	/* fog color (4BC8) */
--	struct r300_state_atom at;	/* alpha test (4BD4) */
--	struct r300_state_atom fg_depth_src;	/* (4BD8) */
--	struct r300_state_atom fpp;	/* 0x4C00 and following */
--	struct r300_state_atom rb3d_cctl;	/* (4E00) */
--	struct r300_state_atom bld;	/* blending (4E04) */
--	struct r300_state_atom cmk;	/* colormask (4E0C) */
--	struct r300_state_atom blend_color;	/* constant blend color */
--	struct r300_state_atom rop;	/* ropcntl */
--	struct r300_state_atom cb;	/* colorbuffer (4E28) */
--	struct r300_state_atom rb3d_dither_ctl;	/* (4E50) */
--	struct r300_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
--	struct r300_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
--	struct r300_state_atom zs;	/* zstencil control (4F00) */
--	struct r300_state_atom zstencil_format;
--	struct r300_state_atom zb;	/* z buffer (4F20) */
--	struct r300_state_atom zb_depthclearvalue;	/* (4F28) */
--	struct r300_state_atom unk4F30;	/* (4F30) */
--	struct r300_state_atom zb_hiz_offset;	/* (4F44) */
--	struct r300_state_atom zb_hiz_pitch;	/* (4F54) */
--
--	struct r300_state_atom vpi;	/* vp instructions */
--	struct r300_state_atom vpp;	/* vp parameters */
--	struct r300_state_atom vps;	/* vertex point size (?) */
--	struct r300_state_atom vpucp[6];	/* vp user clip plane - 6 */
-+	struct radeon_state_atom vpt;	/* viewport (1D98) */
-+	struct radeon_state_atom vap_cntl;
-+	struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
-+	struct radeon_state_atom vof;	/* VAP output format register 0x2090 */
-+	struct radeon_state_atom vte;	/* (20B0) */
-+	struct radeon_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
-+	struct radeon_state_atom vap_cntl_status;
-+	struct radeon_state_atom vir[2];	/* vap input route (2150/21E0) */
-+	struct radeon_state_atom vic;	/* vap input control (2180) */
-+	struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
-+	struct radeon_state_atom vap_clip_cntl;
-+	struct radeon_state_atom vap_clip;
-+	struct radeon_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
-+	struct radeon_state_atom pvs;	/* pvs_cntl (22D0) */
-+	struct radeon_state_atom gb_enable;	/* (4008) */
-+	struct radeon_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
-+	struct radeon_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
-+	struct radeon_state_atom ga_triangle_stipple;	/* (4214) */
-+	struct radeon_state_atom ps;	/* pointsize (421C) */
-+	struct radeon_state_atom ga_point_minmax;	/* (4230) */
-+	struct radeon_state_atom lcntl;	/* line control */
-+	struct radeon_state_atom ga_line_stipple;	/* (4260) */
-+	struct radeon_state_atom shade;
-+	struct radeon_state_atom polygon_mode;
-+	struct radeon_state_atom fogp;	/* fog parameters (4294) */
-+	struct radeon_state_atom ga_soft_reset;	/* (429C) */
-+	struct radeon_state_atom zbias_cntl;
-+	struct radeon_state_atom zbs;	/* zbias (42A4) */
-+	struct radeon_state_atom occlusion_cntl;
-+	struct radeon_state_atom cul;	/* cull cntl (42B8) */
-+	struct radeon_state_atom su_depth_scale;	/* (42C0) */
-+	struct radeon_state_atom rc;	/* rs control (4300) */
-+	struct radeon_state_atom ri;	/* rs interpolators (4310) */
-+	struct radeon_state_atom rr;	/* rs route (4330) */
-+	struct radeon_state_atom sc_hyperz;	/* (43A4) */
-+	struct radeon_state_atom sc_screendoor;	/* (43E8) */
-+	struct radeon_state_atom fp;	/* fragment program cntl + nodes (4600) */
-+	struct radeon_state_atom fpt;	/* texi - (4620) */
-+	struct radeon_state_atom us_out_fmt;	/* (46A4) */
-+	struct radeon_state_atom r500fp;	/* r500 fp instructions */
-+	struct radeon_state_atom r500fp_const;	/* r500 fp constants */
-+	struct radeon_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
-+	struct radeon_state_atom fogs;	/* fog state (4BC0) */
-+	struct radeon_state_atom fogc;	/* fog color (4BC8) */
-+	struct radeon_state_atom at;	/* alpha test (4BD4) */
-+	struct radeon_state_atom fg_depth_src;	/* (4BD8) */
-+	struct radeon_state_atom fpp;	/* 0x4C00 and following */
-+	struct radeon_state_atom rb3d_cctl;	/* (4E00) */
-+	struct radeon_state_atom bld;	/* blending (4E04) */
-+	struct radeon_state_atom cmk;	/* colormask (4E0C) */
-+	struct radeon_state_atom blend_color;	/* constant blend color */
-+	struct radeon_state_atom rop;	/* ropcntl */
-+	struct radeon_state_atom cb;	/* colorbuffer (4E28) */
-+	struct radeon_state_atom rb3d_dither_ctl;	/* (4E50) */
-+	struct radeon_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
-+	struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
-+	struct radeon_state_atom zs;	/* zstencil control (4F00) */
-+	struct radeon_state_atom zstencil_format;
-+	struct radeon_state_atom zb;	/* z buffer (4F20) */
-+	struct radeon_state_atom zb_depthclearvalue;	/* (4F28) */
-+	struct radeon_state_atom zb_zmask;	/* (4F30) */
-+	struct radeon_state_atom zb_hiz_offset;	/* (4F44) */
-+	struct radeon_state_atom zb_hiz_pitch;	/* (4F54) */
-+
-+	struct radeon_state_atom vpi;	/* vp instructions */
-+	struct radeon_state_atom vpp;	/* vp parameters */
-+	struct radeon_state_atom vps;	/* vertex point size (?) */
-+	struct radeon_state_atom vpucp[6];	/* vp user clip plane - 6 */
- 	/* 8 texture units */
- 	/* the state is grouped by function and not by
- 	   texture unit. This makes single unit updates
- 	   really awkward - we are much better off
- 	   updating the whole thing at once */
- 	struct {
--		struct r300_state_atom filter;
--		struct r300_state_atom filter_1;
--		struct r300_state_atom size;
--		struct r300_state_atom format;
--		struct r300_state_atom pitch;
--		struct r300_state_atom offset;
--		struct r300_state_atom chroma_key;
--		struct r300_state_atom border_color;
-+		struct radeon_state_atom filter;
-+		struct radeon_state_atom filter_1;
-+		struct radeon_state_atom size;
-+		struct radeon_state_atom format;
-+		struct radeon_state_atom pitch;
-+		struct radeon_state_atom offset;
-+		struct radeon_state_atom chroma_key;
-+		struct radeon_state_atom border_color;
- 	} tex;
--	struct r300_state_atom txe;	/* tex enable (4104) */
--};
-+	struct radeon_state_atom txe;	/* tex enable (4104) */
- 
--/**
-- * This structure holds the command buffer while it is being constructed.
-- *
-- * The first batch of commands in the buffer is always the state that needs
-- * to be re-emitted when the context is lost. This batch can be skipped
-- * otherwise.
-- */
--struct r300_cmdbuf {
--	int size;		/* DWORDs allocated for buffer */
--	uint32_t *cmd_buf;
--	int count_used;		/* DWORDs filled so far */
--	int count_reemit;	/* size of re-emission batch */
-+	radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
- };
- 
- /**
-  * State cache
-  */
- 
--struct r300_depthbuffer_state {
--	GLfloat scale;
--};
--
--struct r300_stencilbuffer_state {
--	GLboolean hw_stencil;
--};
--
- /* Vertex shader state */
- 
- /* Perhaps more if we store programs in vmem? */
-@@ -613,12 +415,8 @@ extern int hw_tcl_on;
- #include "tnl_dd/t_dd_vertex.h"
- #undef TAG
- 
--//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
- #define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp)
- 
--/* Should but doesnt work */
--//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
--
- /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
-  * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
-  */
-@@ -652,14 +450,18 @@ struct r300_vertex_program_cont {
- 	struct r300_vertex_program *progs;
- };
- 
--#define PFS_MAX_ALU_INST	64
--#define PFS_MAX_TEX_INST	64
--#define PFS_MAX_TEX_INDIRECT 4
--#define PFS_NUM_TEMP_REGS	32
--#define PFS_NUM_CONST_REGS	16
-+#define R300_PFS_MAX_ALU_INST	64
-+#define R300_PFS_MAX_TEX_INST	32
-+#define R300_PFS_MAX_TEX_INDIRECT 4
-+#define R300_PFS_NUM_TEMP_REGS	32
-+#define R300_PFS_NUM_CONST_REGS	32
- 
--struct r300_pfs_compile_state;
-+#define R500_PFS_MAX_INST 512
-+#define R500_PFS_NUM_TEMP_REGS 128
-+#define R500_PFS_NUM_CONST_REGS 256
- 
-+struct r300_pfs_compile_state;
-+struct r500_pfs_compile_state;
- 
- /**
-  * Stores state that influences the compilation of a fragment program.
-@@ -702,7 +504,7 @@ struct r300_fragment_program_node {
- struct r300_fragment_program_code {
- 	struct {
- 		int length; /**< total # of texture instructions used */
--		GLuint inst[PFS_MAX_TEX_INST];
-+		GLuint inst[R300_PFS_MAX_TEX_INST];
- 	} tex;
- 
- 	struct {
-@@ -712,7 +514,7 @@ struct r300_fragment_program_code {
- 			GLuint inst1;
- 			GLuint inst2;
- 			GLuint inst3;
--		} inst[PFS_MAX_ALU_INST];
-+		} inst[R300_PFS_MAX_ALU_INST];
- 	} alu;
- 
- 	struct r300_fragment_program_node node[4];
-@@ -723,53 +525,12 @@ struct r300_fragment_program_code {
- 	 * Remember which program register a given hardware constant
- 	 * belongs to.
- 	 */
--	struct prog_src_register constant[PFS_NUM_CONST_REGS];
-+	struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
- 	int const_nr;
- 
- 	int max_temp_idx;
- };
- 
--/**
-- * Store everything about a fragment program that is needed
-- * to render with that program.
-- */
--struct r300_fragment_program {
--	struct gl_fragment_program mesa_program;
--
--	GLboolean translated;
--	GLboolean error;
--
--	struct r300_fragment_program_external_state state;
--	struct r300_fragment_program_code code;
--
--	GLboolean WritesDepth;
--	GLuint optimization;
--};
--
--struct r500_pfs_compile_state;
--
--struct r500_fragment_program_external_state {
--	struct {
--		/**
--		 * If the sampler is used as a shadow sampler,
--		 * this field is:
--		 *  0 - GL_LUMINANCE
--		 *  1 - GL_INTENSITY
--		 *  2 - GL_ALPHA
--		 * depending on the depth texture mode.
--		 */
--		GLuint depth_texture_mode : 2;
--
--		/**
--		 * If the sampler is used as a shadow sampler,
--		 * this field is (texture_compare_func - GL_NEVER).
--		 * [e.g. if compare function is GL_LEQUAL, this field is 3]
--		 *
--		 * Otherwise, this field is 0.
--		 */
--		GLuint texture_compare_func : 3;
--	} unit[16];
--};
- 
- struct r500_fragment_program_code {
- 	struct {
-@@ -779,7 +540,7 @@ struct r500_fragment_program_code {
- 		GLuint inst3;
- 		GLuint inst4;
- 		GLuint inst5;
--	} inst[512];
-+	} inst[R500_PFS_MAX_INST];
- 
- 	int inst_offset;
- 	int inst_end;
-@@ -788,51 +549,41 @@ struct r500_fragment_program_code {
- 	 * Remember which program register a given hardware constant
- 	 * belongs to.
- 	 */
--	struct prog_src_register constant[PFS_NUM_CONST_REGS];
-+	struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
- 	int const_nr;
- 
- 	int max_temp_idx;
- };
- 
--struct r500_fragment_program {
--	struct gl_fragment_program mesa_program;
-+/**
-+* Store everything about a fragment program that is needed
-+* to render with that program.
-+*/
-+struct r300_fragment_program {
-+	struct gl_fragment_program Base;
- 
--	GLcontext *ctx;
- 	GLboolean translated;
- 	GLboolean error;
- 
--	struct r500_fragment_program_external_state state;
--	struct r500_fragment_program_code code;
-+	struct r300_fragment_program_external_state state;
-+	union rX00_fragment_program_code {
-+		struct r300_fragment_program_code r300;
-+		struct r500_fragment_program_code r500;
-+	} code;
- 
- 	GLboolean writes_depth;
--
- 	GLuint optimization;
- };
- 
--#define R300_MAX_AOS_ARRAYS		16
--
--#define REG_COORDS	0
--#define REG_COLOR0	1
--#define REG_TEX0	2
--
--struct r300_state {
--	struct r300_depthbuffer_state depth;
--	struct r300_texture_state texture;
--	int sw_tcl_inputs[VERT_ATTRIB_MAX];
--	struct r300_vertex_shader_state vertex_shader;
--	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
--	int aos_count;
--
--	GLuint *Elts;
--	struct r300_dma_region elt_dma;
--
--	struct r300_dma_region swtcl_dma;
--	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
--							   They are the same as tnl->render_inputs for fixed pipeline */
-+struct r300_fragment_program_compiler {
-+	r300ContextPtr r300;
-+	struct r300_fragment_program *fp;
-+	union rX00_fragment_program_code *code;
-+	struct gl_program *program;
-+};
- 
--	struct r300_stencilbuffer_state stencil;
-+#define R300_MAX_AOS_ARRAYS		16
- 
--};
- 
- #define R300_FALLBACK_NONE 0
- #define R300_FALLBACK_TCL 1
-@@ -841,41 +592,7 @@ struct r300_state {
- /* r300_swtcl.c
-  */
- struct r300_swtcl_info {
--   GLuint RenderIndex;
--
--   /**
--    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
--    * installed in the Mesa state vector.
--    */
--   GLuint vertex_size;
--
--   /**
--    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
--    * data in the hardware buffer.
--    */
--   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
--
--   /**
--    * Number of elements of \c ::vertex_attrs that are actually used.
--    */
--   GLuint vertex_attr_count;
--
--   /**
--    * Cached pointer to the buffer where Mesa will store vertex data.
--    */
--   GLubyte *verts;
--
--   /* Fallback rasterization functions
--    */
--  //   r200_point_func draw_point;
--  //   r200_line_func draw_line;
--  //   r200_tri_func draw_tri;
--
--   GLuint hw_primitive;
--   GLenum render_primitive;
--   GLuint numverts;
--
--   /**
-+  /*
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-     */
-    GLuint coloroffset;
-@@ -885,12 +602,25 @@ struct r300_swtcl_info {
-     */
-    GLuint specoffset;
- 
--   /**
--    * Should Mesa project vertex data or will the hardware do it?
--    */
--   GLboolean needproj;
-+   struct vertex_attribute{
-+       GLuint attr;
-+       GLubyte format;
-+       GLubyte dst_loc;
-+       GLuint swizzle;
-+       GLubyte write_mask;
-+   } vert_attrs[VERT_ATTRIB_MAX];
-+
-+   GLubyte vertex_attr_count;
-+
-+   int sw_tcl_inputs[VERT_ATTRIB_MAX];
-+};
- 
--   struct r300_dma_region indexed_verts;
-+struct r300_vtable {
-+	void (* SetupRSUnit)(GLcontext *ctx);
-+	void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
-+	GLboolean (* FragmentProgramEmit)(struct r300_fragment_program_compiler *compiler);
-+	void (* FragmentProgramDump)(union rX00_fragment_program_code *code);
-+	GLboolean (* SetupPixelShader)(GLcontext *ctx);
- };
- 
- 
-@@ -900,46 +630,24 @@ struct r300_swtcl_info {
- struct r300_context {
- 	struct radeon_context radeon;	/* parent class, must be first */
- 
-+	struct r300_vtable vtbl;
-+
- 	struct r300_hw_state hw;
--	struct r300_cmdbuf cmdbuf;
--	struct r300_state state;
--	struct gl_vertex_program *curr_vp;
-+
-+	struct r300_vertex_shader_state vertex_shader;
- 	struct r300_vertex_program *selected_vp;
- 
- 	/* Vertex buffers
- 	 */
--	struct r300_dma dma;
--	GLboolean save_on_next_unlock;
--	GLuint NewGLState;
--
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
--	int texture_depth;
--	float initialMaxAnisotropy;
--
--	/* Clientdata textures;
--	 */
--	GLuint prefer_gart_client_texturing;
--
--#ifdef USER_BUFFERS
--	struct r300_memory_manager *rmm;
--#endif
--
- 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- 
- 	GLboolean disable_lowimpact_fallback;
- 
--	DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
- 	struct r300_swtcl_info swtcl;
--};
-+	GLboolean vap_flush_needed;
- 
--struct r300_buffer_object {
--	struct gl_buffer_object mesa_obj;
--	int id;
-+	DECLARE_RENDERINPUTS(render_inputs_bitset);
- };
- 
- #define R300_CONTEXT(ctx)		((r300ContextPtr)(ctx->DriverCtx))
-@@ -955,9 +663,9 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx,
- 				      struct r300_vertex_program_cont *vp,
- 				      float *dst);
- 
--#define RADEON_D_CAPTURE 0
--#define RADEON_D_PLAYBACK 1
--#define RADEON_D_PLAYBACK_RAW 2
--#define RADEON_D_T 3
-+extern void r300InitShaderFunctions(r300ContextPtr r300);
-+
-+#define r300PackFloat32 radeonPackFloat32
-+#define r300PackFloat24 radeonPackFloat24
- 
- #endif				/* __R300_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
-index 28c3157..a19b0f1 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.c
-+++ b/src/mesa/drivers/dri/r300/r300_emit.c
-@@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_context.h"
- 
- #include "r300_context.h"
--#include "radeon_ioctl.h"
- #include "r300_state.h"
- #include "r300_emit.h"
- #include "r300_ioctl.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
-     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
-@@ -66,147 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define DEBUG_ALL DEBUG_VERTS
- 
--#if defined(USE_X86_ASM)
--#define COPY_DWORDS( dst, src, nr )					\
--do {									\
--	int __tmp;							\
--	__asm__ __volatile__( "rep ; movsl"				\
--			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
--			      : "0" (nr),				\
--			        "D" ((long)dst),			\
--			        "S" ((long)src) );			\
--} while (0)
--#else
--#define COPY_DWORDS( dst, src, nr )		\
--do {						\
--   int j;					\
--   for ( j = 0 ; j < nr ; j++ )			\
--      dst[j] = ((int *)src)[j];			\
--   dst += nr;					\
--} while (0)
--#endif
--
--static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 4)
--		COPY_DWORDS(out, data, count);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out++;
--			data += stride;
--		}
--}
--
--static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 8)
--		COPY_DWORDS(out, data, count * 2);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out += 2;
--			data += stride;
--		}
--}
--
--static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 12)
--		COPY_DWORDS(out, data, count * 3);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out[2] = *(int *)(data + 8);
--			out += 3;
--			data += stride;
--		}
--}
--
--static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 16)
--		COPY_DWORDS(out, data, count * 4);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out[2] = *(int *)(data + 8);
--			out[3] = *(int *)(data + 12);
--			out += 4;
--			data += stride;
--		}
--}
--
--static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
--			GLvoid * data, int size, int stride, int count)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--
--	if (stride == 0) {
--		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
--		count = 1;
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = 0;
--	} else {
--		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = size;
--	}
--
--	switch (size) {
--	case 1:
--		r300EmitVec4(ctx, rvb, data, stride, count);
--		break;
--	case 2:
--		r300EmitVec8(ctx, rvb, data, stride, count);
--		break;
--	case 3:
--		r300EmitVec12(ctx, rvb, data, stride, count);
--		break;
--	case 4:
--		r300EmitVec16(ctx, rvb, data, stride, count);
--		break;
--	default:
--		assert(0);
--		break;
--	}
--}
--
- #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) |	\
- 		    (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
- 
-@@ -272,7 +127,6 @@ GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
- 
- GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	GLuint i, vic_1 = 0;
- 
- 	if (InputsRead & (1 << VERT_ATTRIB_POS))
-@@ -284,10 +138,8 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
- 	if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
- 		vic_1 |= R300_INPUT_CNTL_COLOR;
- 
--	rmesa->state.texture.tc_count = 0;
- 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- 		if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
--			rmesa->state.texture.tc_count++;
- 			vic_1 |= R300_INPUT_CNTL_TC0 << i;
- 		}
- 
-@@ -336,7 +188,7 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
- 			fprintf(stderr, "\tout of free texcoords to write fog coord\n");
- 			_mesa_exit(-1);
- 		}
--		ret |= 4 << (3 * first_free_texcoord);
-+		ret |= 1 << (3 * first_free_texcoord);
- 	}
- 
- 	return ret;
-@@ -367,7 +219,7 @@ int r300EmitArrays(GLcontext * ctx)
- 		InputsRead = prog->key.InputsRead;
- 		OutputsWritten = prog->key.OutputsWritten;
- 	} else {
--		inputs = rmesa->state.sw_tcl_inputs;
-+		inputs = rmesa->swtcl.sw_tcl_inputs;
- 
- 		DECLARE_RENDERINPUTS(render_inputs_bitset);
- 		RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-@@ -376,7 +228,6 @@ int r300EmitArrays(GLcontext * ctx)
- 
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
--		//assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
- 
- 		if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
- 			InputsRead |= 1 << VERT_ATTRIB_POS;
-@@ -421,7 +272,7 @@ int r300EmitArrays(GLcontext * ctx)
- 			if (InputsRead & (1 << i))
- 				inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
- 
--		RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-+		RENDERINPUTS_COPY(rmesa->render_inputs_bitset, render_inputs_bitset);
- 	}
- 
- 	assert(InputsRead);
-@@ -438,7 +289,7 @@ int r300EmitArrays(GLcontext * ctx)
- 	}
- 
- 	for (i = 0; i < nr; i++) {
--		int ci, fix, found = 0;
-+		int ci;
- 
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
-@@ -448,60 +299,34 @@ int r300EmitArrays(GLcontext * ctx)
- 		for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
- 			swizzle[i][ci] = ci;
- 		}
--
--		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
--			if (vb->AttribPtr[tab[i]]->stride % 4) {
--				return R300_FALLBACK_TCL;
--			}
--			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].start = 0;
--			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
--			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--		} else {
--			r300EmitVec(ctx, &rmesa->state.aos[i],
-+		rcommon_emit_vector(ctx, &rmesa->radeon.tcl.aos[i],
- 				    vb->AttribPtr[tab[i]]->data,
- 				    vb->AttribPtr[tab[i]]->size,
- 				    vb->AttribPtr[tab[i]]->stride, count);
--		}
--
--		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--
--		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
--			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
--				continue;
--			}
--			found = 1;
--			break;
--		}
--
--		if (found) {
--			if (fix > 0) {
--				WARN_ONCE("Feeling lucky?\n");
--			}
--			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
--			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
--				swizzle[i][ci] += fix;
--			}
--		} else {
--			WARN_ONCE
--			    ("Cannot handle offset %x with stride %d, comp %d\n",
--			     rmesa->state.aos[i].aos_offset,
--			     rmesa->state.aos[i].aos_stride,
--			     vb->AttribPtr[tab[i]]->size);
--			return R300_FALLBACK_TCL;
--		}
- 	}
- 
- 	/* Setup INPUT_ROUTE. */
--	R300_STATECHANGE(rmesa, vir[0]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
--	    r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
--			       vb->AttribPtr, inputs, tab, nr);
--	R300_STATECHANGE(rmesa, vir[1]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
--	    r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
--			       nr);
-+	if (rmesa->radeon.radeonScreen->kernel_mm) {
-+		R300_STATECHANGE(rmesa, vir[0]);
-+		rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[0].cmd[0] |=
-+			(r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-+					    vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
-+		R300_STATECHANGE(rmesa, vir[1]);
-+		rmesa->hw.vir[1].cmd[0] |=
-+			(r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-+					    nr) & 0x3FFF) << 16;
-+	} else {
-+		R300_STATECHANGE(rmesa, vir[0]);
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-+			r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-+					   vb->AttribPtr, inputs, tab, nr);
-+		R300_STATECHANGE(rmesa, vir[1]);
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-+			r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-+					   nr);
-+	}
- 
- 	/* Setup INPUT_CNTL. */
- 	R300_STATECHANGE(rmesa, vic);
-@@ -515,50 +340,22 @@ int r300EmitArrays(GLcontext * ctx)
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
- 	    r300VAPOutputCntl1(ctx, OutputsWritten);
- 
--	rmesa->state.aos_count = nr;
-+	rmesa->radeon.tcl.aos_count = nr;
- 
- 	return R300_FALLBACK_NONE;
- }
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	int i;
--
--	if (rmesa->state.elt_dma.buf)
--		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
--
--	for (i = 0; i < rmesa->state.aos_count; i++) {
--		if (rmesa->state.aos[i].buf)
--			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
--	}
--}
--#endif
--
--void r300ReleaseArrays(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	int i;
--
--	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
--	for (i = 0; i < rmesa->state.aos_count; i++) {
--		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
--	}
--}
--
- void r300EmitCacheFlush(r300ContextPtr rmesa)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
--	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
--	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
--
--	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
--	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
--	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	BEGIN_BATCH_NO_AUTOSTATE(4);
-+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	END_BATCH();
-+	COMMIT_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
-index 89d7383..80c22d5 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.h
-+++ b/src/mesa/drivers/dri/r300/r300_emit.h
-@@ -44,28 +44,31 @@
- #include "r300_cmdbuf.h"
- #include "radeon_reg.h"
- 
--/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
-- * with DRM */
--#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
--#define CP_PACKET3( pkt, n )						\
--	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
--
--static INLINE uint32_t cmdpacket0(int reg, int count)
-+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
-+                                  int reg, int count)
- {
--	drm_r300_cmd_header_t cmd;
--
--	cmd.packet0.cmd_type = R300_CMD_PACKET0;
--	cmd.packet0.count = count;
--	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
--	cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
--
--	return cmd.u;
-+    if (!rscrn->kernel_mm) {
-+	    drm_r300_cmd_header_t cmd;
-+
-+	cmd.u = 0;
-+    	cmd.packet0.cmd_type = R300_CMD_PACKET0;
-+	    cmd.packet0.count = count;
-+    	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
-+	    cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
-+
-+    	return cmd.u;
-+    }
-+    if (count) {
-+        return CP_PACKET0(reg, count - 1);
-+    }
-+    return CP_PACKET2;
- }
- 
--static INLINE uint32_t cmdvpu(int addr, int count)
-+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.vpu.cmd_type = R300_CMD_VPU;
- 	cmd.vpu.count = count;
- 	cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
-@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
-+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
-+                                 int addr, int count, int type, int clamp)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.r500fp.cmd_type = R300_CMD_R500FP;
- 	cmd.r500fp.count = count;
- 	cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
-@@ -88,170 +93,131 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacket3(int packet)
-+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.packet3.cmd_type = R300_CMD_PACKET3;
- 	cmd.packet3.packet = packet;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdcpdelay(unsigned short count)
-+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,  
-+                                  unsigned short count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
-+
- 	cmd.delay.cmd_type = R300_CMD_CP_DELAY;
- 	cmd.delay.count = count;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdwait(unsigned char flags)
-+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
-+                               unsigned char flags)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.wait.cmd_type = R300_CMD_WAIT;
- 	cmd.wait.flags = flags;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacify(void)
-+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.header.cmd_type = R300_CMD_END3D;
- 
- 	return cmd.u;
- }
- 
- /**
-- * Prepare to write a register value to register at address reg.
-- * If num_extra > 0 then the following extra values are written
-- * to registers with address +4, +8 and so on..
-- */
--#define reg_start(reg, num_extra)					\
--	do {								\
--		int _n;							\
--		_n=(num_extra);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+2),				\
--					__FUNCTION__);			\
--		cmd_reserved=_n+2;					\
--		cmd_written=1;						\
--		cmd[0].i=cmdpacket0((reg), _n+1);			\
--	} while (0);
--
--/**
-- * Emit GLuint freestyle
-+ * Write the header of a packet3 to the command buffer.
-+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
-  */
--#define e32(dword)							\
--	do {								\
--		if(cmd_written<cmd_reserved) {				\
--			cmd[cmd_written].i=(dword);			\
--			cmd_written++;					\
--		} else {						\
--			fprintf(stderr,					\
--				"e32 but no previous packet "		\
--				"declaration.\n"			\
--				"Aborting! in %s::%s at line %d, "	\
--				"cmd_written=%d cmd_reserved=%d\n",	\
--				__FILE__, __FUNCTION__, __LINE__,	\
--				cmd_written, cmd_reserved);		\
--			_mesa_exit(-1);					\
--		}							\
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		\
-+    	OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
-+                  R300_CMD_PACKET3_RAW)); \
-+    } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
-+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
- 	} while(0)
- 
--#define	efloat(f) e32(r300PackFloat32(f))
--
--#define vsf_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+2;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdvpu((dest), _n/4);			\
--	} while (0);
--
--#define r500fp_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+1;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
--	} while (0);
--
--#define start_packet3(packet, count)					\
--	{								\
--		int _n;							\
--		GLuint _p;						\
--		_n = (count);						\
--		_p = (packet);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+3),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+3;					\
--		cmd_written = 2;					\
--		if(_n > 0x3fff) {					\
--			fprintf(stderr,"Too big packet3 %08x: cannot "	\
--				"store %d dwords\n",			\
--				_p, _n);				\
--			_mesa_exit(-1);					\
--		}							\
--		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
--		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
--	}
--
- /**
-  * Must be sent to switch to 2d commands
-  */
--void static INLINE end_3d(r300ContextPtr rmesa)
-+void static INLINE end_3d(radeonContextPtr radeon)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(radeon);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].header.cmd_type = R300_CMD_END3D;
-+	if (!radeon->radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdpacify(radeon->radeonScreen));
-+		END_BATCH();
-+	}
- }
- 
- void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdcpdelay(count);
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
-+		END_BATCH();
-+	}
- }
- 
--void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
-+void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdwait(flags);
-+	BATCH_LOCALS(radeon);
-+	uint32_t wait_until;
-+
-+	if (!radeon->radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
-+		END_BATCH();
-+	} else {
-+		switch(flags) {
-+		case R300_WAIT_2D:
-+			wait_until = (1 << 14);
-+			break;
-+		case R300_WAIT_3D:
-+			wait_until = (1 << 15);
-+			break;
-+		case R300_NEW_WAIT_2D_3D:
-+			wait_until = (1 << 14) | (1 << 15);
-+			break;
-+		case R300_NEW_WAIT_2D_2D_CLEAN:
-+			wait_until = (1 << 14) | (1 << 16) | (1 << 18);
-+			break;
-+		case R300_NEW_WAIT_3D_3D_CLEAN:
-+			wait_until = (1 << 15) | (1 << 17) | (1 << 18);
-+			break;
-+		case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
-+			wait_until  = (1 << 14) | (1 << 16) | (1 << 18);
-+			wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
-+			break;
-+		default:
-+			return;
-+		}
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
-+		OUT_BATCH(wait_until);
-+		END_BATCH();
-+	}
- }
- 
- extern int r300EmitArrays(GLcontext * ctx);
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx);
--#endif
--
--extern void r300ReleaseArrays(GLcontext * ctx);
- extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
- extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
-index 873cde4..ea530fd 100644
---- a/src/mesa/drivers/dri/r300/r300_fragprog.c
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
-@@ -25,32 +25,12 @@
-  *
-  */
- 
--/**
-- * \file
-- *
-- * Fragment program compiler. Perform transformations on the intermediate
-- * representation until the program is in a form where we can translate
-- * it more or less directly into machine-readable form.
-- *
-- * \author Ben Skeggs <darktama@iinet.net.au>
-- * \author Jerome Glisse <j.glisse@gmail.com>
-- */
-+#include "r300_fragprog.h"
- 
--#include "main/glheader.h"
--#include "main/macros.h"
--#include "main/enums.h"
--#include "shader/prog_instruction.h"
- #include "shader/prog_parameter.h"
--#include "shader/prog_print.h"
- 
- #include "r300_context.h"
--#include "r300_fragprog.h"
- #include "r300_fragprog_swizzle.h"
--#include "r300_state.h"
--
--#include "radeon_nqssadce.h"
--#include "radeon_program_alu.h"
--
- 
- static void reset_srcreg(struct prog_src_register* reg)
- {
-@@ -81,7 +61,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
-  * \todo If/when r5xx uses the radeon_program architecture, this can probably
-  * be reused.
-  */
--static GLboolean transform_TEX(
-+GLboolean r300_transform_TEX(
- 	struct radeon_transform_context *t,
- 	struct prog_instruction* orig_inst, void* data)
- {
-@@ -175,7 +155,7 @@ static GLboolean transform_TEX(
- 		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- 		inst.SrcReg[0].Index = tmpreg;
- 	}
--	
-+
- 	tgt = radeonAppendInstructions(t->Program, 1);
- 	_mesa_copy_instructions(tgt, &inst, 1);
- 
-@@ -246,241 +226,10 @@ static GLboolean transform_TEX(
- 	return GL_TRUE;
- }
- 
--
--static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
--{
--	struct gl_fragment_program *mp = &fp->mesa_program;
--
--	/* Ask Mesa nicely to fill in ParameterValues for us */
--	if (mp->Base.Parameters)
--		_mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
--}
--
--
--/**
-- * Transform the program to support fragment.position.
-- *
-- * Introduce a small fragment at the start of the program that will be
-- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
-- * All other code pieces that reference that input will be rewritten
-- * to read from a newly allocated temporary.
-- *
-- * \todo if/when r5xx supports the radeon_program architecture, this is a
-- * likely candidate for code sharing.
-- */
--static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
--{
--	GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
--
--	if (!(InputsRead & FRAG_BIT_WPOS))
--		return;
--
--	static gl_state_index tokens[STATE_LENGTH] = {
--		STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
--	};
--	struct prog_instruction *fpi;
--	GLuint window_index;
--	int i = 0;
--	GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
--
--	_mesa_insert_instructions(compiler->program, 0, 3);
--	fpi = compiler->program->Instructions;
--
--	/* perspective divide */
--	fpi[i].Opcode = OPCODE_RCP;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_W;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
--	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
--	fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
--	i++;
--
--	fpi[i].Opcode = OPCODE_MUL;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
--	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
--	fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
--
--	fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
--	fpi[i].SrcReg[1].Index = tempregi;
--	fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
--	i++;
--
--	/* viewport transformation */
--	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
--
--	fpi[i].Opcode = OPCODE_MAD;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
--	fpi[i].SrcReg[0].Index = tempregi;
--	fpi[i].SrcReg[0].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--
--	fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
--	fpi[i].SrcReg[1].Index = window_index;
--	fpi[i].SrcReg[1].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--
--	fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
--	fpi[i].SrcReg[2].Index = window_index;
--	fpi[i].SrcReg[2].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--	i++;
--
--	for (; i < compiler->program->NumInstructions; ++i) {
--		int reg;
--		for (reg = 0; reg < 3; reg++) {
--			if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
--			    fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
--				fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
--				fpi[i].SrcReg[reg].Index = tempregi;
--			}
--		}
--	}
--}
--
--
--static void nqssadce_init(struct nqssadce_state* s)
--{
--	s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
--	s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
--}
--
--
--static GLuint build_dtm(GLuint depthmode)
--{
--	switch(depthmode) {
--	default:
--	case GL_LUMINANCE: return 0;
--	case GL_INTENSITY: return 1;
--	case GL_ALPHA: return 2;
--	}
--}
--
--static GLuint build_func(GLuint comparefunc)
--{
--	return comparefunc - GL_NEVER;
--}
--
--
--/**
-- * Collect all external state that is relevant for compiling the given
-- * fragment program.
-- */
--static void build_state(
--	r300ContextPtr r300,
--	struct r300_fragment_program *fp,
--	struct r300_fragment_program_external_state *state)
--{
--	int unit;
--
--	_mesa_bzero(state, sizeof(*state));
--
--	for(unit = 0; unit < 16; ++unit) {
--		if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
--			struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
--
--			state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
--			state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
--		}
--	}
--}
--
--
--void r300TranslateFragmentShader(r300ContextPtr r300,
--				 struct r300_fragment_program *fp)
--{
--	struct r300_fragment_program_external_state state;
--
--	build_state(r300, fp, &state);
--	if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
--		/* TODO: cache compiled programs */
--		fp->translated = GL_FALSE;
--		_mesa_memcpy(&fp->state, &state, sizeof(state));
--	}
--
--	if (!fp->translated) {
--		struct r300_fragment_program_compiler compiler;
--
--		compiler.r300 = r300;
--		compiler.fp = fp;
--		compiler.code = &fp->code;
--		compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Fragment Program: Initial program:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		insert_WPOS_trailer(&compiler);
--
--		struct radeon_program_transformation transformations[] = {
--			{ &transform_TEX, &compiler },
--			{ &radeonTransformALU, 0 },
--			{ &radeonTransformTrigSimple, 0 }
--		};
--		radeonLocalTransform(
--			r300->radeon.glCtx,
--			compiler.program,
--			3, transformations);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Fragment Program: After native rewrite:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		struct radeon_nqssadce_descr nqssadce = {
--			.Init = &nqssadce_init,
--			.IsNativeSwizzle = &r300FPIsNativeSwizzle,
--			.BuildSwizzle = &r300FPBuildSwizzle,
--			.RewriteDepthOut = GL_TRUE
--		};
--		radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Compiler: after NqSSA-DCE:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		if (!r300FragmentProgramEmit(&compiler))
--			fp->error = GL_TRUE;
--
--		/* Subtle: Rescue any parameters that have been added during transformations */
--		_mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
--		fp->mesa_program.Base.Parameters = compiler.program->Parameters;
--		compiler.program->Parameters = 0;
--
--		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);
--
--		if (!fp->error)
--			fp->translated = GL_TRUE;
--		if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
--			r300FragmentProgramDump(fp, &fp->code);
--		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
--	}
--
--	update_params(r300, fp);
--}
--
- /* just some random things... */
--void r300FragmentProgramDump(
--	struct r300_fragment_program *fp,
--	struct r300_fragment_program_code *code)
-+void r300FragmentProgramDump(union rX00_fragment_program_code *c)
- {
-+	struct r300_fragment_program_code *code = &c->r300;
- 	int n, i, j;
- 	static int pc = 0;
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
-index 94fb554..affa022 100644
---- a/src/mesa/drivers/dri/r300/r300_fragprog.h
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog.h
-@@ -33,9 +33,6 @@
- #ifndef __R300_FRAGPROG_H_
- #define __R300_FRAGPROG_H_
- 
--#include "main/glheader.h"
--#include "main/macros.h"
--#include "main/enums.h"
- #include "shader/program.h"
- #include "shader/prog_instruction.h"
- 
-@@ -105,28 +102,10 @@
- 
- #endif
- 
--struct r300_fragment_program;
--
--extern void r300TranslateFragmentShader(r300ContextPtr r300,
--					struct r300_fragment_program *fp);
--
--
--/**
-- * Used internally by the r300 fragment program code to store compile-time
-- * only data.
-- */
--struct r300_fragment_program_compiler {
--	r300ContextPtr r300;
--	struct r300_fragment_program *fp;
--	struct r300_fragment_program_code *code;
--	struct gl_program *program;
--};
--
- extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
- 
-+extern void r300FragmentProgramDump(union rX00_fragment_program_code *c);
- 
--extern void r300FragmentProgramDump(
--	struct r300_fragment_program *fp,
--	struct r300_fragment_program_code *code);
-+extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
- 
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
-new file mode 100644
-index 0000000..6eaad76
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
-@@ -0,0 +1,291 @@
-+/*
-+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+/**
-+ * \file
-+ *
-+ * Fragment program compiler. Perform transformations on the intermediate
-+ * representation until the program is in a form where we can translate
-+ * it more or less directly into machine-readable form.
-+ *
-+ * \author Ben Skeggs <darktama@iinet.net.au>
-+ * \author Jerome Glisse <j.glisse@gmail.com>
-+ */
-+
-+#include "r300_fragprog_common.h"
-+
-+#include "shader/program.h"
-+#include "shader/prog_parameter.h"
-+#include "shader/prog_print.h"
-+
-+#include "r300_state.h"
-+#include "r300_fragprog.h"
-+#include "r300_fragprog_swizzle.h"
-+#include "r500_fragprog.h"
-+
-+#include "radeon_program.h"
-+#include "radeon_program_alu.h"
-+
-+static void update_params(GLcontext *ctx, struct gl_fragment_program *fp)
-+{
-+	/* Ask Mesa nicely to fill in ParameterValues for us */
-+	if (fp->Base.Parameters)
-+		_mesa_load_state_parameters(ctx, fp->Base.Parameters);
-+}
-+
-+static void nqssadce_init(struct nqssadce_state* s)
-+{
-+	s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
-+	s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
-+}
-+
-+/**
-+ * Transform the program to support fragment.position.
-+ *
-+ * Introduce a small fragment at the start of the program that will be
-+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
-+ * All other code pieces that reference that input will be rewritten
-+ * to read from a newly allocated temporary.
-+ *
-+ */
-+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
-+{
-+	GLuint InputsRead = compiler->fp->Base.Base.InputsRead;
-+
-+	if (!(InputsRead & FRAG_BIT_WPOS))
-+		return;
-+
-+	static gl_state_index tokens[STATE_LENGTH] = {
-+		STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
-+	};
-+	struct prog_instruction *fpi;
-+	GLuint window_index;
-+	int i = 0;
-+	GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-+
-+	_mesa_insert_instructions(compiler->program, 0, 3);
-+	fpi = compiler->program->Instructions;
-+
-+	/* perspective divide */
-+	fpi[i].Opcode = OPCODE_RCP;
-+
-+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-+	fpi[i].DstReg.Index = tempregi;
-+	fpi[i].DstReg.WriteMask = WRITEMASK_W;
-+	fpi[i].DstReg.CondMask = COND_TR;
-+
-+	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-+	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-+	fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-+	i++;
-+
-+	fpi[i].Opcode = OPCODE_MUL;
-+
-+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-+	fpi[i].DstReg.Index = tempregi;
-+	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-+	fpi[i].DstReg.CondMask = COND_TR;
-+
-+	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
-+	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
-+	fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-+
-+	fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
-+	fpi[i].SrcReg[1].Index = tempregi;
-+	fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
-+	i++;
-+
-+	/* viewport transformation */
-+	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-+
-+	fpi[i].Opcode = OPCODE_MAD;
-+
-+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
-+	fpi[i].DstReg.Index = tempregi;
-+	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
-+	fpi[i].DstReg.CondMask = COND_TR;
-+
-+	fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-+	fpi[i].SrcReg[0].Index = tempregi;
-+	fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-+
-+	fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
-+	fpi[i].SrcReg[1].Index = window_index;
-+	fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-+
-+	fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
-+	fpi[i].SrcReg[2].Index = window_index;
-+	fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-+	i++;
-+
-+	for (; i < compiler->program->NumInstructions; ++i) {
-+		int reg;
-+		for (reg = 0; reg < 3; reg++) {
-+			if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
-+			    fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
-+				fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
-+				fpi[i].SrcReg[reg].Index = tempregi;
-+			}
-+		}
-+	}
-+}
-+
-+static GLuint build_dtm(GLuint depthmode)
-+{
-+	switch(depthmode) {
-+	default:
-+	case GL_LUMINANCE: return 0;
-+	case GL_INTENSITY: return 1;
-+	case GL_ALPHA: return 2;
-+	}
-+}
-+
-+static GLuint build_func(GLuint comparefunc)
-+{
-+	return comparefunc - GL_NEVER;
-+}
-+
-+/**
-+ * Collect all external state that is relevant for compiling the given
-+ * fragment program.
-+ */
-+static void build_state(
-+	r300ContextPtr r300,
-+	struct r300_fragment_program *fp,
-+	struct r300_fragment_program_external_state *state)
-+{
-+	int unit;
-+
-+	_mesa_bzero(state, sizeof(*state));
-+
-+	for(unit = 0; unit < 16; ++unit) {
-+		if (fp->Base.Base.ShadowSamplers & (1 << unit)) {
-+			struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-+
-+			state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
-+			state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
-+		}
-+	}
-+}
-+
-+void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp;
-+	struct r300_fragment_program_external_state state;
-+
-+	build_state(r300, r300_fp, &state);
-+	if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) {
-+		/* TODO: cache compiled programs */
-+		r300_fp->translated = GL_FALSE;
-+		_mesa_memcpy(&r300_fp->state, &state, sizeof(state));
-+	}
-+
-+	if (!r300_fp->translated) {
-+		struct r300_fragment_program_compiler compiler;
-+
-+		compiler.r300 = r300;
-+		compiler.fp = r300_fp;
-+		compiler.code = &r300_fp->code;
-+		compiler.program = _mesa_clone_program(ctx, &fp->Base);
-+
-+		if (RADEON_DEBUG & DEBUG_PIXEL) {
-+			fflush(stdout);
-+			_mesa_printf("Fragment Program: Initial program:\n");
-+			_mesa_print_program(compiler.program);
-+			fflush(stdout);
-+		}
-+
-+		insert_WPOS_trailer(&compiler);
-+
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-+			struct radeon_program_transformation transformations[] = {
-+				{ &r500_transform_TEX, &compiler },
-+				{ &radeonTransformALU, 0 },
-+				{ &radeonTransformDeriv, 0 },
-+				{ &radeonTransformTrigScale, 0 }
-+			};
-+			radeonLocalTransform(ctx, compiler.program, 4, transformations);
-+		} else {
-+			struct radeon_program_transformation transformations[] = {
-+				{ &r300_transform_TEX, &compiler },
-+				{ &radeonTransformALU, 0 },
-+				{ &radeonTransformTrigSimple, 0 }
-+			};
-+			radeonLocalTransform(ctx, compiler.program, 3, transformations);
-+		}
-+
-+		if (RADEON_DEBUG & DEBUG_PIXEL) {
-+			_mesa_printf("Fragment Program: After native rewrite:\n");
-+			_mesa_print_program(compiler.program);
-+			fflush(stdout);
-+		}
-+
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-+			struct radeon_nqssadce_descr nqssadce = {
-+				.Init = &nqssadce_init,
-+				.IsNativeSwizzle = &r500FPIsNativeSwizzle,
-+				.BuildSwizzle = &r500FPBuildSwizzle,
-+				.RewriteDepthOut = GL_TRUE
-+			};
-+			radeonNqssaDce(ctx, compiler.program, &nqssadce);
-+		} else {
-+			struct radeon_nqssadce_descr nqssadce = {
-+				.Init = &nqssadce_init,
-+				.IsNativeSwizzle = &r300FPIsNativeSwizzle,
-+				.BuildSwizzle = &r300FPBuildSwizzle,
-+				.RewriteDepthOut = GL_TRUE
-+			};
-+			radeonNqssaDce(ctx, compiler.program, &nqssadce);
-+		}
-+
-+		if (RADEON_DEBUG & DEBUG_PIXEL) {
-+			_mesa_printf("Compiler: after NqSSA-DCE:\n");
-+			_mesa_print_program(compiler.program);
-+			fflush(stdout);
-+		}
-+
-+		if (!r300->vtbl.FragmentProgramEmit(&compiler))
-+			r300_fp->error = GL_TRUE;
-+
-+		/* Subtle: Rescue any parameters that have been added during transformations */
-+		_mesa_free_parameter_list(fp->Base.Parameters);
-+		fp->Base.Parameters = compiler.program->Parameters;
-+		compiler.program->Parameters = 0;
-+
-+		_mesa_reference_program(ctx, &compiler.program, NULL);
-+
-+		r300_fp->translated = GL_TRUE;
-+
-+		r300UpdateStateParameters(ctx, _NEW_PROGRAM);
-+
-+		if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
-+			r300->vtbl.FragmentProgramDump(&r300_fp->code);
-+	}
-+
-+	update_params(ctx, fp);
-+}
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
-new file mode 100644
-index 0000000..85ea86f
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
-@@ -0,0 +1,35 @@
-+/*
-+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#ifndef __R300_FRAGPROG_COMMON_H_
-+#define __R300_FRAGPROG_COMMON_H_
-+
-+#include "main/mtypes.h"
-+
-+extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp);
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
-index 9f0b7e3..af8bb38 100644
---- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
-@@ -47,7 +47,7 @@
- 
- #define PROG_CODE \
- 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
--	struct r300_fragment_program_code *code = c->code
-+	struct r300_fragment_program_code *code = &c->code->r300
- 
- #define error(fmt, args...) do {			\
- 		fprintf(stderr, "%s::%s(): " fmt "\n",	\
-@@ -66,7 +66,7 @@ static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwind
- 	}
- 
- 	if (*hwindex >= code->const_nr) {
--		if (*hwindex >= PFS_NUM_CONST_REGS) {
-+		if (*hwindex >= R300_PFS_NUM_CONST_REGS) {
- 			error("Out of hw constants!\n");
- 			return GL_FALSE;
- 		}
-@@ -138,7 +138,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
- {
- 	PROG_CODE;
- 
--	if (code->alu.length >= PFS_MAX_ALU_INST) {
-+	if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
- 		error("Too many ALU instructions");
- 		return GL_FALSE;
- 	}
-@@ -201,7 +201,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
- 	if (inst->Alpha.DepthWriteMask) {
- 		code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
- 		code->node[code->cur_node].flags |= R300_W_OUT;
--		c->fp->WritesDepth = GL_TRUE;
-+		c->fp->writes_depth = GL_TRUE;
- 	}
- 
- 	return GL_TRUE;
-@@ -213,7 +213,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
-  */
- static GLboolean finish_node(struct r300_fragment_program_compiler *c)
- {
--	struct r300_fragment_program_code *code = c->code;
-+	struct r300_fragment_program_code *code = &c->code->r300;
- 	struct r300_fragment_program_node *node = &code->node[code->cur_node];
- 
- 	if (node->alu_end < 0) {
-@@ -275,7 +275,7 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst)
- {
- 	PROG_CODE;
- 
--	if (code->tex.length >= PFS_MAX_TEX_INST) {
-+	if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
- 		error("Too many TEX instructions");
- 		return GL_FALSE;
- 	}
-@@ -318,7 +318,7 @@ static const struct radeon_pair_handler pair_handler = {
- 	.EmitPaired = &emit_alu,
- 	.EmitTex = &emit_tex,
- 	.BeginTexBlock = &begin_tex,
--	.MaxHwTemps = PFS_NUM_TEMP_REGS
-+	.MaxHwTemps = R300_PFS_NUM_TEMP_REGS
- };
- 
- /**
-@@ -327,7 +327,7 @@ static const struct radeon_pair_handler pair_handler = {
-  */
- GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
- {
--	struct r300_fragment_program_code *code = compiler->code;
-+	struct r300_fragment_program_code *code = &compiler->code->r300;
- 
- 	_mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- 	code->node[0].alu_end = -1;
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
-index ee85e22..a7f5121 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
-@@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "swrast/swrast.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "r300_context.h"
--#include "radeon_ioctl.h"
- #include "r300_ioctl.h"
- #include "r300_cmdbuf.h"
- #include "r300_state.h"
-@@ -55,71 +56,90 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_reg.h"
- #include "r300_emit.h"
- #include "r300_fragprog.h"
-+#include "r300_context.h"
- 
- #include "vblank.h"
- 
-+#define R200_3D_DRAW_IMMD_2      0xC0003500
-+
- #define CLEARBUFFER_COLOR	0x1
- #define CLEARBUFFER_DEPTH	0x2
- #define CLEARBUFFER_STENCIL	0x4
- 
--static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
-+static void r300EmitClearState(GLcontext * ctx);
-+
-+static void r300UserClear(GLcontext *ctx, GLuint mask)
- {
-+	radeon_clear_tris(ctx, mask);
-+}
-+
-+static void r300ClearBuffer(r300ContextPtr r300, int flags,
-+			    struct radeon_renderbuffer *rrb,
-+			    struct radeon_renderbuffer *rrbd)
-+{
-+	BATCH_LOCALS(&r300->radeon);
- 	GLcontext *ctx = r300->radeon.glCtx;
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
--	GLuint cboffset, cbpitch;
--	drm_r300_cmd_header_t *cmd2;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	GLuint cbpitch = 0;
- 	r300ContextPtr rmesa = r300;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
--			__FUNCTION__, buffer ? "back" : "front",
--			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
--
--	if (buffer) {
--		cboffset = r300->radeon.radeonScreen->backOffset;
--		cbpitch = r300->radeon.radeonScreen->backPitch;
--	} else {
--		cboffset = r300->radeon.radeonScreen->frontOffset;
--		cbpitch = r300->radeon.radeonScreen->frontPitch;
-+		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
-+			__FUNCTION__, rrb, dPriv->x, dPriv->y,
-+			dPriv->w, dPriv->h);
-+
-+	if (rrb) {
-+		cbpitch = (rrb->pitch / rrb->cpp);
-+		if (rrb->cpp == 4)
-+			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+		else
-+			cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_COLOR_TILE_ENABLE;
-+        }
- 	}
- 
--	cboffset += r300->radeon.radeonScreen->fbLocation;
--
--	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	end_3d(rmesa);
--
--	R300_STATECHANGE(r300, cb);
--	reg_start(R300_RB3D_COLOROFFSET0, 0);
--	e32(cboffset);
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		cbpitch |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		cbpitch |= R300_COLOR_TILE_ENABLE;
--
--	reg_start(R300_RB3D_COLORPITCH0, 0);
--	e32(cbpitch);
--
--	R300_STATECHANGE(r300, cmk);
--	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
-+	/* TODO in bufmgr */
-+	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	end_3d(&rmesa->radeon);
- 
- 	if (flags & CLEARBUFFER_COLOR) {
--		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
--		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
--		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
--		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
-+		assert(rrb != 0);
-+		BEGIN_BATCH_NO_AUTOSTATE(6);
-+		OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+		OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
-+		END_BATCH();
-+	}
-+#if 1
-+	if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
-+		assert(rrbd != 0);
-+		cbpitch = (rrbd->pitch / rrbd->cpp);
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+        }
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+            cbpitch |= R300_DEPTHMICROTILE_TILED;
-+        }
-+		BEGIN_BATCH_NO_AUTOSTATE(6);
-+		OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+		OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
-+		END_BATCH();
-+	}
-+#endif
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
-+	if (flags & CLEARBUFFER_COLOR) {
-+		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
- 	} else {
--		e32(0x0);
-+		OUT_BATCH(0);
- 	}
- 
--	R300_STATECHANGE(r300, zs);
--	reg_start(R300_ZB_CNTL, 2);
- 
- 	{
- 		uint32_t t1, t2;
-@@ -146,37 +166,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
- 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
- 		}
- 
--		e32(t1);
--		e32(t2);
--		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
--		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+		OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
-+		OUT_BATCH(t1);
-+		OUT_BATCH(t2);
-+		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
-+                   R300_STENCILWRITEMASK_SHIFT) |
-+			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+		END_BATCH();
- 	}
- 
--	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
--	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
--	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
--	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
--	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
--	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
--	cmd2[4].u = r300PackFloat32(1.0);
--	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
--	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
--	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
--	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
--
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(9);
-+		OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
-+		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+		OUT_BATCH_FLOAT32(1.0);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+		END_BATCH();
-+	} else {
-+		OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
-+		OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-+			  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-+		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+		OUT_BATCH_FLOAT32(1.0);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+	}
-+	
- 	r300EmitCacheFlush(rmesa);
--	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+
-+	R300_STATECHANGE(r300, cb);
-+	R300_STATECHANGE(r300, cmk);
-+	R300_STATECHANGE(r300, zs);
- }
- 
- static void r300EmitClearState(GLcontext * ctx)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
--	r300ContextPtr rmesa = r300;
-+	BATCH_LOCALS(&r300->radeon);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	GLuint vap_cntl;
-@@ -184,35 +222,37 @@ static void r300EmitClearState(GLcontext * ctx)
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- 		has_tcl = 0;
- 
--        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--                is_r500 = 1;
--
-+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+		is_r500 = 1;
- 
--	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
--	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
--	 * quite complex; see the functions in r300_emit.c.
-+	/* State atom dirty tracking is a little subtle here.
- 	 *
--	 * I believe it would be a good idea to extend the functions in
--	 * r300_emit.c so that they can be used to setup the default values for
--	 * these registers, as well as the actual values used for rendering.
-+	 * On the one hand, we need to make sure base state is emitted
-+	 * here if we start with an empty batch buffer, otherwise clear
-+	 * works incorrectly with multiple processes. Therefore, the first
-+	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
-+	 *
-+	 * On the other hand, implicit state emission clears the state atom
-+	 * dirty bits, so we have to call R300_STATECHANGE later than the
-+	 * first BEGIN_BATCH.
-+	 *
-+	 * The final trickiness is that, because we change state, we need
-+	 * to ensure that any stored swtcl primitives are flushed properly
-+	 * before we start changing state. See the R300_NEWPRIM in r300Clear
-+	 * for this.
- 	 */
--	R300_STATECHANGE(r300, vir[0]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
-+	BEGIN_BATCH(31);
-+	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	if (!has_tcl)
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 	else
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 
--	/* disable fog */
--	R300_STATECHANGE(r300, fogs);
--	reg_start(R300_FG_FOG_BLEND, 0);
--	e32(0x0);
--
--	R300_STATECHANGE(r300, vir[1]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
--	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-+	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
-+	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-+	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-@@ -226,619 +266,402 @@ static void r300EmitClearState(GLcontext * ctx)
- 	      << R300_SWIZZLE1_SHIFT)));
- 
- 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
--	R300_STATECHANGE(r300, vic);
--	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
--	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
--	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
-+	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
-+	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-+	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- 
--	R300_STATECHANGE(r300, vte);
- 	/* comes from fglrx startup of clear */
--	reg_start(R300_SE_VTE_CNTL, 1);
--	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
--	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
--	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
--	    R300_VPORT_Z_OFFSET_ENA);
--	e32(0x8);
-+	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
-+	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-+		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-+		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-+		  R300_VPORT_Z_OFFSET_ENA);
-+	OUT_BATCH(0x8);
- 
--	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
--	e32(0xaaaaaaaa);
-+	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
- 
--	R300_STATECHANGE(r300, vof);
--	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
--	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
--	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
--	e32(0x0);		/* no textures */
-+	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-+		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-+	OUT_BATCH(0); /* no textures */
- 
--	R300_STATECHANGE(r300, txe);
--	reg_start(R300_TX_ENABLE, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
- 
--	R300_STATECHANGE(r300, vpt);
--	reg_start(R300_SE_VPORT_XSCALE, 5);
--	efloat(1.0);
--	efloat(dPriv->x);
--	efloat(1.0);
--	efloat(dPriv->y);
--	efloat(1.0);
--	efloat(0.0);
-+	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->x);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->y);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(0.0);
- 
--	R300_STATECHANGE(r300, at);
--	reg_start(R300_FG_ALPHA_FUNC, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
-+
-+	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
-+	OUT_BATCH(0x0);
-+	OUT_BATCH(0x0);
-+	END_BATCH();
- 
-+	R300_STATECHANGE(r300, vir[0]);
-+	R300_STATECHANGE(r300, fogs);
-+	R300_STATECHANGE(r300, vir[1]);
-+	R300_STATECHANGE(r300, vic);
-+	R300_STATECHANGE(r300, vte);
-+	R300_STATECHANGE(r300, vof);
-+	R300_STATECHANGE(r300, txe);
-+	R300_STATECHANGE(r300, vpt);
-+	R300_STATECHANGE(r300, at);
- 	R300_STATECHANGE(r300, bld);
--	reg_start(R300_RB3D_CBLEND, 1);
--	e32(0x0);
--	e32(0x0);
-+	R300_STATECHANGE(r300, ps);
- 
- 	if (has_tcl) {
--	    R300_STATECHANGE(r300, vap_clip_cntl);
--	    reg_start(R300_VAP_CLIP_CNTL, 0);
--	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		R300_STATECHANGE(r300, vap_clip_cntl);
-+
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		END_BATCH();
-         }
- 
--	R300_STATECHANGE(r300, ps);
--	reg_start(R300_GA_POINT_SIZE, 0);
--	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
--	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
-+		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
-+		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	END_BATCH();
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R300_RS_IP_0, 7);
--		for (i = 0; i < 8; ++i) {
--			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
--		}
--
- 		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
- 		R300_STATECHANGE(r300, rr);
--		reg_start(R300_RS_INST_0, 0);
--		e32(R300_RS_INST_COL_CN_WRITE);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
-+		for (i = 0; i < 8; ++i)
-+			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-+
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
-+
-+		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	} else {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R500_RS_IP_0, 7);
-+		R300_STATECHANGE(r300, rc);
-+		R300_STATECHANGE(r300, rr);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
- 		for (i = 0; i < 8; ++i) {
--			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
--			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-+			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
- 		}
- 
--		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
--		R300_STATECHANGE(r300, rr);
--		reg_start(R500_RS_INST_0, 0);
--		e32(R500_RS_INST_COL_CN_WRITE);
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
- 
-+		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	}
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, fp);
--		reg_start(R300_US_CONFIG, 2);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		reg_start(R300_US_CODE_ADDR_0, 3);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		e32(R300_RGBA_OUT);
--
- 		R300_STATECHANGE(r300, fpi[0]);
- 		R300_STATECHANGE(r300, fpi[1]);
- 		R300_STATECHANGE(r300, fpi[2]);
- 		R300_STATECHANGE(r300, fpi[3]);
- 
--		reg_start(R300_US_ALU_RGB_INST_0, 0);
--		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
--
--		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
--		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
--
--		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
--		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
--
--		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
--		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		BEGIN_BATCH(17);
-+		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(R300_RGBA_OUT);
-+
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
-+			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
-+			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
-+			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
-+			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		END_BATCH();
- 	} else {
-- 		R300_STATECHANGE(r300, fp);
-- 		reg_start(R500_US_CONFIG, 1);
-- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-- 		e32(0x0);
-- 		reg_start(R500_US_CODE_ADDR, 2);
-- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-- 		e32(R500_US_CODE_OFFSET_ADDR(0));
-+		struct radeon_state_atom r500fp;
-+		uint32_t _cmd[10];
- 
-+		R300_STATECHANGE(r300, fp);
- 		R300_STATECHANGE(r300, r500fp);
--		r500fp_start_fragment(0, 6);
--
--		e32(R500_INST_TYPE_OUT |
--		    R500_INST_TEX_SEM_WAIT |
--		    R500_INST_LAST |
--		    R500_INST_RGB_OMASK_R |
--		    R500_INST_RGB_OMASK_G |
--		    R500_INST_RGB_OMASK_B |
--		    R500_INST_ALPHA_OMASK |
--		    R500_INST_RGB_CLAMP |
--		    R500_INST_ALPHA_CLAMP);
--
--		e32(R500_RGB_ADDR0(0) |
--		    R500_RGB_ADDR1(0) |
--		    R500_RGB_ADDR1_CONST |
--		    R500_RGB_ADDR2(0) |
--		    R500_RGB_ADDR2_CONST);
--
--		e32(R500_ALPHA_ADDR0(0) |
--		    R500_ALPHA_ADDR1(0) |
--		    R500_ALPHA_ADDR1_CONST |
--		    R500_ALPHA_ADDR2(0) |
--		    R500_ALPHA_ADDR2_CONST);
--
--		e32(R500_ALU_RGB_SEL_A_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_A_R |
--		    R500_ALU_RGB_G_SWIZ_A_G |
--		    R500_ALU_RGB_B_SWIZ_A_B |
--		    R500_ALU_RGB_SEL_B_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_B_R |
--		    R500_ALU_RGB_B_SWIZ_B_G |
--		    R500_ALU_RGB_G_SWIZ_B_B);
--
--		e32(R500_ALPHA_OP_CMP |
--		    R500_ALPHA_SWIZ_A_A |
--		    R500_ALPHA_SWIZ_B_A);
--
--		e32(R500_ALU_RGBA_OP_CMP |
--		    R500_ALU_RGBA_R_SWIZ_0 |
--		    R500_ALU_RGBA_G_SWIZ_0 |
--		    R500_ALU_RGBA_B_SWIZ_0 |
--		    R500_ALU_RGBA_A_SWIZ_0);
-+
-+		BEGIN_BATCH(7);
-+		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
-+		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
-+		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-+		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-+		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
-+		END_BATCH();
-+
-+		r500fp.check = check_r500fp;
-+		r500fp.cmd = _cmd;
-+		r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
-+		r500fp.cmd[1] = R500_INST_TYPE_OUT |
-+			R500_INST_TEX_SEM_WAIT |
-+			R500_INST_LAST |
-+			R500_INST_RGB_OMASK_R |
-+			R500_INST_RGB_OMASK_G |
-+			R500_INST_RGB_OMASK_B |
-+			R500_INST_ALPHA_OMASK |
-+			R500_INST_RGB_CLAMP |
-+			R500_INST_ALPHA_CLAMP;
-+		r500fp.cmd[2] = R500_RGB_ADDR0(0) |
-+			R500_RGB_ADDR1(0) |
-+			R500_RGB_ADDR1_CONST |
-+			R500_RGB_ADDR2(0) |
-+			R500_RGB_ADDR2_CONST;
-+		r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
-+			R500_ALPHA_ADDR1(0) |
-+			R500_ALPHA_ADDR1_CONST |
-+			R500_ALPHA_ADDR2(0) |
-+			R500_ALPHA_ADDR2_CONST;
-+		r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_A_R |
-+			R500_ALU_RGB_G_SWIZ_A_G |
-+			R500_ALU_RGB_B_SWIZ_A_B |
-+			R500_ALU_RGB_SEL_B_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_B_R |
-+			R500_ALU_RGB_B_SWIZ_B_G |
-+			R500_ALU_RGB_G_SWIZ_B_B;
-+		r500fp.cmd[5] = R500_ALPHA_OP_CMP |
-+			R500_ALPHA_SWIZ_A_A |
-+			R500_ALPHA_SWIZ_B_A;
-+		r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
-+			R500_ALU_RGBA_R_SWIZ_0 |
-+			R500_ALU_RGBA_G_SWIZ_0 |
-+			R500_ALU_RGBA_B_SWIZ_0 |
-+			R500_ALU_RGBA_A_SWIZ_0;
-+		
-+		r500fp.cmd[7] = 0;
-+		emit_r500fp(ctx, &r500fp);
- 	}
- 
--	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
--	e32(0x00000000);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+	END_BATCH();
-+
- 	if (has_tcl) {
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
--	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
--	} else
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-+	} else {
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
-+	}
- 
- 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
--	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
--	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
--	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
--	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
- 	else
--	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
- 
--	R300_STATECHANGE(rmesa, vap_cntl);
--	reg_start(R300_VAP_CNTL, 0);
--	e32(vap_cntl);
-+	R300_STATECHANGE(r300, vap_cntl);
-+
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
-+	END_BATCH();
- 
- 	if (has_tcl) {
-+        struct radeon_state_atom vpu;
-+        uint32_t _cmd[10];
- 		R300_STATECHANGE(r300, pvs);
--		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
--
--		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
--		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
--		    (1 << R300_PVS_LAST_INST_SHIFT));
--		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
--		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
--		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
--
- 		R300_STATECHANGE(r300, vpi);
--		vsf_start_fragment(0x0, 8);
--
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
- 
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
-+		BEGIN_BATCH(4);
-+		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
-+			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-+			  (1 << R300_PVS_LAST_INST_SHIFT));
-+		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-+			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-+		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-+		END_BATCH();
-+
-+		vpu.check = check_vpu;
-+		vpu.cmd = _cmd;
-+		vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
-+
-+		vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
-+                                         0, 0xf, PVS_DST_REG_OUT);
-+		vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
-+                                      PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[4] = 0x0;
-+
-+		vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
-+                                         PVS_DST_REG_OUT);
-+		vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
-+                                      PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-+                                      PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-+
-+                                      VSF_FLAG_NONE);
-+		vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[8] = 0x0;
-+
-+		r300->vap_flush_needed = GL_TRUE;
-+		emit_vpu(ctx, &vpu);
- 	}
- }
- 
--/**
-- * Buffer clear
-- */
--static void r300Clear(GLcontext * ctx, GLbitfield mask)
--{
-+static void r300KernelClear(GLcontext *ctx, GLuint flags)
-+{	  	
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
--	int flags = 0;
-+	struct radeon_framebuffer *rfb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
-+	struct radeon_renderbuffer *rrbd;
- 	int bits = 0;
--	int swapped;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "r300Clear\n");
--
--	{
--		LOCK_HARDWARE(&r300->radeon);
--		UNLOCK_HARDWARE(&r300->radeon);
--		if (dPriv->numClipRects == 0)
--			return;
--	}
- 
--	if (mask & BUFFER_BIT_FRONT_LEFT) {
--		flags |= BUFFER_BIT_FRONT_LEFT;
--		mask &= ~BUFFER_BIT_FRONT_LEFT;
--	}
--
--	if (mask & BUFFER_BIT_BACK_LEFT) {
--		flags |= BUFFER_BIT_BACK_LEFT;
--		mask &= ~BUFFER_BIT_BACK_LEFT;
--	}
--
--	if (mask & BUFFER_BIT_DEPTH) {
-+	/* Make sure it fits there. */
-+	rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
-+	if (flags || bits)
-+		r300EmitClearState(ctx);
-+	rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
-+	if (rrbd && (flags & BUFFER_BIT_DEPTH))
- 		bits |= CLEARBUFFER_DEPTH;
--		mask &= ~BUFFER_BIT_DEPTH;
--	}
- 
--	if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
-+	if (rrbd && (flags & BUFFER_BIT_STENCIL))
- 		bits |= CLEARBUFFER_STENCIL;
--		mask &= ~BUFFER_BIT_STENCIL;
--	}
- 
--	if (mask) {
--		if (RADEON_DEBUG & DEBUG_FALLBACKS)
--			fprintf(stderr, "%s: swrast clear, mask: %x\n",
--				__FUNCTION__, mask);
--		_swrast_Clear(ctx, mask);
-+	if (flags & BUFFER_BIT_COLOR0) {
-+		rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
-+		r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL);
-+		bits = 0;
- 	}
--
--	swapped = r300->radeon.sarea->pfCurrentPage == 1;
--
--	/* Make sure it fits there. */
--	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
--	if (flags || bits)
--		r300EmitClearState(ctx);
--
-+		
- 	if (flags & BUFFER_BIT_FRONT_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
-+		rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT);
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (flags & BUFFER_BIT_BACK_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
-+		rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT);
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (bits)
--		r300ClearBuffer(r300, bits, 0);
-+		r300ClearBuffer(r300, bits, NULL, rrbd);
- 
-+	COMMIT_BATCH();
- }
- 
--void r300Flush(GLcontext * ctx)
-+/**
-+ * Buffer clear
-+ */
-+static void r300Clear(GLcontext * ctx, GLbitfield mask)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
-+	const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
-+	GLbitfield swrast_mask = 0, tri_mask = 0;
-+	int i;
-+	struct gl_framebuffer *fb = ctx->DrawBuffer;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush( rmesa );
--
--	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--}
--
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--
--void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
--{
--	struct r300_dma_buffer *dmabuf;
--	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
--	}
-+		fprintf(stderr, "r300Clear\n");
- 
--	if (rmesa->dma.current.buf) {
--#ifdef USER_BUFFERS
--		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
--#endif
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
-+	if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
-+		LOCK_HARDWARE(&r300->radeon);
-+		UNLOCK_HARDWARE(&r300->radeon);
-+		if (dPriv->numClipRects == 0)
-+			return;
- 	}
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
- 
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = (void *)1;	/* hack */
--	dmabuf->refcount = 1;
--
--	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--	if (dmabuf->id == 0) {
--		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		radeonWaitForIdleLocked(&rmesa->radeon);
-+	/* Flush swtcl vertices if necessary, because we will change hardware
-+	 * state during clear. See also the state-related comment in
-+	 * r300EmitClearState.
-+	 */
-+	R300_NEWPRIM(r300);
- 
--		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
-+	if (colorMask == ~0)
-+	  tri_mask |= (mask & BUFFER_BITS_COLOR);
- 
--		UNLOCK_HARDWARE(&rmesa->radeon);
- 
--		if (dmabuf->id == 0) {
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
-+	/* HW stencil */
-+	if (mask & BUFFER_BIT_STENCIL) {
-+		tri_mask |= BUFFER_BIT_STENCIL;
- 	}
- 
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
--	rmesa->dma.current.end = size;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		r300_mem_free(rmesa, region->buf->id);
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
-+	/* HW depth */
-+	if (mask & BUFFER_BIT_DEPTH) {
-+    	        tri_mask |= BUFFER_BIT_DEPTH;
- 	}
- 
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
-+	/* If we're doing a tri pass for depth/stencil, include a likely color
-+	 * buffer with it.
-+	 */
- 
--#else
--static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
--{
--	struct r300_dma_buffer *dmabuf;
--	int fd = rmesa->radeon.dri.fd;
--	int index = 0;
--	int size = 0;
--	drmDMAReq dma;
--	int ret;
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
-+	for (i = 0; i < BUFFER_COUNT; i++) {
-+	  GLuint bufBit = 1 << i;
-+	  if ((tri_mask) & bufBit) {
-+	    if (!fb->Attachment[i].Renderbuffer->ClassID) {
-+	      tri_mask &= ~bufBit;
-+	      swrast_mask |= bufBit;
-+	    }
-+	  }
- 	}
- 
--	if (rmesa->dma.current.buf)
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dma.context = rmesa->radeon.dri.hwContext;
--	dma.send_count = 0;
--	dma.send_list = NULL;
--	dma.send_sizes = NULL;
--	dma.flags = 0;
--	dma.request_count = 1;
--	dma.request_size = RADEON_BUFFER_SIZE;
--	dma.request_list = &index;
--	dma.request_sizes = &size;
--	dma.granted_count = 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--	ret = drmDMA(fd, &dma);
--
--	if (ret != 0) {
--		/* Try to release some buffers and wait until we can't get any more */
--		if (rmesa->dma.nr_released_bufs) {
--			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		}
-+	/* SW fallback clearing */
-+	swrast_mask = mask & ~tri_mask;
- 
--		if (RADEON_DEBUG & DEBUG_DMA)
--			fprintf(stderr, "Waiting for buffers\n");
--
--		radeonWaitForIdleLocked(&rmesa->radeon);
--		ret = drmDMA(fd, &dma);
--
--		if (ret != 0) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
-+	if (tri_mask) {
-+		if (r300->radeon.radeonScreen->kernel_mm)
-+			r300UserClear(ctx, tri_mask);
-+		else
-+			r300KernelClear(ctx, tri_mask);
- 	}
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (RADEON_DEBUG & DEBUG_DMA)
--		fprintf(stderr, "Allocated buffer %d\n", index);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
--	dmabuf->refcount = 1;
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = dmabuf->buf->address;
--	rmesa->dma.current.end = dmabuf->buf->total;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		drm_radeon_cmd_header_t *cmd;
--
--		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--			fprintf(stderr, "%s -- DISCARD BUF %d\n",
--				__FUNCTION__, region->buf->buf->idx);
--		cmd =
--		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
--								sizeof
--								(*cmd) / 4,
--								__FUNCTION__);
--		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
--		cmd->dma.buf_idx = region->buf->buf->idx;
--
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
-+	if (swrast_mask) {
-+		if (RADEON_DEBUG & DEBUG_FALLBACKS)
-+			fprintf(stderr, "%s: swrast clear, mask: %x\n",
-+				__FUNCTION__, swrast_mask);
-+		_swrast_Clear(ctx, swrast_mask);
- 	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
- }
- 
--#endif
--
--GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
--			   GLint size)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	int valid = (size >= 0 && offset >= 0
--		     && offset + size <
--		     rmesa->radeon.radeonScreen->gartTextures.size);
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
--			valid);
--
--	return valid;
--}
--
--GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--
--	//fprintf(stderr, "offset=%08x\n", offset);
--
--	if (offset < 0
--	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
--		return ~0;
--	else
--		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
--}
- 
- void r300InitIoctlFuncs(struct dd_function_table *functions)
- {
- 	functions->Clear = r300Clear;
- 	functions->Finish = radeonFinish;
--	functions->Flush = r300Flush;
-+	functions->Flush = radeonFlush;
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
-index e1143fb..3abfa71 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.h
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
-@@ -39,22 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "radeon_drm.h"
- 
--extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
--				  const GLvoid * pointer, GLint size);
--
--extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
--					const GLvoid * pointer);
--
--extern void r300Flush(GLcontext * ctx);
--
--extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--				 struct r300_dma_region *region,
--				 const char *caller);
--extern void r300AllocDmaRegion(r300ContextPtr rmesa,
--			       struct r300_dma_region *region, int bytes,
--			       int alignment);
--
- extern void r300InitIoctlFuncs(struct dd_function_table *functions);
- 
--extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size);
- #endif				/* __R300_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
-deleted file mode 100644
-index f8f9d4f..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.c
-+++ /dev/null
-@@ -1,385 +0,0 @@
--/*
-- * Copyright (C) 2005 Aapo Tahkola.
-- *
-- * All Rights Reserved.
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining
-- * a copy of this software and associated documentation files (the
-- * "Software"), to deal in the Software without restriction, including
-- * without limitation the rights to use, copy, modify, merge, publish,
-- * distribute, sublicense, and/or sell copies of the Software, and to
-- * permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice (including the
-- * next paragraph) shall be included in all copies or substantial
-- * portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-- *
-- */
--
--/**
-- * \file
-- *
-- * \author Aapo Tahkola <aet@rasterburn.org>
-- */
--
--#include <unistd.h>
--
--#include "r300_context.h"
--#include "r300_cmdbuf.h"
--#include "r300_ioctl.h"
--#include "r300_mem.h"
--#include "radeon_ioctl.h"
--
--#ifdef USER_BUFFERS
--
--static void resize_u_list(r300ContextPtr rmesa)
--{
--	void *temp;
--	int nsize;
--
--	temp = rmesa->rmm->u_list;
--	nsize = rmesa->rmm->u_size * 2;
--
--	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
--	_mesa_memset(rmesa->rmm->u_list, 0,
--		     nsize * sizeof(*rmesa->rmm->u_list));
--
--	if (temp) {
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--		_mesa_memcpy(rmesa->rmm->u_list, temp,
--			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
--		_mesa_free(temp);
--	}
--
--	rmesa->rmm->u_size = nsize;
--}
--
--void r300_mem_init(r300ContextPtr rmesa)
--{
--	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
--	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
--
--	rmesa->rmm->u_size = 128;
--	resize_u_list(rmesa);
--}
--
--void r300_mem_destroy(r300ContextPtr rmesa)
--{
--	_mesa_free(rmesa->rmm->u_list);
--	rmesa->rmm->u_list = NULL;
--
--	_mesa_free(rmesa->rmm);
--	rmesa->rmm = NULL;
--}
--
--void *r300_mem_ptr(r300ContextPtr rmesa, int id)
--{
--	assert(id <= rmesa->rmm->u_last);
--	return rmesa->rmm->u_list[id].ptr;
--}
--
--int r300_mem_find(r300ContextPtr rmesa, void *ptr)
--{
--	int i;
--
--	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
--		if (rmesa->rmm->u_list[i].ptr &&
--		    ptr >= rmesa->rmm->u_list[i].ptr &&
--		    ptr <
--		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
--			break;
--
--	if (i < rmesa->rmm->u_size + 1)
--		return i;
--
--	fprintf(stderr, "%p failed\n", ptr);
--	return 0;
--}
--
--//#define MM_DEBUG
--int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
--{
--	drm_radeon_mem_alloc_t alloc;
--	int offset = 0, ret;
--	int i, free = -1;
--	int done_age;
--	drm_radeon_mem_free_t memfree;
--	int tries = 0;
--	static int bytes_wasted = 0, allocated = 0;
--
--	if (size < 4096)
--		bytes_wasted += 4096 - size;
--
--	allocated += size;
--
--#if 0
--	static int t = 0;
--	if (t != time(NULL)) {
--		t = time(NULL);
--		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
--			rmesa->rmm->u_last, bytes_wasted / 1024,
--			allocated / 1024);
--	}
--#endif
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--      again:
--
--	done_age = radeonGetAge((radeonContextPtr) rmesa);
--
--	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
--		resize_u_list(rmesa);
--
--	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
--		if (rmesa->rmm->u_list[i].ptr == NULL) {
--			free = i;
--			continue;
--		}
--
--		if (rmesa->rmm->u_list[i].h_pending == 0 &&
--		    rmesa->rmm->u_list[i].pending
--		    && rmesa->rmm->u_list[i].age <= done_age) {
--			memfree.region_offset =
--			    (char *)rmesa->rmm->u_list[i].ptr -
--			    (char *)rmesa->radeon.radeonScreen->gartTextures.
--			    map;
--
--			ret =
--			    drmCommandWrite(rmesa->radeon.radeonScreen->
--					    driScreen->fd, DRM_RADEON_FREE,
--					    &memfree, sizeof(memfree));
--
--			if (ret) {
--				fprintf(stderr, "Failed to free at %p\n",
--					rmesa->rmm->u_list[i].ptr);
--				fprintf(stderr, "ret = %s\n", strerror(-ret));
--				exit(1);
--			} else {
--#ifdef MM_DEBUG
--				fprintf(stderr, "really freed %d at age %x\n",
--					i,
--					radeonGetAge((radeonContextPtr) rmesa));
--#endif
--				if (i == rmesa->rmm->u_last)
--					rmesa->rmm->u_last--;
--
--				if (rmesa->rmm->u_list[i].size < 4096)
--					bytes_wasted -=
--					    4096 - rmesa->rmm->u_list[i].size;
--
--				allocated -= rmesa->rmm->u_list[i].size;
--				rmesa->rmm->u_list[i].pending = 0;
--				rmesa->rmm->u_list[i].ptr = NULL;
--				free = i;
--			}
--		}
--	}
--	rmesa->rmm->u_head = i;
--
--	if (free == -1) {
--		WARN_ONCE("Ran out of slots!\n");
--		//usleep(100);
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		tries++;
--		if (tries > 100) {
--			WARN_ONCE("Ran out of slots!\n");
--			exit(1);
--		}
--		goto again;
--	}
--
--	alloc.region = RADEON_MEM_REGION_GART;
--	alloc.alignment = alignment;
--	alloc.size = size;
--	alloc.region_offset = &offset;
--
--	ret =
--	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
--				sizeof(alloc));
--	if (ret) {
--#if 0
--		WARN_ONCE("Ran out of mem!\n");
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		//usleep(100);
--		tries2++;
--		tries = 0;
--		if (tries2 > 100) {
--			WARN_ONCE("Ran out of GART memory!\n");
--			exit(1);
--		}
--		goto again;
--#else
--		WARN_ONCE
--		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
--		     size);
--		return 0;
--#endif
--	}
--
--	i = free;
--
--	if (i > rmesa->rmm->u_last)
--		rmesa->rmm->u_last = i;
--
--	rmesa->rmm->u_list[i].ptr =
--	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
--	rmesa->rmm->u_list[i].size = size;
--	rmesa->rmm->u_list[i].age = 0;
--	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
--
--#ifdef MM_DEBUG
--	fprintf(stderr, "allocated %d at age %x\n", i,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	return i;
--}
--
--void r300_mem_use(r300ContextPtr rmesa, int id)
--{
--	uint64_t ull;
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	drm_r300_cmd_header_t *cmd;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	cmd =
--	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
--						      2 + sizeof(ull) / 4,
--						      __FUNCTION__);
--	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
--	cmd[0].scratch.reg = R300_MEM_SCRATCH;
--	cmd[0].scratch.n_bufs = 1;
--	cmd[0].scratch.flags = 0;
--	cmd++;
--
--	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
--	_mesa_memcpy(cmd, &ull, sizeof(ull));
--	cmd += sizeof(ull) / 4;
--
--	cmd[0].u = /*id */ 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
--	rmesa->rmm->u_list[id].h_pending++;
--	UNLOCK_HARDWARE(&rmesa->radeon);
--}
--
--unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
--{
--	unsigned long offset;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	offset = (char *)rmesa->rmm->u_list[id].ptr -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
--
--	return offset;
--}
--
--void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	void *ptr;
--	int tries = 0;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (access == R300_MEM_R) {
--
--		if (rmesa->rmm->u_list[id].mapped == 1)
--			WARN_ONCE("buffer %d already mapped\n", id);
--
--		rmesa->rmm->u_list[id].mapped = 1;
--		ptr = r300_mem_ptr(rmesa, id);
--
--		return ptr;
--	}
--
--	if (rmesa->rmm->u_list[id].h_pending)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	if (rmesa->rmm->u_list[id].h_pending) {
--		return NULL;
--	}
--
--	while (rmesa->rmm->u_list[id].age >
--	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
--		usleep(10);
--
--	if (tries >= 1000) {
--		fprintf(stderr, "Idling failed (%x vs %x)\n",
--			rmesa->rmm->u_list[id].age,
--			radeonGetAge((radeonContextPtr) rmesa));
--		return NULL;
--	}
--
--	if (rmesa->rmm->u_list[id].mapped == 1)
--		WARN_ONCE("buffer %d already mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 1;
--	ptr = r300_mem_ptr(rmesa, id);
--
--	return ptr;
--}
--
--void r300_mem_unmap(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (rmesa->rmm->u_list[id].mapped == 0)
--		WARN_ONCE("buffer %d not mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 0;
--}
--
--void r300_mem_free(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	if (rmesa->rmm->u_list[id].ptr == NULL) {
--		WARN_ONCE("Not allocated!\n");
--		return;
--	}
--
--	if (rmesa->rmm->u_list[id].pending) {
--		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
--		return;
--	}
--
--	rmesa->rmm->u_list[id].pending = 1;
--}
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
-deleted file mode 100644
-index 625a7f6..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.h
-+++ /dev/null
-@@ -1,37 +0,0 @@
--#ifndef __R300_MEM_H__
--#define __R300_MEM_H__
--
--//#define R300_MEM_PDL 0
--#define R300_MEM_UL 1
--
--#define R300_MEM_R 1
--#define R300_MEM_W 2
--#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
--
--#define R300_MEM_SCRATCH 2
--
--struct r300_memory_manager {
--	struct {
--		void *ptr;
--		uint32_t size;
--		uint32_t age;
--		uint32_t h_pending;
--		int pending;
--		int mapped;
--	} *u_list;
--	int u_head, u_size, u_last;
--
--};
--
--extern void r300_mem_init(r300ContextPtr rmesa);
--extern void r300_mem_destroy(r300ContextPtr rmesa);
--extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
--extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
--extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
--extern void r300_mem_use(r300ContextPtr rmesa, int id);
--extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
--extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
--extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
--extern void r300_mem_free(r300ContextPtr rmesa, int id);
--
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
-index 8f1a663..79dd1e1 100644
---- a/src/mesa/drivers/dri/r300/r300_reg.h
-+++ b/src/mesa/drivers/dri/r300/r300_reg.h
-@@ -1531,6 +1531,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #	define R500_SEL_FILTER4_TC3		 (3 << 18)
- 
- #define R300_TX_OFFSET_0                    0x4540
-+#define R300_TX_OFFSET_1                    0x4544
-+#define R300_TX_OFFSET_2                    0x4548
-+#define R300_TX_OFFSET_3                    0x454C
-+#define R300_TX_OFFSET_4                    0x4550
-+#define R300_TX_OFFSET_5                    0x4554
-+#define R300_TX_OFFSET_6                    0x4558
-+#define R300_TX_OFFSET_7                    0x455C
- 	/* BEGIN: Guess from R200 */
- #       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0)
- #       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0)
-@@ -2425,6 +2432,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- /* Z Buffer Clear Value */
- #define R300_ZB_DEPTHCLEARVALUE                  0x4f28
- 
-+#define R300_ZB_ZMASK_OFFSET                     0x4f30
-+#define R300_ZB_ZMASK_PITCH                      0x4f34
-+#define R300_ZB_ZMASK_WRINDEX                    0x4f38
-+#define R300_ZB_ZMASK_DWORD                      0x4f3c
-+#define R300_ZB_ZMASK_RDINDEX                    0x4f40
-+
- /* Hierarchical Z Memory Offset */
- #define R300_ZB_HIZ_OFFSET                       0x4f44
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
-index 16ce4a1..f87fee4 100644
---- a/src/mesa/drivers/dri/r300/r300_render.c
-+++ b/src/mesa/drivers/dri/r300/r300_render.c
-@@ -66,15 +66,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_vp_build.h"
- #include "radeon_reg.h"
- #include "radeon_macros.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
- #include "r300_reg.h"
- #include "r300_tex.h"
- #include "r300_emit.h"
--#include "r300_fragprog.h"
-+#include "r300_fragprog_common.h"
-+
- extern int future_hw_tcl_on;
- 
- /**
-@@ -175,85 +174,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
- static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
- 	void *out;
- 
--	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
--		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
--		rvb->start = ((char *)elts) - rvb->address;
--		rvb->aos_offset =
--		    rmesa->radeon.radeonScreen->gart_texture_offset +
--		    rvb->start;
--		return;
--	} else if (r300IsGartMemory(rmesa, elts, 1)) {
--		WARN_ONCE("Pointer not within GART memory!\n");
--		_mesa_exit(-1);
--	}
--
--	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
--	rvb->aos_offset = GET_START(rvb);
--
--	out = rvb->address + rvb->start;
-+	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
-+			     &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
-+	radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
-+	out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
- 	memcpy(out, elts, n_elts * 4);
-+	radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
- }
- 
--static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
--		       int vertex_count, int type)
-+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
--	e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
--	    (R300_VAP_PORT_IDX0 >> 2));
--	e32(addr);
--	e32(vertex_count);
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	if (vertex_count > 0) {
-+		BEGIN_BATCH(10);
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-+		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-+			  ((vertex_count + 0) << 16) |
-+			  type |
-+			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-+
-+		if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-+	    			 (R300_VAP_PORT_IDX0 >> 2));
-+			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
-+					rmesa->radeon.tcl.elt_dma_bo,
-+					rmesa->radeon.tcl.elt_dma_offset,
-+					RADEON_GEM_DOMAIN_GTT, 0, 0);
-+			OUT_BATCH(vertex_count);
-+		} else {
-+			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-+	    			 (R300_VAP_PORT_IDX0 >> 2));
-+			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
-+			OUT_BATCH(vertex_count);
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->radeon.tcl.elt_dma_bo,
-+					      RADEON_GEM_DOMAIN_GTT, 0, 0);
-+		}
-+		END_BATCH();
-+	}
- }
- 
- static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
- {
-+	BATCH_LOCALS(&rmesa->radeon);
-+	uint32_t voffset;
- 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
- 			offset);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
--	e32(nr);
- 
--	for (i = 0; i + 1 < nr; i += 2) {
--		e32((rmesa->state.aos[i].aos_size << 0) |
--		    (rmesa->state.aos[i].aos_stride << 8) |
--		    (rmesa->state.aos[i + 1].aos_size << 16) |
--		    (rmesa->state.aos[i + 1].aos_stride << 24));
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH(sz+2+(nr * 2));
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+		OUT_BATCH(nr);
-+
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+				  (rmesa->radeon.tcl.aos[i].stride << 8) |
-+				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+
-+			voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->radeon.tcl.aos[i].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+			voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+			  offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->radeon.tcl.aos[i+1].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+		}
- 
--		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
--		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
--	}
-+		if (nr & 1) {
-+			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+			voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->radeon.tcl.aos[nr - 1].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+		}
-+		END_BATCH();
-+	} else {
- 
--	if (nr & 1) {
--		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
--		    (rmesa->state.aos[nr - 1].aos_stride << 8));
--		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
-+		BEGIN_BATCH(sz+2+(nr * 2));
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+		OUT_BATCH(nr);
-+
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+				  (rmesa->radeon.tcl.aos[i].stride << 8) |
-+				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+
-+			voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+			OUT_BATCH(voffset);
-+			voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+			OUT_BATCH(voffset);
-+		}
-+
-+		if (nr & 1) {
-+			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+			  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+			voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+			OUT_BATCH(voffset);
-+		}
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->radeon.tcl.aos[i+0].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+			voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->radeon.tcl.aos[i+1].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+		}
-+		if (nr & 1) {
-+			voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->radeon.tcl.aos[nr-1].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+		}
-+		END_BATCH();
- 	}
-+
- }
- 
- static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	END_BATCH();
- }
- 
- static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
-@@ -269,6 +347,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 	if (type < 0 || num_verts <= 0)
- 		return;
- 
-+	/* Make space for at least 64 dwords.
-+	 * This is supposed to ensure that we can get all rendering
-+	 * commands into a single command buffer.
-+	 */
-+	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
-+
- 	if (vb->Elts) {
- 		if (num_verts > 65535) {
- 			/* not implemented yet */
-@@ -287,12 +371,13 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 		 * arrays. *sigh*
- 		 */
- 		r300EmitElts(ctx, vb->Elts, num_verts);
--		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
--		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
-+		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
-+		r300FireEB(rmesa, num_verts, type);
- 	} else {
--		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
-+		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
- 		r300FireAOS(rmesa, num_verts, type);
- 	}
-+	COMMIT_BATCH();
- }
- 
- static GLboolean r300RunRender(GLcontext * ctx,
-@@ -303,7 +388,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *vb = &tnl->vb;
- 
--
- 	if (RADEON_DEBUG & DEBUG_PRIMS)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
-@@ -314,7 +398,7 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
--	r300EmitState(rmesa);
-+	radeonEmitState(&rmesa->radeon);
- 
- 	for (i = 0; i < vb->PrimitiveCount; i++) {
- 		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
-@@ -325,11 +409,7 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 
- 	r300EmitCacheFlush(rmesa);
- 
--#ifdef USER_BUFFERS
--	r300UseArrays(ctx);
--#endif
--
--	r300ReleaseArrays(ctx);
-+	radeonReleaseArrays(ctx, ~0);
- 
- 	return GL_FALSE;
- }
-@@ -349,38 +429,19 @@ static int r300Fallback(GLcontext * ctx)
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	const unsigned back = ctx->Stencil._BackFace;
- 
--	/* Do we need to use new-style shaders?
--	 * Also is there a better way to do this? */
--	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--		struct r500_fragment_program *fp = (struct r500_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
--		if (fp) {
--			if (!fp->translated) {
--				r500TranslateFragmentShader(r300, fp);
--				FALLBACK_IF(!fp->translated);
--			}
--		}
--	} else {
--		struct r300_fragment_program *fp = (struct r300_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
--		if (fp) {
--			if (!fp->translated) {
--				r300TranslateFragmentShader(r300, fp);
--				FALLBACK_IF(!fp->translated);
--			}
--		}
-+	FALLBACK_IF(r300->radeon.Fallback);
-+
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
-+	if (fp && !fp->translated) {
-+		r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
-+		FALLBACK_IF(fp->error);
- 	}
- 
- 	FALLBACK_IF(ctx->RenderMode != GL_RENDER);
- 
--	/* If GL_EXT_stencil_two_side is disabled, this fallback check can
--	 * be removed.
--	 */
--	FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
--		    || ctx->Stencil.ValueMask[0] !=
--		    ctx->Stencil.ValueMask[back]
--		    || ctx->Stencil.WriteMask[0] !=
--		    ctx->Stencil.WriteMask[back]);
-+	FALLBACK_IF(ctx->Stencil.Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
-+		    || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
-+		    || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back]));
- 
- 	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
- 		FALLBACK_IF(ctx->Point.PointSprite);
-@@ -410,6 +471,9 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx,
- 	if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
-  	        return GL_TRUE;
- 
-+	if (!r300ValidateBuffers(ctx))
-+	    return GL_TRUE;
-+
- 	return r300RunRender(ctx, stage);
- }
- 
-@@ -432,6 +496,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx,
- 		return GL_TRUE;
- 	}
- 
-+	if (!r300ValidateBuffers(ctx))
-+	    return GL_TRUE;
-+
- 	r300UpdateShaders(rmesa);
- 
- 	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
-index f30fd98..0133b83 100644
---- a/src/mesa/drivers/dri/r300/r300_shader.c
-+++ b/src/mesa/drivers/dri/r300/r300_shader.c
-@@ -1,18 +1,42 @@
-+/*
-+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
- 
- #include "main/glheader.h"
- 
- #include "shader/program.h"
- #include "tnl/tnl.h"
- #include "r300_context.h"
--#include "r300_fragprog.h"
-+#include "r300_fragprog_common.h"
- 
- static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
- 					 GLuint id)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct r300_vertex_program_cont *vp;
--	struct r300_fragment_program *r300_fp;
--	struct r500_fragment_program *r500_fp;
-+	struct r300_fragment_program *fp;
- 
- 	switch (target) {
- 	case GL_VERTEX_STATE_PROGRAM_NV:
-@@ -20,28 +44,12 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
- 		vp = CALLOC_STRUCT(r300_vertex_program_cont);
- 		return _mesa_init_vertex_program(ctx, &vp->mesa_program,
- 						 target, id);
--	case GL_FRAGMENT_PROGRAM_ARB:
--		if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--			r500_fp = CALLOC_STRUCT(r500_fragment_program);
--			r500_fp->ctx = ctx;
--			return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
--							   target, id);
--		} else {
--			r300_fp = CALLOC_STRUCT(r300_fragment_program);
--			return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
--							   target, id);
--		}
- 
- 	case GL_FRAGMENT_PROGRAM_NV:
--		if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--			r500_fp = CALLOC_STRUCT(r500_fragment_program);
--			return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
--							   target, id);
--		} else {
--			r300_fp = CALLOC_STRUCT(r300_fragment_program);
--			return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
--							   target, id);
--		}
-+	case GL_FRAGMENT_PROGRAM_ARB:
-+		fp = CALLOC_STRUCT(r300_fragment_program);
-+		return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
-+
- 	default:
- 		_mesa_problem(ctx, "Bad target in r300NewProgram");
- 	}
-@@ -57,20 +65,15 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
- static void
- r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	struct r300_vertex_program_cont *vp = (void *)prog;
- 	struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
--	struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog;
- 
- 	switch (target) {
- 	case GL_VERTEX_PROGRAM_ARB:
- 		vp->progs = NULL;
- 		break;
- 	case GL_FRAGMENT_PROGRAM_ARB:
--		if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--			r500_fp->translated = GL_FALSE;
--		else
--			r300_fp->translated = GL_FALSE;
-+		r300_fp->translated = GL_FALSE;
- 		break;
- 	}
- 
-@@ -81,7 +84,14 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
- static GLboolean
- r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
- {
--	return GL_TRUE;
-+	if (target == GL_FRAGMENT_PROGRAM_ARB) {
-+		struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
-+		if (!fp->translated)
-+			r300TranslateFragmentShader(ctx, &fp->Base);
-+
-+		return !fp->error;
-+	} else
-+		return GL_TRUE;
- }
- 
- void r300InitShaderFuncs(struct dd_function_table *functions)
-diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
-index 8095538..99441a2 100644
---- a/src/mesa/drivers/dri/r300/r300_state.c
-+++ b/src/mesa/drivers/dri/r300/r300_state.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/macros.h"
- #include "main/context.h"
- #include "main/dd.h"
-+#include "main/framebuffer.h"
- #include "main/simple_list.h"
- #include "main/api_arrayelt.h"
- #include "main/texformat.h"
-@@ -53,20 +54,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vbo/vbo.h"
- #include "tnl/tnl.h"
- 
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
- #include "r300_reg.h"
- #include "r300_emit.h"
--#include "r300_fragprog.h"
- #include "r300_tex.h"
-+#include "r300_fragprog_common.h"
-+#include "r300_fragprog.h"
-+#include "r500_fragprog.h"
- 
- #include "drirenderbuffer.h"
- 
- extern int future_hw_tcl_on;
--extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx);
- 
- static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
- {
-@@ -451,18 +451,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
- 
- static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
- {
--	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- 
--	if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
--		struct r300_fragment_program *fp = (struct r300_fragment_program *)
--			(char *)ctx->FragmentProgram._Current;
--		return (fp && fp->WritesDepth);
--	} else {
--		struct r500_fragment_program* fp =
--			(struct r500_fragment_program*)(char*)
--			ctx->FragmentProgram._Current;
--		return (fp && fp->writes_depth);
--	}
-+	return (fp && fp->writes_depth);
- }
- 
- static void r300SetEarlyZState(GLcontext * ctx)
-@@ -533,8 +524,6 @@ static void r300SetAlphaState(GLcontext * ctx)
- 	R300_STATECHANGE(r300, at);
- 	r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
- 	r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
--
--	r300SetEarlyZState(ctx);
- }
- 
- static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
-@@ -582,15 +571,19 @@ static void r300SetDepthState(GLcontext * ctx)
- 		r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
- 		    translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
- 	}
--
--	r300SetEarlyZState(ctx);
- }
- 
- static void r300SetStencilState(GLcontext * ctx, GLboolean state)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	GLboolean hw_stencil = GL_FALSE;
-+	if (ctx->DrawBuffer) {
-+		struct radeon_renderbuffer *rrbStencil
-+			= radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
-+		hw_stencil = (rrbStencil && rrbStencil->bo);
-+	}
- 
--	if (r300->state.stencil.hw_stencil) {
-+	if (hw_stencil) {
- 		R300_STATECHANGE(r300, zs);
- 		if (state) {
- 			r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
-@@ -735,7 +728,12 @@ static void r300ColorMask(GLcontext * ctx,
- static void r300PointSize(GLcontext * ctx, GLfloat size)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
--        /* same size limits for AA, non-AA points */
-+
-+	/* We need to clamp to user defined range here, because
-+	 * the HW clamping happens only for per vertex point size. */
-+	size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
-+
-+	/* same size limits for AA, non-AA points */
- 	size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
- 
- 	R300_STATECHANGE(r300, ps);
-@@ -935,15 +933,25 @@ static void r300UpdateWindow(GLcontext * ctx)
- 	GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
- 	GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
- 	const GLfloat *v = ctx->Viewport._WindowMap.m;
-+	const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
-+	const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
-+	GLfloat y_scale, y_bias;
-+
-+	if (render_to_fbo) {
-+		y_scale = 1.0;
-+		y_bias = 0;
-+	} else {
-+		y_scale = -1.0;
-+		y_bias = yoffset;
-+	}
- 
- 	GLfloat sx = v[MAT_SX];
- 	GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
--	GLfloat sy = -v[MAT_SY];
--	GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
--	GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
--	GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
-+	GLfloat sy = v[MAT_SY] * y_scale;
-+	GLfloat ty = (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y;
-+	GLfloat sz = v[MAT_SZ] * depthScale;
-+	GLfloat tz = v[MAT_TZ] * depthScale;
- 
--	R300_FIREVERTICES(rmesa);
- 	R300_STATECHANGE(rmesa, vpt);
- 
- 	rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
-@@ -962,6 +970,8 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
- 	 * values, or keep the originals hanging around.
- 	 */
- 	r300UpdateWindow(ctx);
-+
-+	radeon_viewport(ctx, x, y, width, height);
- }
- 
- static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
-@@ -994,64 +1004,6 @@ void r300UpdateViewportOffset(GLcontext * ctx)
- 	radeonUpdateScissor(ctx);
- }
- 
--/**
-- * Tell the card where to render (offset, pitch).
-- * Effected by glDrawBuffer, etc
-- */
--void r300UpdateDrawBuffer(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	r300ContextPtr r300 = rmesa;
--	struct gl_framebuffer *fb = ctx->DrawBuffer;
--	driRenderbuffer *drb;
--
--	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
--		/* draw to front */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
--		    Renderbuffer;
--	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--		/* draw to back */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
--		    Renderbuffer;
--	} else {
--		/* drawing to multiple buffers, or none */
--		return;
--	}
--
--	assert(drb);
--	assert(drb->flippedPitch);
--
--	R300_STATECHANGE(rmesa, cb);
--
--	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--#if 0
--	R200_STATECHANGE(rmesa, ctx);
--
--	/* Note: we used the (possibly) page-flipped values */
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--	    = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
--	       & R200_COLOROFFSET_MASK);
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--
--	if (rmesa->sarea->tiling_enabled) {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
--		    R200_COLOR_TILE_ENABLE;
--	}
--#endif
--}
--
- static void
- r300FetchStateParameter(GLcontext * ctx,
- 			const gl_state_index state[STATE_LENGTH],
-@@ -1114,7 +1066,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
- 	if (!fp)
- 		return;
- 
--	paramList = fp->mesa_program.Base.Parameters;
-+	paramList = fp->Base.Base.Parameters;
- 
- 	if (!paramList)
- 		return;
-@@ -1233,9 +1185,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	int i;
--	struct r300_fragment_program *fp = (struct r300_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
--	struct r300_fragment_program_code *code = &fp->code;
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
-+	struct r300_fragment_program_code *code = &fp->code.r300;
- 
- 	R300_STATECHANGE(r300, fpt);
- 
-@@ -1269,15 +1220,15 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
- 	}
- 
- 	r300->hw.fpt.cmd[R300_FPT_CMD_0] =
--		cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
-+		cmdpacket0(r300->radeon.radeonScreen,
-+                   R300_US_TEX_INST_0, code->tex.length);
- }
- 
- static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
- {
- 	int i;
--	struct r500_fragment_program *fp = (struct r500_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
--	struct r500_fragment_program_code *code = &fp->code;
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
-+	struct r500_fragment_program_code *code = &fp->code.r500;
- 
- 	/* find all the texture instructions and relocate the texture units */
- 	for (i = 0; i < code->inst_end + 1; i++) {
-@@ -1320,7 +1271,7 @@ static GLuint translate_lod_bias(GLfloat bias)
- static void r300SetupTextures(GLcontext * ctx)
- {
- 	int i, mtu;
--	struct r300_tex_obj *t;
-+	struct radeon_tex_obj *t;
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	int hw_tmu = 0;
- 	int last_hw_tmu = -1;	/* -1 translates into no setup costs for fields */
-@@ -1354,21 +1305,16 @@ static void r300SetupTextures(GLcontext * ctx)
- 	/* We cannot let disabled tmu offsets pass DRM */
- 	for (i = 0; i < mtu; i++) {
- 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
--
--#if 0				/* Enables old behaviour */
--			hw_tmu = i;
--#endif
- 			tmu_mappings[i] = hw_tmu;
- 
--			t = r300->state.texture.unit[i].texobj;
--			/* XXX questionable fix for bug 9170: */
-+			t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
- 			if (!t)
- 				continue;
- 
--			if ((t->format & 0xffffff00) == 0xffffff00) {
-+			if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
- 				WARN_ONCE
- 				    ("unknown texture format (entry %x) encountered. Help me !\n",
--				     t->format & 0xff);
-+				     t->pp_txformat & 0xff);
- 			}
- 
- 			if (RADEON_DEBUG & DEBUG_STATE)
-@@ -1379,29 +1325,28 @@ static void r300SetupTextures(GLcontext * ctx)
- 
- 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
- 						hw_tmu] =
--			    gen_fixed_filter(t->filter) | (hw_tmu << 28);
-+			    gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
- 			/* Note: There is a LOD bias per texture unit and a LOD bias
- 			 * per texture object. We add them here to get the correct behaviour.
- 			 * (The per-texture object LOD bias was introduced in OpenGL 1.4
- 			 * and is not present in the EXT_texture_object extension).
- 			 */
- 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--				t->filter_1 |
--				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
-+				t->pp_txfilter_1 |
-+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
- 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--			    t->size;
-+			    t->pp_txsize;
- 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->format;
-+						hw_tmu] = t->pp_txformat;
- 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--			    t->pitch_reg;
--			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->offset;
-+			  t->pp_txpitch;
-+			r300->hw.textures[hw_tmu] = t;
- 
--			if (t->offset & R300_TXO_MACRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
- 				WARN_ONCE("macro tiling enabled!\n");
- 			}
- 
--			if (t->offset & R300_TXO_MICRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
- 				WARN_ONCE("micro tiling enabled!\n");
- 			}
- 
-@@ -1418,37 +1363,36 @@ static void r300SetupTextures(GLcontext * ctx)
- 	}
- 
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
- 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
- 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
- 
- 	if (!fp)		/* should only happenen once, just after context is created */
- 		return;
- 
- 	if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
--		if (fp->mesa_program.UsesKill && last_hw_tmu < 0) {
-+		if (fp->Base.UsesKill && last_hw_tmu < 0) {
- 			// The KILL operation requires the first texture unit
- 			// to be enabled.
- 			r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
- 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
- 			r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--				cmdpacket0(R300_TX_FILTER0_0, 1);
-+				cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
- 		}
--		r300SetupFragmentShaderTextures(ctx, tmu_mappings);
--	} else
--		r500SetupFragmentShaderTextures(ctx, tmu_mappings);
-+	}
-+	r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings);
- 
- 	if (RADEON_DEBUG & DEBUG_STATE)
- 		fprintf(stderr, "TX_ENABLE: %08x  last_hw_tmu=%d\n",
-@@ -1479,7 +1423,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 	if (hw_tcl_on)
- 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
- 	else
--		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
-+		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- 
- 	if (ctx->FragmentProgram._Current)
- 		InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-@@ -1543,6 +1487,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 		}
- 	}
- 
-+	/* We always route 4 texcoord components */
- 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- 		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
- 		    continue;
-@@ -1552,36 +1497,30 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 		    continue;
- 		}
- 
--		int swiz;
--
--		/* with TCL we always seem to route 4 components */
--		if (hw_tcl_on)
--		  count = 4;
--		else
--		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
--
--		switch(count) {
--		case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
--		case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
--		default:
--		case 1:
--		case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
--		};
--
--		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count);
-+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
- 		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- 		InputsRead &= ~(FRAG_BIT_TEX0 << i);
--		rs_tex_count += count;
-+		rs_tex_count += 4;
-+		++tex_ip;
-+		++fp_reg;
-+	}
-+
-+	if (InputsRead & FRAG_BIT_WPOS) {
-+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
-+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
-+		InputsRead &= ~FRAG_BIT_WPOS;
-+		rs_tex_count += 4;
- 		++tex_ip;
- 		++fp_reg;
- 	}
- 
- 	if (InputsRead & FRAG_BIT_FOGC) {
- 		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
--			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
-+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0);
-+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count);
- 			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- 			InputsRead &= ~FRAG_BIT_FOGC;
--			rs_tex_count += 4;
-+			rs_tex_count += 1;
- 			++tex_ip;
- 			++fp_reg;
- 		} else {
-@@ -1589,16 +1528,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 		}
- 	}
- 
--	if (InputsRead & FRAG_BIT_WPOS) {
--		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
--		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
--		InputsRead &= ~FRAG_BIT_WPOS;
--		rs_tex_count += 4;
--		++tex_ip;
--		++fp_reg;
--	}
--	InputsRead &= ~FRAG_BIT_WPOS;
--
- 	/* Setup default color if no color or tex was set */
- 	if (rs_tex_count == 0 && col_ip == 0) {
- 		r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
-@@ -1606,10 +1535,10 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 	}
- 
- 	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
--	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
-+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
- 	r300->hw.rc.cmd[2] |= high_rr - 1;
- 
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr);
- 
- 	if (InputsRead)
- 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
-@@ -1630,7 +1559,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 	if (hw_tcl_on)
- 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
- 	else
--		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
-+		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- 
- 	if (ctx->FragmentProgram._Current)
- 		InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-@@ -1694,7 +1623,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 		}
- 	}
- 
--
-+	/* We always route 4 texcoord components */
- 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- 		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
- 		    continue;
-@@ -1704,59 +1633,41 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 		    continue;
- 		}
- 
--		int swiz = 0;
--
--		/* with TCL we always seem to route 4 components */
--		if (hw_tcl_on)
--		  count = 4;
--		else
--		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
--
--		if (count == 4) {
--			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
--			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
--			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
--			swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT;
--		} else if (count == 3) {
--			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
--			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
--			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
--			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
--		} else if (count == 2) {
--			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
--			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
--			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
--		} else if (count == 1) {
--			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
--			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
--		} else {
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT;
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
--			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
--			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
--		}
-+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+			((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+			((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+			((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
- 
--		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz;
- 		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- 		InputsRead &= ~(FRAG_BIT_TEX0 << i);
--		rs_tex_count += count;
-+		rs_tex_count += 4;
-+		++tex_ip;
-+		++fp_reg;
-+	}
-+
-+	if (InputsRead & FRAG_BIT_WPOS) {
-+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+			((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+			((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+			((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-+
-+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
-+		InputsRead &= ~FRAG_BIT_WPOS;
-+		rs_tex_count += 4;
- 		++tex_ip;
- 		++fp_reg;
- 	}
- 
- 	if (InputsRead & FRAG_BIT_FOGC) {
- 		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
--			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
--				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
--				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
--				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+				(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+				(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+				(R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
- 
- 			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- 			InputsRead &= ~FRAG_BIT_FOGC;
--			rs_tex_count += 4;
-+			rs_tex_count += 1;
- 			++tex_ip;
- 			++fp_reg;
- 		} else {
-@@ -1764,19 +1675,6 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 		}
- 	}
- 
--	if (InputsRead & FRAG_BIT_WPOS) {
--		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
--				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
--				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
--				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
--
--		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
--		InputsRead &= ~FRAG_BIT_WPOS;
--		rs_tex_count += 4;
--		++tex_ip;
--		++fp_reg;
--	}
--
- 	/* Setup default color if no color or tex was set */
- 	if (rs_tex_count == 0 && col_ip == 0) {
- 		r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
-@@ -1787,7 +1685,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
- 	r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1);
- 
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr);
- 
- 	if (InputsRead)
- 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
-@@ -1900,7 +1798,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
- 
- static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
- {
--	struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader);
-+	struct r300_vertex_shader_state *prog = &(rmesa->vertex_shader);
- 	GLuint o_reg = 0;
- 	GLuint i_reg = 0;
- 	int i;
-@@ -1909,11 +1807,11 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
- 	int program_end = 0;
- 
- 	for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) {
--		if (rmesa->state.sw_tcl_inputs[i] != -1) {
-+		if (rmesa->swtcl.sw_tcl_inputs[i] != -1) {
- 			prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT);
--			prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
--			prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
--			prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+			prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+			prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+			prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
- 			program_end += 4;
- 			i_reg++;
- 		}
-@@ -1984,6 +1882,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
- 	  (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
- }
- 
-+
- static void r300SetupVertexProgram(r300ContextPtr rmesa)
- {
- 	GLcontext *ctx = rmesa->radeon.glCtx;
-@@ -2013,6 +1912,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
-  */
- static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- {
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	if (RADEON_DEBUG & DEBUG_STATE)
- 		fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
- 			_mesa_lookup_enum_by_nr(cap),
-@@ -2058,8 +1958,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- 	case GL_POLYGON_OFFSET_FILL:
- 		r300SetPolygonOffsetState(ctx, state);
- 		break;
-+	case GL_SCISSOR_TEST:
-+		radeon_firevertices(&rmesa->radeon);
-+		rmesa->radeon.state.scissor.enabled = state;
-+		radeonUpdateScissor( ctx );
-+		break;
- 	default:
--		radeonEnable(ctx, cap, state);
- 		break;
- 	}
- }
-@@ -2078,7 +1982,7 @@ static void r300ResetHwState(r300ContextPtr r300)
- 	if (RADEON_DEBUG & DEBUG_STATE)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	r300UpdateWindow(ctx);
-+	radeon_firevertices(&r300->radeon);
- 
- 	r300ColorMask(ctx,
- 		      ctx->Color.ColorMask[RCOMP],
-@@ -2100,8 +2004,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300UpdateCulling(ctx);
- 
--	r300UpdateTextureState(ctx);
--
- 	r300SetBlendState(ctx);
- 	r300SetLogicOpState(ctx);
- 
-@@ -2240,20 +2142,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300BlendColor(ctx, ctx->Color.BlendColor);
- 
--	/* Again, r300ClearBuffer uses this */
--	r300->hw.cb.cmd[R300_CB_OFFSET] =
--	    r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--
- 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
-@@ -2269,41 +2157,15 @@ static void r300ResetHwState(r300ContextPtr r300)
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
- 
--	r300->hw.zb.cmd[R300_ZB_OFFSET] =
--	    r300->radeon.radeonScreen->depthOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
--
--	if (r300->radeon.sarea->tiling_enabled) {
--		/* XXX: Turn off when clearing buffers ? */
--		r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
--
--		if (ctx->Visual.depthBits == 24)
--			r300->hw.zb.cmd[R300_ZB_PITCH] |=
--			    R300_DEPTHMICROTILE_TILED;
--	}
--
- 	r300->hw.zb_depthclearvalue.cmd[1] = 0;
- 
--	switch (ctx->Visual.depthBits) {
--	case 16:
--		r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z;
--		break;
--	case 24:
--		r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
--		break;
--	default:
--		fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
--		_mesa_exit(-1);
--	}
--
- 	r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
- 	r300->hw.zstencil_format.cmd[3] = 0x00000003;
- 	r300->hw.zstencil_format.cmd[4] = 0x00000000;
- 	r300SetEarlyZState(ctx);
- 
--	r300->hw.unk4F30.cmd[1] = 0;
--	r300->hw.unk4F30.cmd[2] = 0;
-+	r300->hw.zb_zmask.cmd[1] = 0;
-+	r300->hw.zb_zmask.cmd[2] = 0;
- 
- 	r300->hw.zb_hiz_offset.cmd[1] = 0;
- 
-@@ -2317,7 +2179,7 @@ static void r300ResetHwState(r300ContextPtr r300)
- 		r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
- 	}
- 
--	r300->hw.all_dirty = GL_TRUE;
-+	r300->radeon.hw.all_dirty = GL_TRUE;
- }
- 
- void r300UpdateShaders(r300ContextPtr rmesa)
-@@ -2328,8 +2190,8 @@ void r300UpdateShaders(r300ContextPtr rmesa)
- 
- 	ctx = rmesa->radeon.glCtx;
- 
--	if (rmesa->NewGLState && hw_tcl_on) {
--		rmesa->NewGLState = 0;
-+	if (rmesa->radeon.NewGLState && hw_tcl_on) {
-+		rmesa->radeon.NewGLState = 0;
- 
- 		for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- 			rmesa->temp_attrib[i] =
-@@ -2383,24 +2245,18 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
- }
- 
- 
--static void r300SetupPixelShader(r300ContextPtr rmesa)
-+static GLboolean r300SetupPixelShader(GLcontext *ctx)
- {
--	GLcontext *ctx = rmesa->radeon.glCtx;
--	struct r300_fragment_program *fp = (struct r300_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- 	struct r300_fragment_program_code *code;
- 	int i, k;
- 
--	if (!fp)		/* should only happenen once, just after context is created */
--		return;
-+	/* Program is not native, fallback to software */
-+	if (fp->error)
-+		return GL_FALSE;
- 
--	r300TranslateFragmentShader(rmesa, fp);
--	if (!fp->translated) {
--		fprintf(stderr, "%s: No valid fragment shader, exiting\n",
--			__FUNCTION__);
--		return;
--	}
--	code = &fp->code;
-+	code = &fp->code.r300;
- 
- 	r300SetupTextures(ctx);
- 
-@@ -2408,10 +2264,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	R300_STATECHANGE(rmesa, fpi[1]);
- 	R300_STATECHANGE(rmesa, fpi[2]);
- 	R300_STATECHANGE(rmesa, fpi[3]);
--	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
--	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
--	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
--	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
-+	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
-+	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- 	for (i = 0; i < code->alu.length; i++) {
- 		rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
- 		rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
-@@ -2442,15 +2298,17 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	}
- 
- 	R300_STATECHANGE(rmesa, fpp);
--	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
-+	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
- 	for (i = 0; i < code->const_nr; i++) {
- 		const GLfloat *constant = get_fragmentprogram_constant(ctx,
--			&fp->mesa_program.Base, code->constant[i]);
-+			&fp->Base.Base, code->constant[i]);
- 		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
- 		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
- 		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
- 		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);
- 	}
-+
-+	return GL_TRUE;
- }
- 
- #define bump_r500fp_count(ptr, new_count)   do{\
-@@ -2467,27 +2325,21 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
- } while(0)
- 
--static void r500SetupPixelShader(r300ContextPtr rmesa)
-+static GLboolean r500SetupPixelShader(GLcontext *ctx)
- {
--	GLcontext *ctx = rmesa->radeon.glCtx;
--	struct r500_fragment_program *fp = (struct r500_fragment_program *)
--	    (char *)ctx->FragmentProgram._Current;
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- 	int i;
- 	struct r500_fragment_program_code *code;
- 
--	if (!fp)		/* should only happenen once, just after context is created */
--		return;
--
- 	((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
- 	((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
- 
--	r500TranslateFragmentShader(rmesa, fp);
--	if (!fp->translated) {
--		fprintf(stderr, "%s: No valid fragment shader, exiting\n",
--			__FUNCTION__);
--		return;
--	}
--	code = &fp->code;
-+	/* Program is not native, fallback to software */
-+	if (fp->error)
-+		return GL_FALSE;
-+
-+	code = &fp->code.r500;
- 
- 	r300SetupTextures(ctx);
- 
-@@ -2519,7 +2371,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa)
- 	R300_STATECHANGE(rmesa, r500fp_const);
- 	for (i = 0; i < code->const_nr; i++) {
- 		const GLfloat *constant = get_fragmentprogram_constant(ctx,
--			&fp->mesa_program.Base, code->constant[i]);
-+			&fp->Base.Base, code->constant[i]);
- 		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
- 		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
- 		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
-@@ -2527,6 +2379,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa)
- 	}
- 	bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
- 
-+	return GL_TRUE;
- }
- 
- void r300UpdateShaderStates(r300ContextPtr rmesa)
-@@ -2534,7 +2387,10 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
- 	GLcontext *ctx;
- 	ctx = rmesa->radeon.glCtx;
- 
--	r300UpdateTextureState(ctx);
-+	/* should only happenen once, just after context is created */
-+	if (!ctx->FragmentProgram._Current)
-+		return;
-+
- 	r300SetEarlyZState(ctx);
- 
- 	/* w_fmt value is set to get best performance
-@@ -2558,19 +2414,15 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
- 		rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
- 	}
- 
--	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		r500SetupPixelShader(rmesa);
--	else
--		r300SetupPixelShader(rmesa);
-+	r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
- 
--	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		r500SetupRSUnit(ctx);
--	else
--		r300SetupRSUnit(ctx);
-+	if (!rmesa->vtbl.SetupPixelShader(ctx))
-+		return;
-+
-+	rmesa->vtbl.SetupRSUnit(ctx);
- 
- 	if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- 		r300SetupVertexProgram(rmesa);
--
- }
- 
- /**
-@@ -2584,15 +2436,18 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
- 	_swsetup_InvalidateState(ctx, new_state);
- 	_vbo_InvalidateState(ctx, new_state);
- 	_tnl_InvalidateState(ctx, new_state);
--	_ae_invalidate_state(ctx, new_state);
- 
- 	if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
--		r300UpdateDrawBuffer(ctx);
-+		_mesa_update_framebuffer(ctx);
-+		/* this updates the DrawBuffer's Width/Height if it's a FBO */
-+		_mesa_update_draw_buffer_bounds(ctx);
-+
-+		R300_STATECHANGE(r300, cb);
- 	}
- 
- 	r300UpdateStateParameters(ctx, new_state);
- 
--	r300->NewGLState |= new_state;
-+	r300->radeon.NewGLState |= new_state;
- }
- 
- /**
-@@ -2602,32 +2457,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
-  */
- void r300InitState(r300ContextPtr r300)
- {
--	GLcontext *ctx = r300->radeon.glCtx;
--	GLuint depth_fmt;
--
--	radeonInitState(&r300->radeon);
--
--	switch (ctx->Visual.depthBits) {
--	case 16:
--		r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
--		depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
--		break;
--	case 24:
--		r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
--		depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
--		break;
--	default:
--		fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
--			ctx->Visual.depthBits);
--		_mesa_exit(-1);
--	}
--
--	/* Only have hw stencil when depth buffer is 24 bits deep */
--	r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
--					  ctx->Visual.depthBits == 24);
--
--	memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
--
- 	r300ResetHwState(r300);
- }
- 
-@@ -2661,7 +2490,6 @@ void r300UpdateClipPlanes( GLcontext *ctx )
-  */
- void r300InitStateFuncs(struct dd_function_table *functions)
- {
--	radeonInitStateFuncs(functions);
- 
- 	functions->UpdateState = r300InvalidateState;
- 	functions->AlphaFunc = r300AlphaFunc;
-@@ -2697,4 +2525,25 @@ void r300InitStateFuncs(struct dd_function_table *functions)
- 	functions->RenderMode = r300RenderMode;
- 
- 	functions->ClipPlane = r300ClipPlane;
-+	functions->Scissor = radeonScissor;
-+
-+	functions->DrawBuffer		= radeonDrawBuffer;
-+	functions->ReadBuffer		= radeonReadBuffer;
-+}
-+
-+void r300InitShaderFunctions(r300ContextPtr r300)
-+{
-+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-+		r300->vtbl.SetupRSUnit = r500SetupRSUnit;
-+		r300->vtbl.SetupPixelShader = r500SetupPixelShader;
-+		r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
-+		r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit;
-+		r300->vtbl.FragmentProgramDump = r500FragmentProgramDump;
-+	} else {
-+		r300->vtbl.SetupRSUnit = r300SetupRSUnit;
-+		r300->vtbl.SetupPixelShader = r300SetupPixelShader;
-+		r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
-+		r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit;
-+		r300->vtbl.FragmentProgramDump = r300FragmentProgramDump;
-+	}
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
-index 0589ab7..247a20e 100644
---- a/src/mesa/drivers/dri/r300/r300_state.h
-+++ b/src/mesa/drivers/dri/r300/r300_state.h
-@@ -39,32 +39,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define R300_NEWPRIM( rmesa )			\
-   do {						\
--    if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );		\
-+  if ( rmesa->radeon.dma.flush )			\
-+    rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
-   } while (0)
- 
- #define R300_STATECHANGE(r300, atom) \
- 	do {						\
- 	  R300_NEWPRIM(r300);				\
- 		r300->hw.atom.dirty = GL_TRUE;		\
--		r300->hw.is_dirty = GL_TRUE;		\
-+		r300->radeon.hw.is_dirty = GL_TRUE;		\
- 	} while(0)
- 
--#define R300_PRINT_STATE(r300, atom) \
--		r300PrintStateAtom(r300, &r300->hw.atom)
--
--/* Fire the buffered vertices no matter what.
--   TODO: This has not been implemented yet
-- */
--#define R300_FIREVERTICES( r300 )			\
--do {							\
--    \
--   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
--      r300Flush( (r300)->radeon.glCtx );		\
--   }							\
--    \
--} while (0)
--
- // r300_state.c
- extern int future_hw_tcl_on;
- void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
-index ba3621b..a40d037 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.c
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
-@@ -28,362 +28,303 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- /*
-  * Authors:
-  *   Dave Airlie <airlied@linux.ie>
-+ *   Maciej Cencora <m.cencora@gmail.com>
-  */
- 
--/* derived from r200 swtcl path */
--
--
--
--#include "main/glheader.h"
--#include "main/mtypes.h"
--#include "main/colormac.h"
--#include "main/enums.h"
--#include "main/image.h"
--#include "main/imports.h"
--#include "main/light.h"
--#include "main/macros.h"
--
--#include "swrast/s_context.h"
--#include "swrast/s_fog.h"
--#include "swrast_setup/swrast_setup.h"
--#include "math/m_translate.h"
- #include "tnl/tnl.h"
--#include "tnl/t_context.h"
- #include "tnl/t_pipeline.h"
- 
--#include "r300_context.h"
--#include "r300_swtcl.h"
- #include "r300_state.h"
--#include "r300_ioctl.h"
-+#include "r300_swtcl.h"
- #include "r300_emit.h"
--#include "r300_mem.h"
--
--static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
-+#include "r300_tex.h"
- 
--
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
--void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
- #define EMIT_ATTR( ATTR, STYLE )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
-+} while (0)
-+
-+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask) \
-+do { \
-+	attrs[num_attrs].attr = (_attr); \
-+	attrs[num_attrs].format = (_format); \
-+	attrs[num_attrs].dst_loc = (_dst_loc); \
-+	attrs[num_attrs].swizzle = (_swizzle); \
-+	attrs[num_attrs].write_mask = (_write_mask); \
-+	++num_attrs; \
- } while (0)
- 
-+static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten, GLuint vap_out_fmt_1)
-+{
-+	r300ContextPtr rmesa = R300_CONTEXT( ctx );
-+	struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs;
-+	int i, j, reg_count;
-+	uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
-+	uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
-+
-+	for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
-+		vir0[i] = vir1[i] = 0;
-+
-+	for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) {
-+		int tmp, data_format;
-+		switch (attrs[i].format) {
-+			case EMIT_1F:
-+				data_format = R300_DATA_TYPE_FLOAT_1;
-+				break;
-+			case EMIT_2F:
-+				data_format = R300_DATA_TYPE_FLOAT_2;
-+				break;
-+			case EMIT_3F:
-+				data_format = R300_DATA_TYPE_FLOAT_3;
-+				break;
-+			case EMIT_4F:
-+				data_format = R300_DATA_TYPE_FLOAT_4;
-+				break;
-+			case EMIT_4UB_4F_RGBA:
-+			case EMIT_4UB_4F_ABGR:
-+				data_format = R300_DATA_TYPE_BYTE | R300_NORMALIZE;
-+				break;
-+			default:
-+				fprintf(stderr, "%s: Invalid data format type", __FUNCTION__);
-+				_mesa_exit(-1);
-+				break;
-+		}
-+
-+		tmp = data_format | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
-+		if (i % 2 == 0) {
-+			vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
-+			vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
-+		} else {
-+			vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
-+			vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
-+			++j;
-+		}
-+	}
-+
-+	reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1;
-+	if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) {
-+		vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
-+	} else {
-+		vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
-+	}
-+
-+	R300_STATECHANGE(rmesa, vir[0]);
-+	R300_STATECHANGE(rmesa, vir[1]);
-+	R300_STATECHANGE(rmesa, vof);
-+	R300_STATECHANGE(rmesa, vic);
-+
-+	if (rmesa->radeon.radeonScreen->kernel_mm) {
-+		rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
-+		rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
-+	} else {
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
-+	}
-+
-+	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
-+	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-+	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
-+	/**
-+	  * Can't use r300VAPOutputCntl1 function because it assumes
-+	  * that all texture coords have 4 components and that's the case
-+	  * for HW TCL path, but not for SW TCL.
-+	  */
-+	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_out_fmt_1;
-+}
-+
-+
- static void r300SetVertexFormat( GLcontext *ctx )
- {
- 	r300ContextPtr rmesa = R300_CONTEXT( ctx );
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *VB = &tnl->vb;
--	DECLARE_RENDERINPUTS(index_bitset);
--	GLuint InputsRead = 0, OutputsWritten = 0;
--	int vap_fmt_1 = 0;
--	int offset = 0;
--	int vte = 0;
--	int fog_id;
--	GLint inputs[VERT_ATTRIB_MAX];
--	GLint tab[VERT_ATTRIB_MAX];
--	int swizzle[VERT_ATTRIB_MAX][4];
--	GLuint i, nr;
--	GLuint sz;
--
--	DECLARE_RENDERINPUTS(render_inputs_bitset);
--	RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
--	RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
--	RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
--
--	vte = rmesa->hw.vte.cmd[1];
--	vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
--	/* Important:
--	 */
--	if ( VB->NdcPtr != NULL ) {
--		VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
--		vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
--	}
--	else {
--		VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
--		vte |= R300_VTX_W0_FMT;
--	}
-+	int first_free_tex = 0, vap_out_fmt_1 = 0;
-+	GLuint InputsRead = 0;
-+	GLuint OutputsWritten = 0;
-+	int num_attrs = 0;
-+	struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs;
- 
--	assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--	rmesa->swtcl.vertex_attr_count = 0;
-+	rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
-+	rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
--	/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
--	 * build up a hardware vertex.
--	 */
--	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
--		sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
-+	/* We always want non Ndc coords format */
-+	VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-+
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS)) {
- 		InputsRead |= 1 << VERT_ATTRIB_POS;
- 		OutputsWritten |= 1 << VERT_RESULT_HPOS;
--		EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
--		offset = sz;
--	} else {
--		offset = 4;
--		EMIT_PAD(4 * sizeof(float));
--	}
--/*
--	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
--		EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
--		offset += 1;
-+		EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
-+		ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW);
-+		rmesa->swtcl.coloroffset = 4;
- 	}
--*/
--	if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
--		sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
--	        rmesa->swtcl.coloroffset = offset;
-+
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
- 		InputsRead |= 1 << VERT_ATTRIB_COLOR0;
- 		OutputsWritten |= 1 << VERT_RESULT_COL0;
--		EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
--		offset += sz;
-+#if MESA_LITTLE_ENDIAN
-+		EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
-+		ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW);
-+#else
-+		EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
-+		ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW);
-+#endif
- 	}
- 
--	rmesa->swtcl.specoffset = 0;
--	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
--		sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
--		rmesa->swtcl.specoffset = offset;
--		EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) {
-+		GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
- 		InputsRead |= 1 << VERT_ATTRIB_COLOR1;
- 		OutputsWritten |= 1 << VERT_RESULT_COL1;
-+#if MESA_LITTLE_ENDIAN
-+		EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
-+		ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR1, swiz, MASK_XYZW);
-+#else
-+		EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
-+		ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR1, swiz, MASK_XYZW);
-+#endif
-+		rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
- 	}
- 
--	fog_id = -1;
--	if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) {
--		/* find first free tex coord slot */
--		if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
--			int i;
--			for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
--				if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
--					fog_id = i;
--					break;
--				}
--			}
--		} else {
--			fog_id = 0;
--		}
--
--		if (fog_id == -1) {
--			fprintf(stderr, "\tout of free texcoords to do fog\n");
--			_mesa_exit(-1);
-+	if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
-+		VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1];
-+		OutputsWritten |= 1 << VERT_RESULT_BFC0;
-+#if MESA_LITTLE_ENDIAN
-+		EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
-+		ADD_ATTR(VERT_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW);
-+#else
-+		EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
-+		ADD_ATTR(VERT_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW);
-+#endif
-+		if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) {
-+			GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
-+			OutputsWritten |= 1 << VERT_RESULT_BFC1;
-+#if MESA_LITTLE_ENDIAN
-+			EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
-+			ADD_ATTR(VERT_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR3, swiz, MASK_XYZW);
-+#else
-+			EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
-+			ADD_ATTR(VERT_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR3, swiz, MASK_XYZW);
-+#endif
- 		}
-+	}
- 
--		sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size;
--		EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1);
--		InputsRead |= 1 << VERT_ATTRIB_FOG;
--		OutputsWritten |= 1 << VERT_RESULT_FOGC;
--		vap_fmt_1 |= sz << (3 * fog_id);
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
-+		GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
-+		InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
-+		OutputsWritten |= 1 << VERT_RESULT_PSIZ;
-+		EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-+		ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X);
- 	}
- 
--	if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-+	/**
-+	 *  Sending only one texcoord component may lead to lock up,
-+	 *  so for all textures always output 4 texcoord components to RS.
-+	 */
-+	if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
- 		int i;
--
-+		GLuint swiz, format;
- 		for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
--			if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
--				sz = VB->TexCoordPtr[i]->size;
-+			if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
-+				switch (VB->TexCoordPtr[i]->size) {
-+					case 1:
-+						format = EMIT_1F;
-+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
-+						break;
-+					case 2:
-+						format = EMIT_2F;
-+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
-+						break;
-+					case 3:
-+						format = EMIT_3F;
-+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
-+						break;
-+					case 4:
-+						format = EMIT_4F;
-+						swiz = SWIZZLE_XYZW;
-+						break;
-+					default:
-+						continue;
-+				}
- 				InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
- 				OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
--				EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
--				vap_fmt_1 |= sz << (3 * i);
-+				EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
-+				ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, MASK_XYZW);
-+				vap_out_fmt_1 |= 4 << (i * 3);
-+				++first_free_tex;
- 			}
- 		}
- 	}
- 
- 	/* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
--	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) {
--		int first_free_tex = -1;
--		if (fog_id >= 0) {
--			first_free_tex = fog_id+1;
--		} else {
--			if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
--				int i;
--				for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
--					if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
--						first_free_tex = i;
--						break;
--					}
--				}
--			} else {
--				first_free_tex = 0;
--			}
--		}
--
--		if (first_free_tex == -1) {
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) {
-+		if (first_free_tex >= ctx->Const.MaxTextureUnits) {
- 			fprintf(stderr, "\tout of free texcoords to write w pos\n");
- 			_mesa_exit(-1);
- 		}
- 
--		sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
- 		InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
- 		OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
--		EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
--		vap_fmt_1 |= sz << (3 * first_free_tex);
--	}
--
--	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
--		if (InputsRead & (1 << i)) {
--			inputs[i] = nr++;
--		} else {
--			inputs[i] = -1;
--		}
-+		EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
-+		ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW);
-+		vap_out_fmt_1 |= 4 << (first_free_tex * 3);
-+		++first_free_tex;
- 	}
- 
--	/* Fixed, apply to vir0 only */
--	if (InputsRead & (1 << VERT_ATTRIB_POS))
--		inputs[VERT_ATTRIB_POS] = 0;
--	if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
--		inputs[VERT_ATTRIB_COLOR0] = 2;
--	if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
--		inputs[VERT_ATTRIB_COLOR1] = 3;
--	if (InputsRead & (1 << VERT_ATTRIB_FOG))
--		inputs[VERT_ATTRIB_FOG] = 6 + fog_id;
--	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
--		if (InputsRead & (1 << i))
--			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
--
--	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
--		if (InputsRead & (1 << i)) {
--			tab[nr++] = i;
-+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) {
-+		if (first_free_tex >= ctx->Const.MaxTextureUnits) {
-+			fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
-+			_mesa_exit(-1);
- 		}
--	}
- 
--	for (i = 0; i < nr; i++) {
--		int ci;
--
--		swizzle[i][0] = SWIZZLE_ZERO;
--		swizzle[i][1] = SWIZZLE_ZERO;
--		swizzle[i][2] = SWIZZLE_ZERO;
--		swizzle[i][3] = SWIZZLE_ONE;
--
--		for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
--			swizzle[i][ci] = ci;
--		}
-+		InputsRead |= 1 << VERT_ATTRIB_FOG;
-+		OutputsWritten |= 1 << VERT_RESULT_FOGC;
-+		GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
-+		EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
-+		ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_X);
-+		vap_out_fmt_1 |=  1 << (first_free_tex * 3);
- 	}
- 
- 	R300_NEWPRIM(rmesa);
--	R300_STATECHANGE(rmesa, vir[0]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
--		r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
--				   VB->AttribPtr, inputs, tab, nr);
--	R300_STATECHANGE(rmesa, vir[1]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
--		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
--				   nr);
-+	r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten, vap_out_fmt_1);
- 
--	R300_STATECHANGE(rmesa, vic);
--	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
--	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
--
--	R300_STATECHANGE(rmesa, vof);
--	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
--	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
--
--	rmesa->swtcl.vertex_size =
-+	rmesa->radeon.swtcl.vertex_size =
- 		_tnl_install_attrs( ctx,
--				    rmesa->swtcl.vertex_attrs,
--				    rmesa->swtcl.vertex_attr_count,
-+				    rmesa->radeon.swtcl.vertex_attrs,
-+				    rmesa->radeon.swtcl.vertex_attr_count,
- 				    NULL, 0 );
- 
--	rmesa->swtcl.vertex_size /= 4;
--
--	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
--
-+	rmesa->radeon.swtcl.vertex_size /= 4;
- 
--	R300_STATECHANGE(rmesa, vte);
--	rmesa->hw.vte.cmd[1] = vte;
--	rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
-+	RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
- }
- 
- 
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	rmesa->dma.flush = NULL;
--
--	if (rmesa->dma.current.buf) {
--		struct r300_dma_region *current = &rmesa->dma.current;
--		GLuint current_offset = GET_START(current);
--
--		assert (current->start +
--			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--			current->ptr);
--
--		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--
--			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
--
--			r300EmitState(rmesa);
--
--			r300EmitVertexAOS( rmesa,
--					   rmesa->swtcl.vertex_size,
--					   current_offset);
--
--			r300EmitVbufPrim( rmesa,
--					  rmesa->swtcl.hw_primitive,
--					  rmesa->swtcl.numverts);
--
--			r300EmitCacheFlush(rmesa);
--		}
--
--		rmesa->swtcl.numverts = 0;
--		current->start = current->ptr;
--	}
--}
--
--/* Alloc space in the current dma region.
-- */
--static void *
--r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
--{
--	GLuint bytes = vsize * nverts;
--
--	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
--		r300RefillCurrentDmaRegion( rmesa, bytes);
--
--	if (!rmesa->dma.flush) {
--		rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--		rmesa->dma.flush = flush_last_swtcl_prim;
--	}
--
--	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
--	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--	ASSERT( rmesa->dma.current.start +
--		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--		rmesa->dma.current.ptr );
--
--	{
--		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--		rmesa->dma.current.ptr += bytes;
--		rmesa->swtcl.numverts += nverts;
--		return head;
--	}
--}
--
- static GLuint reduced_prim[] = {
--  GL_POINTS,
--  GL_LINES,
--  GL_LINES,
--  GL_LINES,
--  GL_TRIANGLES,
--  GL_TRIANGLES,
--  GL_TRIANGLES,
--  GL_TRIANGLES,
--  GL_TRIANGLES,
--  GL_TRIANGLES,
-+	GL_POINTS,
-+	GL_LINES,
-+	GL_LINES,
-+	GL_LINES,
-+	GL_TRIANGLES,
-+	GL_TRIANGLES,
-+	GL_TRIANGLES,
-+	GL_TRIANGLES,
-+	GL_TRIANGLES,
-+	GL_TRIANGLES,
- };
- 
- static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
- static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
--//static void r300ResetLineStipple( GLcontext *ctx );
- 
- /***********************************************************************
-  *                    Emit primitives as inline vertices               *
-@@ -405,15 +346,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
- #define CTX_ARG r300ContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
- #define LOCAL_VARS						\
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
--   const char *r300verts = (char *)rmesa->swtcl.verts;
-+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
- #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
- #define VERTEX r300Vertex
--#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
--#define PRINT_VERTEX(x)
- #undef TAG
- #define TAG(x) r300_##x
- #include "tnl_dd/t_dd_triemit.h"
-@@ -433,9 +372,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-  *              Build render functions from dd templates               *
-  ***********************************************************************/
- 
--#define R300_TWOSIDE_BIT	0x01
--#define R300_UNFILLED_BIT	0x02
--#define R300_MAX_TRIFUNC	0x04
-+#define R300_UNFILLED_BIT	0x01
-+#define R300_MAX_TRIFUNC	0x02
- 
- static struct {
-    tnl_points_func	        points;
-@@ -446,9 +384,9 @@ static struct {
- 
- #define DO_FALLBACK  0
- #define DO_UNFILLED (IND & R300_UNFILLED_BIT)
--#define DO_TWOSIDE  (IND & R300_TWOSIDE_BIT)
-+#define DO_TWOSIDE   0
- #define DO_FLAT      0
--#define DO_OFFSET     0
-+#define DO_OFFSET    0
- #define DO_TRI       1
- #define DO_QUAD      1
- #define DO_LINE      1
-@@ -468,33 +406,39 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
--
--/* Only used to pull back colors into vertices (ie, we know color is
-- * floating point).
-- */
--#define R300_COLOR( dst, src )				\
--do {							\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]);	\
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
-+
-+#define VERT_SET_RGBA( v, c ) \
-+do { \
-+   r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
- } while (0)
- 
--#define VERT_SET_RGBA( v, c )    if (coloroffset) R300_COLOR( v->ub4[coloroffset], c )
--#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
--#define VERT_SAVE_RGBA( idx )    if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
--#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
-+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
- 
--#define R300_SPEC( dst, src )				\
--do {							\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
--   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
-+#define VERT_SET_SPEC( v0, c ) \
-+do { \
-+   if (specoffset) { \
-+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
-+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
-+   } \
- } while (0)
- 
--#define VERT_SET_SPEC( v, c )    if (specoffset) R300_SPEC( v->ub4[specoffset], c )
--#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
-+#define VERT_COPY_SPEC( v0, v1 ) \
-+do { \
-+   if (specoffset) { \
-+       v0->v.specular.red = v1->v.specular.red; \
-+       v0->v.specular.green = v1->v.specular.green; \
-+       v0->v.specular.blue = v1->v.specular.blue; \
-+   } \
-+} while (0)
-+
-+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
-+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
- #define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
- #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
- 
-@@ -514,7 +458,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -530,26 +474,15 @@ do {							\
- #define TAG(x) x
- #include "tnl_dd/t_dd_tritmp.h"
- 
--#define IND (R300_TWOSIDE_BIT)
--#define TAG(x) x##_twoside
--#include "tnl_dd/t_dd_tritmp.h"
--
- #define IND (R300_UNFILLED_BIT)
- #define TAG(x) x##_unfilled
- #include "tnl_dd/t_dd_tritmp.h"
- 
--#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
--#define TAG(x) x##_twoside_unfilled
--#include "tnl_dd/t_dd_tritmp.h"
--
--
- 
- static void init_rast_tab( void )
- {
-    init();
--   init_twoside();
-    init_unfilled();
--   init_twoside_unfilled();
- }
- 
- /**********************************************************************/
-@@ -571,8 +504,8 @@ static void init_rast_tab( void )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *r300verts = (char *)rmesa->swtcl.verts;		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -601,10 +534,9 @@ static void r300ChooseRenderState( GLcontext *ctx )
- 	GLuint index = 0;
- 	GLuint flags = ctx->_TriangleCaps;
- 
--	if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
- 	if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
- 
--	if (index != rmesa->swtcl.RenderIndex) {
-+	if (index != rmesa->radeon.swtcl.RenderIndex) {
- 		tnl->Driver.Render.Points = rast_tab[index].points;
- 		tnl->Driver.Render.Line = rast_tab[index].line;
- 		tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -621,27 +553,29 @@ static void r300ChooseRenderState( GLcontext *ctx )
- 			tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
- 		}
- 
--		rmesa->swtcl.RenderIndex = index;
-+		rmesa->radeon.swtcl.RenderIndex = index;
- 	}
- }
- 
- 
- static void r300RenderStart(GLcontext *ctx)
- {
--        r300ContextPtr rmesa = R300_CONTEXT( ctx );
-+	r300ContextPtr rmesa = R300_CONTEXT( ctx );
- 
- 	r300ChooseRenderState(ctx);
- 	r300SetVertexFormat(ctx);
- 
-+	r300ValidateBuffers(ctx);
-+
- 	r300UpdateShaders(rmesa);
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
- 
--	if (rmesa->dma.flush != 0 &&
--	    rmesa->dma.flush != flush_last_swtcl_prim)
--		rmesa->dma.flush( rmesa );
--
-+	/* investigate if we can put back flush optimisation if needed */
-+	if (rmesa->radeon.dma.flush != NULL) {
-+		rmesa->radeon.dma.flush(ctx);
-+	}
- }
- 
- static void r300RenderFinish(GLcontext *ctx)
-@@ -652,9 +586,9 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 
--	if (rmesa->swtcl.hw_primitive != hwprim) {
--	        R300_NEWPRIM( rmesa );
--		rmesa->swtcl.hw_primitive = hwprim;
-+	if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-+		R300_NEWPRIM( rmesa );
-+		rmesa->radeon.swtcl.hw_primitive = hwprim;
- 	}
- }
- 
-@@ -662,18 +596,16 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
- {
- 
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	rmesa->swtcl.render_primitive = prim;
-+	rmesa->radeon.swtcl.render_primitive = prim;
- 
- 	if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
--	  return;
-+		return;
- 
- 	r300RasterPrimitive( ctx, reduced_prim[prim] );
- }
- 
- static void r300ResetLineStipple(GLcontext *ctx)
- {
--
--
- }
- 
- void r300InitSwtcl(GLcontext *ctx)
-@@ -699,14 +631,13 @@ void r300InitSwtcl(GLcontext *ctx)
- 	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
- 			    48 * sizeof(GLfloat) );
- 
--	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--	rmesa->swtcl.RenderIndex = ~0;
--	rmesa->swtcl.render_primitive = GL_TRIANGLES;
--	rmesa->swtcl.hw_primitive = 0;
-+	rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+	rmesa->radeon.swtcl.RenderIndex = ~0;
-+	rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+	rmesa->radeon.swtcl.hw_primitive = 0;
- 
- 	_tnl_invalidate_vertex_state( ctx, ~0 );
- 	_tnl_invalidate_vertices( ctx, ~0 );
--	RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
- 
- 	_tnl_need_projected_coords( ctx, GL_FALSE );
- 	r300ChooseRenderState(ctx);
-@@ -716,33 +647,52 @@ void r300DestroySwtcl(GLcontext *ctx)
- {
- }
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
-+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	if (RADEON_DEBUG & DEBUG_VERTS)
--	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
--		  __FUNCTION__, vertex_size, offset);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
--	e32(1);
--	e32(vertex_size | (vertex_size << 8));
--	e32(offset);
-+		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-+			__FUNCTION__, vertex_size, offset);
-+
-+	BEGIN_BATCH(7);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
-+	OUT_BATCH(1);
-+	OUT_BATCH(vertex_size | (vertex_size << 8));
-+	OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+	END_BATCH();
- }
- 
--void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
-+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
- {
--
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(&rmesa->radeon);
- 	int type, num_verts;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	type = r300PrimitiveType(rmesa, primitive);
- 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+	END_BATCH();
-+}
-+
-+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
-+{
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+
-+	rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			   rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			   __FUNCTION__);
-+	radeonEmitState(&rmesa->radeon);
-+	r300EmitVertexAOS(rmesa,
-+			rmesa->radeon.swtcl.vertex_size,
-+			rmesa->radeon.dma.current,
-+			current_offset);
-+
-+	r300EmitVbufPrim(rmesa,
-+		   rmesa->radeon.swtcl.hw_primitive,
-+		   rmesa->radeon.swtcl.numverts);
-+	r300EmitCacheFlush(rmesa);
-+	COMMIT_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
-index 55df53c..75c4193 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.h
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
-@@ -39,7 +39,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- #include "r300_context.h"
- 
-+#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
-+#define MASK_X R300_WRITE_ENA_X
-+#define MASK_Y R300_WRITE_ENA_Y
-+#define MASK_Z R300_WRITE_ENA_Z
-+#define MASK_W R300_WRITE_ENA_W
-+
-+/*
-+ * Here are definitions of OVM locations of vertex attributes for non TCL hw
-+ */
-+#define SWTCL_OVM_POS 0
-+#define SWTCL_OVM_COLOR0 2
-+#define SWTCL_OVM_COLOR1 3
-+#define SWTCL_OVM_COLOR2 4
-+#define SWTCL_OVM_COLOR3 5
-+#define SWTCL_OVM_TEX(n) ((n) + 6)
-+#define SWTCL_OVM_POINT_SIZE 15
-+
-+
- extern void r300InitSwtcl( GLcontext *ctx );
- extern void r300DestroySwtcl( GLcontext *ctx );
- 
-+extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
-index 7c699ec..0af5bb4 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.c
-+++ b/src/mesa/drivers/dri/r300/r300_tex.c
-@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "main/enums.h"
- #include "main/image.h"
-+#include "main/mipmap.h"
- #include "main/simple_list.h"
- #include "main/texformat.h"
- #include "main/texstore.h"
-@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_tex.h"
- 
- #include "xmlpool.h"
-@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
-  *
-  * \param t Texture object whose wrap modes are to be set
-  */
--static void r300UpdateTexWrap(r300TexObjPtr t)
-+static void r300UpdateTexWrap(radeonTexObjPtr t)
- {
--	struct gl_texture_object *tObj = t->base.tObj;
-+	struct gl_texture_object *tObj = &t->base;
- 
--	t->filter &=
-+	t->pp_txfilter &=
- 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
- 
--	t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
-+	t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
- 
- 	if (tObj->Target != GL_TEXTURE_1D) {
--		t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
-+		t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
- 
- 		if (tObj->Target == GL_TEXTURE_3D)
--			t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
-+			t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
- 	}
- }
- 
-@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy)
-  * \param magf Texture magnification mode
-  * \param anisotropy Maximum anisotropy level
-  */
--static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
-+static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
- {
--	t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
--	t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
-+	/* Force revalidation to account for switches from/to mipmapping. */
-+	t->validated = GL_FALSE;
-+
-+	t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
-+	t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
- 
- 	/* Note that EXT_texture_filter_anisotropic is extremely vague about
- 	 * how anisotropic filtering interacts with the "normal" filter modes.
-@@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 	 * filter settings completely. This includes driconf's settings.
- 	 */
- 	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
--		t->filter |= R300_TX_MAG_FILTER_ANISO
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
- 			| R300_TX_MIN_FILTER_ANISO
- 			| R300_TX_MIN_FILTER_MIP_LINEAR
- 			| aniso_filter(anisotropy);
-@@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 
- 	switch (minf) {
- 	case GL_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
- 		break;
- 	case GL_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
- 		break;
- 	case GL_NEAREST_MIPMAP_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
- 		break;
- 	case GL_NEAREST_MIPMAP_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
- 		break;
- 	case GL_LINEAR_MIPMAP_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
- 		break;
- 	case GL_LINEAR_MIPMAP_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
- 		break;
- 	}
- 
-@@ -163,15 +168,15 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 	 */
- 	switch (magf) {
- 	case GL_NEAREST:
--		t->filter |= R300_TX_MAG_FILTER_NEAREST;
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
- 		break;
- 	case GL_LINEAR:
--		t->filter |= R300_TX_MAG_FILTER_LINEAR;
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
- 		break;
- 	}
- }
- 
--static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4])
-+static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
- {
- 	GLubyte c[4];
- 	CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
-@@ -182,729 +187,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4])
- }
- 
- /**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
--{
--	r300TexObjPtr t;
--
--	t = CALLOC_STRUCT(r300_tex_obj);
--	texObj->DriverData = t;
--	if (t != NULL) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE) {
--			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--				(void *)texObj, (void *)t);
--		}
--
--		/* Initialize non-image-dependent parts of the state:
--		 */
--		t->base.tObj = texObj;
--		t->border_fallback = GL_FALSE;
--
--		make_empty_list(&t->base);
--
--		r300UpdateTexWrap(t);
--		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
--		r300SetTexBorderColor(t, texObj->BorderColor);
--	}
--
--	return t;
--}
--
--/* try to find a format which will only need a memcopy */
--static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
--							       GLenum srcType)
--{
--	const GLuint ui = 1;
--	const GLubyte littleEndian = *((const GLubyte *)&ui);
--
--	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
--		return &_mesa_texformat_rgba8888;
--	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
--		return &_mesa_texformat_rgba8888_rev;
--	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
--		return &_mesa_texformat_argb8888_rev;
--	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
--		return &_mesa_texformat_argb8888;
--	} else
--		return _dri_texformat_argb8888;
--}
--
--static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
--							       GLint
--							       internalFormat,
--							       GLenum format,
--							       GLenum type)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	const GLboolean do32bpt =
--	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
--	const GLboolean force16bpt =
--	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
--	(void)format;
--
--#if 0
--	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
--		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
--		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
--	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
--#endif
--
--	switch (internalFormat) {
--	case 4:
--	case GL_RGBA:
--	case GL_COMPRESSED_RGBA:
--		switch (type) {
--		case GL_UNSIGNED_INT_10_10_10_2:
--		case GL_UNSIGNED_INT_2_10_10_10_REV:
--			return do32bpt ? _dri_texformat_argb8888 :
--			    _dri_texformat_argb1555;
--		case GL_UNSIGNED_SHORT_4_4_4_4:
--		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--			return _dri_texformat_argb4444;
--		case GL_UNSIGNED_SHORT_5_5_5_1:
--		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--			return _dri_texformat_argb1555;
--		default:
--			return do32bpt ? r300Choose8888TexFormat(format, type) :
--			    _dri_texformat_argb4444;
--		}
--
--	case 3:
--	case GL_RGB:
--	case GL_COMPRESSED_RGB:
--		switch (type) {
--		case GL_UNSIGNED_SHORT_4_4_4_4:
--		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--			return _dri_texformat_argb4444;
--		case GL_UNSIGNED_SHORT_5_5_5_1:
--		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--			return _dri_texformat_argb1555;
--		case GL_UNSIGNED_SHORT_5_6_5:
--		case GL_UNSIGNED_SHORT_5_6_5_REV:
--			return _dri_texformat_rgb565;
--		default:
--			return do32bpt ? _dri_texformat_argb8888 :
--			    _dri_texformat_rgb565;
--		}
--
--	case GL_RGBA8:
--	case GL_RGB10_A2:
--	case GL_RGBA12:
--	case GL_RGBA16:
--		return !force16bpt ?
--		    r300Choose8888TexFormat(format,
--					    type) : _dri_texformat_argb4444;
--
--	case GL_RGBA4:
--	case GL_RGBA2:
--		return _dri_texformat_argb4444;
--
--	case GL_RGB5_A1:
--		return _dri_texformat_argb1555;
--
--	case GL_RGB8:
--	case GL_RGB10:
--	case GL_RGB12:
--	case GL_RGB16:
--		return !force16bpt ? _dri_texformat_argb8888 :
--		    _dri_texformat_rgb565;
--
--	case GL_RGB5:
--	case GL_RGB4:
--	case GL_R3_G3_B2:
--		return _dri_texformat_rgb565;
--
--	case GL_ALPHA:
--	case GL_ALPHA4:
--	case GL_ALPHA8:
--	case GL_ALPHA12:
--	case GL_ALPHA16:
--	case GL_COMPRESSED_ALPHA:
--		return _dri_texformat_a8;
--
--	case 1:
--	case GL_LUMINANCE:
--	case GL_LUMINANCE4:
--	case GL_LUMINANCE8:
--	case GL_LUMINANCE12:
--	case GL_LUMINANCE16:
--	case GL_COMPRESSED_LUMINANCE:
--		return _dri_texformat_l8;
--
--	case 2:
--	case GL_LUMINANCE_ALPHA:
--	case GL_LUMINANCE4_ALPHA4:
--	case GL_LUMINANCE6_ALPHA2:
--	case GL_LUMINANCE8_ALPHA8:
--	case GL_LUMINANCE12_ALPHA4:
--	case GL_LUMINANCE12_ALPHA12:
--	case GL_LUMINANCE16_ALPHA16:
--	case GL_COMPRESSED_LUMINANCE_ALPHA:
--		return _dri_texformat_al88;
--
--	case GL_INTENSITY:
--	case GL_INTENSITY4:
--	case GL_INTENSITY8:
--	case GL_INTENSITY12:
--	case GL_INTENSITY16:
--	case GL_COMPRESSED_INTENSITY:
--		return _dri_texformat_i8;
--
--	case GL_YCBCR_MESA:
--		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--		    type == GL_UNSIGNED_BYTE)
--			return &_mesa_texformat_ycbcr;
--		else
--			return &_mesa_texformat_ycbcr_rev;
--
--	case GL_RGB_S3TC:
--	case GL_RGB4_S3TC:
--	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--		return &_mesa_texformat_rgb_dxt1;
--
--	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--		return &_mesa_texformat_rgba_dxt1;
--
--	case GL_RGBA_S3TC:
--	case GL_RGBA4_S3TC:
--	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--		return &_mesa_texformat_rgba_dxt3;
--
--	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--		return &_mesa_texformat_rgba_dxt5;
--
--	case GL_ALPHA16F_ARB:
--		return &_mesa_texformat_alpha_float16;
--	case GL_ALPHA32F_ARB:
--		return &_mesa_texformat_alpha_float32;
--	case GL_LUMINANCE16F_ARB:
--		return &_mesa_texformat_luminance_float16;
--	case GL_LUMINANCE32F_ARB:
--		return &_mesa_texformat_luminance_float32;
--	case GL_LUMINANCE_ALPHA16F_ARB:
--		return &_mesa_texformat_luminance_alpha_float16;
--	case GL_LUMINANCE_ALPHA32F_ARB:
--		return &_mesa_texformat_luminance_alpha_float32;
--	case GL_INTENSITY16F_ARB:
--		return &_mesa_texformat_intensity_float16;
--	case GL_INTENSITY32F_ARB:
--		return &_mesa_texformat_intensity_float32;
--	case GL_RGB16F_ARB:
--		return &_mesa_texformat_rgba_float16;
--	case GL_RGB32F_ARB:
--		return &_mesa_texformat_rgba_float32;
--	case GL_RGBA16F_ARB:
--		return &_mesa_texformat_rgba_float16;
--	case GL_RGBA32F_ARB:
--		return &_mesa_texformat_rgba_float32;
--
--	case GL_DEPTH_COMPONENT:
--	case GL_DEPTH_COMPONENT16:
--	case GL_DEPTH_COMPONENT24:
--	case GL_DEPTH_COMPONENT32:
--#if 0
--		switch (type) {
--		case GL_UNSIGNED_BYTE:
--		case GL_UNSIGNED_SHORT:
--			return &_mesa_texformat_z16;
--		case GL_UNSIGNED_INT:
--			return &_mesa_texformat_z32;
--		case GL_UNSIGNED_INT_24_8_EXT:
--		default:
--			return &_mesa_texformat_z24_s8;
--		}
--#else
--		return &_mesa_texformat_z16;
--#endif
--
--	default:
--		_mesa_problem(ctx,
--			      "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
--			      (int)internalFormat);
--		return NULL;
--	}
--
--	return NULL;		/* never get here */
--}
--
--static GLboolean
--r300ValidateClientStorage(GLcontext * ctx, GLenum target,
--			  GLint internalFormat,
--			  GLint srcWidth, GLint srcHeight,
--			  GLenum format, GLenum type, const void *pixels,
--			  const struct gl_pixelstore_attrib *packing,
--			  struct gl_texture_object *texObj,
--			  struct gl_texture_image *texImage)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "intformat %s format %s type %s\n",
--			_mesa_lookup_enum_by_nr(internalFormat),
--			_mesa_lookup_enum_by_nr(format),
--			_mesa_lookup_enum_by_nr(type));
--
--	if (!ctx->Unpack.ClientStorage)
--		return 0;
--
--	if (ctx->_ImageTransferState ||
--	    texImage->IsCompressed || texObj->GenerateMipmap)
--		return 0;
--
--	/* This list is incomplete, may be different on ppc???
--	 */
--	switch (internalFormat) {
--	case GL_RGBA:
--		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
--			texImage->TexFormat = _dri_texformat_argb8888;
--		} else
--			return 0;
--		break;
--
--	case GL_RGB:
--		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
--			texImage->TexFormat = _dri_texformat_rgb565;
--		} else
--			return 0;
--		break;
--
--	case GL_YCBCR_MESA:
--		if (format == GL_YCBCR_MESA &&
--		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--		} else if (format == GL_YCBCR_MESA &&
--			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--			    type == GL_UNSIGNED_BYTE)) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr;
--		} else
--			return 0;
--		break;
--
--	default:
--		return 0;
--	}
--
--	/* Could deal with these packing issues, but currently don't:
--	 */
--	if (packing->SkipPixels ||
--	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
--		return 0;
--	}
--
--	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						    format, type);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: srcRowStride %d/%x\n",
--			__FUNCTION__, srcRowStride, srcRowStride);
--
--	/* Could check this later in upload, pitch restrictions could be
--	 * relaxed, but would need to store the image pitch somewhere,
--	 * as packing details might change before image is uploaded:
--	 */
--	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
--	    || (srcRowStride & 63))
--		return 0;
--
--	/* Have validated that _mesa_transfer_teximage would be a straight
--	 * memcpy at this point.  NOTE: future calls to TexSubImage will
--	 * overwrite the client data.  This is explicitly mentioned in the
--	 * extension spec.
--	 */
--	texImage->Data = (void *)pixels;
--	texImage->IsClientData = GL_TRUE;
--	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
--
--	return 1;
--}
--
--static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--			return;
--		}
--	}
--
--	/* Note, this will call ChooseTextureFormat */
--	_mesa_store_teximage1d(ctx, target, level, internalFormat,
--			       width, border, format, type, pixels,
--			       &ctx->Unpack, texObj, texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset,
--			      GLsizei width,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--			return;
--		}
--	}
--
--	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--				  format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint height, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage2d(ctx, target, level, internalFormat,
--				       width, height, border, format, type,
--				       pixels, &ctx->Unpack, texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
--	}
--}
--
--static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset, GLint yoffset,
--			      GLsizei width, GLsizei height,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--			return;
--		}
--	}
--
--	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--				  height, format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[face] |= (1 << level);
--}
--
--static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
--				     GLint level, GLint internalFormat,
--				     GLint width, GLint height, GLint border,
--				     GLsizei imageSize, const GLvoid * data,
--				     struct gl_texture_object *texObj,
--				     struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexImage2D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--	/* can't call this, different parameters. Would never evaluate to true anyway currently */
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_compressed_teximage2d(ctx, target, level,
--						  internalFormat, width, height,
--						  border, imageSize, data,
--						  texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
--	}
--}
--
--static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
--					GLint level, GLint xoffset,
--					GLint yoffset, GLsizei width,
--					GLsizei height, GLenum format,
--					GLsizei imageSize, const GLvoid * data,
--					struct gl_texture_object *texObj,
--					struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexSubImage3D");
--			return;
--		}
--	}
--
--	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
--					     yoffset, width, height, format,
--					     imageSize, data, texObj, texImage);
--
--	t->dirty_images[face] |= (1 << level);
--}
--
--static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint height, GLint depth,
--			   GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage3d(ctx, target, level, internalFormat,
--				       width, height, depth, border,
--				       format, type, pixels,
--				       &ctx->Unpack, texObj, texImage);
--
--		t->dirty_images[0] |= (1 << level);
--	}
--}
--
--static void
--r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
--		  GLint xoffset, GLint yoffset, GLint zoffset,
--		  GLsizei width, GLsizei height, GLsizei depth,
--		  GLenum format, GLenum type,
--		  const GLvoid * pixels,
--		  const struct gl_pixelstore_attrib *packing,
--		  struct gl_texture_object *texObj,
--		  struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--			return;
--		}
--		texObj->DriverData = t;
--	}
--
--	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
--				  width, height, depth,
--				  format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--/**
-  * Changes variables and flags for a state update, which will happen at the
-  * next UpdateTextureState
-  */
-@@ -913,7 +195,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 			     struct gl_texture_object *texObj,
- 			     GLenum pname, const GLfloat * params)
- {
--	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
-+	radeonTexObj* t = radeon_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
-@@ -946,7 +228,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 		 * we just have to rely on loading the right subset of mipmap levels
- 		 * to simulate a clamped LOD.
- 		 */
--		driSwapOutTextureObject((driTextureObject *) t);
-+		if (t->mt) {
-+			radeon_miptree_unreference(t->mt);
-+			t->mt = 0;
-+			t->validated = GL_FALSE;
-+		}
- 		break;
- 
- 	case GL_DEPTH_TEXTURE_MODE:
-@@ -969,27 +255,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 	}
- }
- 
--static void r300BindTexture(GLcontext * ctx, GLenum target,
--			    struct gl_texture_object *texObj)
--{
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
--		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
--			(void *)texObj, ctx->Texture.CurrentUnit);
--	}
--
--	if ((target == GL_TEXTURE_1D)
--	    || (target == GL_TEXTURE_2D)
--	    || (target == GL_TEXTURE_3D)
--	    || (target == GL_TEXTURE_CUBE_MAP)
--	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
--		assert(texObj->DriverData != NULL);
--	}
--}
--
- static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	radeonTexObj* t = radeon_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-@@ -997,14 +266,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- 			_mesa_lookup_enum_by_nr(texObj->Target));
- 	}
- 
--	if (t != NULL) {
--		if (rmesa) {
--			R300_FIREVERTICES(rmesa);
--		}
-+	if (rmesa) {
-+		int i;
-+		radeon_firevertices(&rmesa->radeon);
-+
-+		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
-+			if (rmesa->hw.textures[i] == t)
-+				rmesa->hw.textures[i] = 0;
-+	}
- 
--		driDestroyTextureObject(t);
-+	if (t->bo) {
-+		radeon_bo_unref(t->bo);
-+		t->bo = NULL;
-+	}
-+
-+	if (t->mt) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = 0;
- 	}
--	/* Free mipmap images and the texture object itself */
- 	_mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1013,8 +292,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
- static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
-@@ -1022,14 +299,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
- 						      GLenum target)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_object *obj;
--	obj = _mesa_new_texture_object(ctx, name, target);
--	if (!obj)
--		return NULL;
--	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+	radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
- 
--	r300AllocTexObj(obj);
--	return obj;
-+	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+			t, _mesa_lookup_enum_by_nr(target));
-+	}
-+
-+	_mesa_initialize_texture_object(&t->base, name, target);
-+	t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+	/* Initialize hardware state */
-+	r300UpdateTexWrap(t);
-+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
-+	r300SetTexBorderColor(t, t->base.BorderColor);
-+
-+	return &t->base;
- }
- 
- void r300InitTextureFuncs(struct dd_function_table *functions)
-@@ -1037,22 +323,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	/* Note: we only plug in the functions we implement in the driver
- 	 * since _mesa_init_driver_functions() was already called.
- 	 */
--	functions->ChooseTextureFormat = r300ChooseTextureFormat;
--	functions->TexImage1D = r300TexImage1D;
--	functions->TexImage2D = r300TexImage2D;
--	functions->TexImage3D = r300TexImage3D;
--	functions->TexSubImage1D = r300TexSubImage1D;
--	functions->TexSubImage2D = r300TexSubImage2D;
--	functions->TexSubImage3D = r300TexSubImage3D;
-+	functions->NewTextureImage = radeonNewTextureImage;
-+	functions->FreeTexImageData = radeonFreeTexImageData;
-+	functions->MapTexture = radeonMapTexture;
-+	functions->UnmapTexture = radeonUnmapTexture;
-+
-+	functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
-+	functions->TexImage1D = radeonTexImage1D;
-+	functions->TexImage2D = radeonTexImage2D;
-+	functions->TexImage3D = radeonTexImage3D;
-+	functions->TexSubImage1D = radeonTexSubImage1D;
-+	functions->TexSubImage2D = radeonTexSubImage2D;
-+	functions->TexSubImage3D = radeonTexSubImage3D;
-+	functions->GetTexImage = radeonGetTexImage;
-+	functions->GetCompressedTexImage = radeonGetCompressedTexImage;
- 	functions->NewTextureObject = r300NewTextureObject;
--	functions->BindTexture = r300BindTexture;
- 	functions->DeleteTexture = r300DeleteTexture;
- 	functions->IsTextureResident = driIsTextureResident;
- 
- 	functions->TexParameter = r300TexParameter;
- 
--	functions->CompressedTexImage2D = r300CompressedTexImage2D;
--	functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
-+	functions->CompressedTexImage2D = radeonCompressedTexImage2D;
-+	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
-+
-+	functions->GenerateMipmap = radeonGenerateMipmap;
- 
- 	driInitTextureFormats();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
-index b86d45b..8a653ea 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.h
-+++ b/src/mesa/drivers/dri/r300/r300_tex.h
-@@ -37,16 +37,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
- 
-+extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
-+			     __DRIdrawable *dPriv);
-+
-+extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
-+			      GLint format, __DRIdrawable *dPriv);
-+
- extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 			     unsigned long long offset, GLint depth,
- 			     GLuint pitch);
- 
--extern void r300UpdateTextureState(GLcontext * ctx);
--
--extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLuint face);
--
--extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
-+extern GLboolean r300ValidateBuffers(GLcontext * ctx);
- 
- extern void r300InitTextureFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
-deleted file mode 100644
-index 0fe51b0..0000000
---- a/src/mesa/drivers/dri/r300/r300_texmem.c
-+++ /dev/null
-@@ -1,567 +0,0 @@
--/**************************************************************************
--
--Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.
--The Weather Channel, Inc. funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86
--license. This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/**
-- * \file
-- *
-- * \author Gareth Hughes <gareth@valinux.com>
-- *
-- * \author Kevin E. Martin <martin@valinux.com>
-- */
--
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/colormac.h"
--#include "main/macros.h"
--#include "main/simple_list.h"
--#include "radeon_reg.h"		/* gets definition for usleep */
--#include "r300_context.h"
--#include "r300_state.h"
--#include "r300_cmdbuf.h"
--#include "radeon_ioctl.h"
--#include "r300_tex.h"
--#include "r300_ioctl.h"
--#include <unistd.h>		/* for usleep() */
--
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
--{
--	int i;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--			(void *)t, (void *)t->base.tObj);
--	}
--
--	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
--		if (rmesa->state.texture.unit[i].texobj == t) {
--			rmesa->state.texture.unit[i].texobj = NULL;
--		}
--	}
--}
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
--					 r300TexObjPtr t,
--					 struct gl_texture_image *texImage,
--					 GLint hwlevel,
--					 GLint x, GLint y,
--					 GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	GLuint srcPitch, dstPitch;
--	int blit_format;
--	int srcOffset;
--
--	/*
--	 * XXX it appears that we always upload the full image, not a subimage.
--	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--	 * changed, the src pitch will have to change.
--	 */
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][hwlevel].data = texImage->Data;
--	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--
--	assert(srcOffset != ~0);
--
--	/* Don't currently need to cope with small pitches?
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	r300EmitWait(rmesa, R300_WAIT_3D);
--
--	r300EmitBlit(rmesa, blit_format,
--		     srcPitch,
--		     srcOffset,
--		     dstPitch,
--		     t->bufAddr,
--		     x,
--		     y,
--		     t->image[0][hwlevel].x + x,
--		     t->image[0][hwlevel].y + y, width, height);
--
--	r300EmitWait(rmesa, R300_WAIT_2D);
--}
--
--static void r300UploadRectSubImage(r300ContextPtr rmesa,
--				   r300TexObjPtr t,
--				   struct gl_texture_image *texImage,
--				   GLint x, GLint y, GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	int blit_format, dstPitch, done;
--
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][0].data = texImage->Data;
--
--	/* Currently don't need to cope with small pitches.
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--	dstPitch = t->pitch;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--		/* In this case, could also use GART texturing.  This is
--		 * currently disabled, but has been tested & works.
--		 */
--		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"Using GART texturing for rectangular client texture\n");
--
--		/* Release FB memory allocated for this image:
--		 */
--		/* FIXME This may not be correct as driSwapOutTextureObject sets
--		 * FIXME dirty_images.  It may be fine, though.
--		 */
--		if (t->base.memBlock) {
--			driSwapOutTextureObject((driTextureObject *) t);
--		}
--	} else if (texImage->IsClientData) {
--		/* Data already in GART memory, with usable pitch.
--		 */
--		GLuint srcPitch;
--		srcPitch = texImage->RowStride * texFormat->TexelBytes;
--		r300EmitBlit(rmesa,
--			     blit_format,
--			     srcPitch,
--			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
--			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
--	} else {
--		/* Data not in GART memory, or bad pitch.
--		 */
--		for (done = 0; done < height;) {
--			struct r300_dma_region region;
--			int lines =
--			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
--			int src_pitch;
--			char *tex;
--
--			src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--			tex = (char *)texImage->Data + done * src_pitch;
--
--			memset(&region, 0, sizeof(region));
--			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
--					   1024);
--
--			/* Copy texdata to dma:
--			 */
--			if (RADEON_DEBUG & DEBUG_TEXTURE)
--				fprintf(stderr,
--					"%s: src_pitch %d dst_pitch %d\n",
--					__FUNCTION__, src_pitch, dstPitch);
--
--			if (src_pitch == dstPitch) {
--				memcpy(region.address + region.start, tex,
--				       lines * src_pitch);
--			} else {
--				char *buf = region.address + region.start;
--				int i;
--				for (i = 0; i < lines; i++) {
--					memcpy(buf, tex, src_pitch);
--					buf += dstPitch;
--					tex += src_pitch;
--				}
--			}
--
--			r300EmitWait(rmesa, R300_WAIT_3D);
--
--			/* Blit to framebuffer
--			 */
--			r300EmitBlit(rmesa,
--				     blit_format,
--				     dstPitch, GET_START(&region),
--				     dstPitch | (t->tile_bits >> 16),
--				     t->bufAddr, 0, 0, 0, done, width, lines);
--
--			r300EmitWait(rmesa, R300_WAIT_2D);
--#ifdef USER_BUFFERS
--			r300_mem_use(rmesa, region.buf->id);
--#endif
--
--			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
--			done += lines;
--		}
--	}
--}
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLint hwlevel,
--			       GLint x, GLint y, GLint width, GLint height,
--			       GLuint face)
--{
--	struct gl_texture_image *texImage = NULL;
--	GLuint offset;
--	GLint imageWidth, imageHeight;
--	GLint ret;
--	drm_radeon_texture_t tex;
--	drm_radeon_tex_image_t tmp;
--	const int level = hwlevel + t->base.firstLevel;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr,
--			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
--			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
--			width, height, face);
--	}
--
--	ASSERT(face < 6);
--
--	/* Ensure we have a valid texture to upload */
--	if ((hwlevel < 0) || (hwlevel >= R300_MAX_TEXTURE_LEVELS)) {
--		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--		return;
--	}
--
--	texImage = t->base.tObj->Image[face][level];
--
--	if (!texImage) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: texImage %d is NULL!\n",
--				__FUNCTION__, level);
--		return;
--	}
--	if (!texImage->Data) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is NULL!\n",
--				__FUNCTION__);
--		return;
--	}
--
--	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		assert(level == 0);
--		assert(hwlevel == 0);
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is rectangular\n",
--				__FUNCTION__);
--		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
--		return;
--	} else if (texImage->IsClientData) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"%s: image data is in GART client storage\n",
--				__FUNCTION__);
--		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
--					     width, height);
--		return;
--	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: image data is in normal memory\n",
--			__FUNCTION__);
--
--	imageWidth = texImage->Width;
--	imageHeight = texImage->Height;
--
--	offset = t->bufAddr;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		GLint imageX = 0;
--		GLint imageY = 0;
--		GLint blitX = t->image[face][hwlevel].x;
--		GLint blitY = t->image[face][hwlevel].y;
--		GLint blitWidth = t->image[face][hwlevel].width;
--		GLint blitHeight = t->image[face][hwlevel].height;
--		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
--			imageWidth, imageHeight, imageX, imageY);
--		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
--			blitWidth, blitHeight, blitX, blitY);
--		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--			(GLuint) offset, hwlevel, level);
--	}
--
--	t->image[face][hwlevel].data = texImage->Data;
--
--	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--	 * We used to use 1, 2 and 4-byte texels and used to use the texture
--	 * width to dictate the blit width - but that won't work for compressed
--	 * textures. (Brian)
--	 * NOTE: can't do that with texture tiling. (sroland)
--	 */
--	tex.offset = offset;
--	tex.image = &tmp;
--	/* copy (x,y,width,height,data) */
--	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
--
--	if (texImage->TexFormat->TexelBytes > 4) {
--		const int log2TexelBytes =
--		    (3 + (texImage->TexFormat->TexelBytes >> 4));
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.height = imageHeight;
--		tex.width = imageWidth << log2TexelBytes;
--		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
--		tmp.x = tmp.x % (1024 >> log2TexelBytes);
--		tmp.width = tmp.width << log2TexelBytes;
--	} else if (texImage->TexFormat->TexelBytes) {
--		/* use multi-byte upload scheme */
--		tex.height = imageHeight;
--		tex.width = imageWidth;
--		switch (texImage->TexFormat->TexelBytes) {
--		case 1:
--			tex.format = RADEON_TXFORMAT_I8;
--			break;
--		case 2:
--			tex.format = RADEON_TXFORMAT_AI88;
--			break;
--		case 4:
--			tex.format = RADEON_TXFORMAT_ARGB8888;
--			break;
--		}
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.offset += tmp.x & ~1023;
--		tmp.x = tmp.x % 1024;
--
--		if (t->tile_bits & R300_TXO_MICRO_TILE) {
--			/* need something like "tiled coordinates" ? */
--			tmp.y = tmp.x / (tex.pitch * 128) * 2;
--			tmp.x =
--			    tmp.x % (tex.pitch * 128) / 2 /
--			    texImage->TexFormat->TexelBytes;
--			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--		} else {
--			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--		}
--#if 1
--		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
--		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
--		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
--			 && (texImage->Height >= 8))
--			|| (texImage->Height >= 16))) {
--			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--			   OR if height is smaller than 8 automatically, but if micro tiling is active
--			   the limit is height 16 instead ? */
--			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--		}
--#endif
--	} else {
--		/* In case of for instance 8x8 texture (2x2 dxt blocks),
--		   padding after the first two blocks is needed (only
--		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
--		   has 4 real pixels. Needed so the kernel module reads
--		   the right amount of data. */
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
--		tex.height = (imageHeight + 3) / 4;
--		tex.width = (imageWidth + 3) / 4;
--		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
--			tex.width *= 8;
--		} else {
--			tex.width *= 16;
--		}
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--	do {
--		ret =
--		    drmCommandWriteRead(rmesa->radeon.dri.fd,
--					DRM_RADEON_TEXTURE, &tex,
--					sizeof(drm_radeon_texture_t));
--		if (ret) {
--			if (RADEON_DEBUG & DEBUG_IOCTL)
--				fprintf(stderr,
--					"DRM_RADEON_TEXTURE:  again!\n");
--			usleep(1);
--		}
--	} while (ret == -EAGAIN);
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
--		fprintf(stderr, "   offset=0x%08x\n", offset);
--		fprintf(stderr, "   image width=%d height=%d\n",
--			imageWidth, imageHeight);
--		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
--			t->image[face][hwlevel].width,
--			t->image[face][hwlevel].height,
--			t->image[face][hwlevel].data);
--		_mesa_exit(-1);
--	}
--}
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- *
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
--{
--	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--	if (t->image_override)
--		return 0;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
--			t->base.totalSize, t->base.firstLevel,
--			t->base.lastLevel);
--	}
--
--	if (t->base.totalSize == 0)
--		return 0;
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--
--	if (t->base.memBlock == NULL) {
--		int heap;
--
--		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
--					  (driTextureObject *) t);
--		if (heap == -1) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			return -1;
--		}
--
--		/* Set the base offset of the texture image */
--		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
--		    + t->base.memBlock->ofs;
--		t->offset = t->bufAddr;
--
--		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--			/* hope it's safe to add that here... */
--			t->offset |= t->tile_bits;
--		}
--	}
--
--	/* Let the world know we've used this memory recently.
--	 */
--	driUpdateTextureLRU((driTextureObject *) t);
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	/* Upload any images that are new */
--	if (t->base.dirty_images[face]) {
--		int i;
--		for (i = 0; i < numLevels; i++) {
--			if ((t->base.
--			     dirty_images[face] & (1 <<
--						   (i + t->base.firstLevel))) !=
--			    0) {
--				r300UploadSubImage(rmesa, t, i, 0, 0,
--						   t->image[face][i].width,
--						   t->image[face][i].height,
--						   face);
--			}
--		}
--		t->base.dirty_images[face] = 0;
--	}
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	return 0;
--}
-diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
-index cadec7f..2d7ad55 100644
---- a/src/mesa/drivers/dri/r300/r300_texstate.c
-+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
-@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
--#include "radeon_ioctl.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_tex.h"
- #include "r300_reg.h"
- 
-@@ -143,13 +143,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 		},
- 	};
- 	const GLuint *format;
--	r300TexObjPtr t;
-+	radeonTexObjPtr t;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
-+	t = radeon_tex_obj(tObj);
- 
- 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
- 	case MESA_FORMAT_Z16:
-@@ -171,13 +170,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 
- 	switch (tObj->DepthMode) {
- 	case GL_LUMINANCE:
--		t->format = format[0];
-+		t->pp_txformat = format[0];
- 		break;
- 	case GL_INTENSITY:
--		t->format = format[1];
-+		t->pp_txformat = format[1];
- 		break;
- 	case GL_ALPHA:
--		t->format = format[2];
-+		t->pp_txformat = format[2];
- 		break;
- 	default:
- 		/* Error...which should have already been caught by higher
-@@ -190,399 +189,134 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 
- 
- /**
-- * Compute sizes and fill in offset and blit information for the given
-- * image (determined by \p face and \p level).
-- *
-- * \param curOffset points to the offset at which the image is to be stored
-- * and is updated by this function according to the size of the image.
-- */
--static void compute_tex_image_offset(
--	struct gl_texture_object *tObj,
--	GLuint face,
--	GLint level,
--	GLint* curOffset)
--{
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image* texImage;
--	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
--	GLuint texelBytes;
--	GLuint size;
--
--	texImage = tObj->Image[0][level + t->base.firstLevel];
--	if (!texImage)
--		return;
--
--	texelBytes = texImage->TexFormat->TexelBytes;
--
--	/* find image size in bytes */
--	if (texImage->IsCompressed) {
--		if ((t->format & R300_TX_FORMAT_DXT1) ==
--			R300_TX_FORMAT_DXT1) {
--			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
--			if ((texImage->Width + 3) < 8)	/* width one block */
--				size = texImage->CompressedSize * 4;
--			else if ((texImage->Width + 3) < 16)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		} else {
--			/* DXT3/5, 16 bytes per block */
--			WARN_ONCE
--				("DXT 3/5 suffers from multitexturing problems!\n");
--			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
--			if ((texImage->Width + 3) < 8)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		}
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		size =
--			((texImage->Width * texelBytes +
--			63) & ~63) * texImage->Height;
--		blitWidth = 64 / texelBytes;
--	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
--		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--			though the actual offset may be different (if texture is less than
--			32 bytes width) to the untiled case */
--		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--		size =
--			(w * ((texImage->Height + 1) / 2)) *
--			texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	} else {
--		int w = (texImage->Width * texelBytes + 31) & ~31;
--		size = w * texImage->Height * texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	}
--	assert(size > 0);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
--			texImage->Width, texImage->Height,
--			texImage->Depth,
--			texImage->TexFormat->TexelBytes,
--			texImage->InternalFormat);
--
--	/* All images are aligned to a 32-byte offset */
--	*curOffset = (*curOffset + 0x1f) & ~0x1f;
--
--	if (texelBytes) {
--		/* fix x and y coords up later together with offset */
--		t->image[face][level].x = *curOffset;
--		t->image[face][level].y = 0;
--		t->image[face][level].width =
--			MIN2(size / texelBytes, blitWidth);
--		t->image[face][level].height =
--			(size / texelBytes) / t->image[face][level].width;
--	} else {
--		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].width =
--			MIN2(size, R300_BLIT_WIDTH_BYTES);
--		t->image[face][level].height = size / t->image[face][level].width;
--	}
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr,
--			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--			level, face, texImage->Width, texImage->Height,
--			t->image[face][level].x, t->image[face][level].y,
--			t->image[face][level].width, t->image[face][level].height,
--			size, *curOffset);
--
--	*curOffset += size;
--}
--
--
--
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c filter, \c format, etc. will be set here
-- * too.
-+ * Compute the cached hardware register values for the given texture object.
-  *
-  * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-+ * \param t the r300 texture object
-  */
--static void r300SetTexImages(r300ContextPtr rmesa,
--			     struct gl_texture_object *tObj)
-+static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
- {
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image *baseImage =
--	    tObj->Image[0][tObj->BaseLevel];
--	GLint curOffset;
--	GLint i, texelBytes;
--	GLint numLevels;
--	GLint log2Width, log2Height, log2Depth;
--
--	/* Set the hardware texture format
--	 */
-+	const struct gl_texture_image *firstImage;
-+	int firstlevel = t->mt ? t->mt->firstLevel : 0;
-+	    
-+	firstImage = t->base.Image[0][firstlevel];
-+
- 	if (!t->image_override
--	    && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
--		if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
--			r300SetDepthTexMode(tObj);
-+	    && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+		if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
-+			r300SetDepthTexMode(&t->base);
- 		} else {
--			t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
-+			t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
- 		}
- 
--		t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
-+		t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
- 	} else if (!t->image_override) {
- 		_mesa_problem(NULL, "unexpected texture format in %s",
- 			      __FUNCTION__);
- 		return;
- 	}
- 
--	texelBytes = baseImage->TexFormat->TexelBytes;
--
--	/* Compute which mipmap levels we really want to send to the hardware.
--	 */
--	driCalculateTextureFirstLastLevel((driTextureObject *) t);
--	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-+	if (t->image_override && t->bo)
-+		return;
- 
--	assert(numLevels <= R300_MAX_TEXTURE_LEVELS);
-+	t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
-+			| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
-+			| ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
-+			| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
- 
--	/* Calculate mipmap offsets and dimensions for blitting (uploading)
--	 * The idea is that we lay out the mipmap levels within a block of
--	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--	 */
- 	t->tile_bits = 0;
- 
--	/* figure out if this texture is suitable for tiling. */
--#if 0				/* Disabled for now */
--	if (texelBytes) {
--		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--		    /* texrect might be able to use micro tiling too in theory? */
--		    (baseImage->Height > 1)) {
--
--			/* allow 32 (bytes) x 1 mip (which will use two times the space
--			   the non-tiled version would use) max if base texture is large enough */
--			if ((numLevels == 1) ||
--			    (((baseImage->Width * texelBytes /
--			       baseImage->Height) <= 32)
--			     && (baseImage->Width * texelBytes > 64))
--			    ||
--			    ((baseImage->Width * texelBytes /
--			      baseImage->Height) <= 16)) {
--				t->tile_bits |= R300_TXO_MICRO_TILE;
--			}
--		}
--
--		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--			/* we can set macro tiling even for small textures, they will be untiled anyway */
--			t->tile_bits |= R300_TXO_MACRO_TILE;
--		}
--	}
--#endif
-+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
-+		t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
-+	if (t->base.Target == GL_TEXTURE_3D)
-+		t->pp_txformat |= R300_TX_FORMAT_3D;
- 
--	curOffset = 0;
- 
--	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--		ASSERT(log2Width == log2Height);
--		t->format |= R300_TX_FORMAT_CUBIC_MAP;
--
--		for(i = 0; i < numLevels; i++) {
--			GLuint face;
--			for(face = 0; face < 6; face++)
--				compute_tex_image_offset(tObj, face, i, &curOffset);
--		}
--	} else {
--		if (tObj->Target == GL_TEXTURE_3D)
--                	t->format |= R300_TX_FORMAT_3D;
--
--		for (i = 0; i < numLevels; i++)
--			compute_tex_image_offset(tObj, 0, i, &curOffset);
--	}
--
--	/* Align the total size of texture memory block.
--	 */
--	t->base.totalSize =
--	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--	t->size =
--	    (((tObj->Image[0][t->base.firstLevel]->Width -
--	       1) << R300_TX_WIDTHMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
--		R300_TX_HEIGHTMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
--		R300_TX_DEPTHMASK_SHIFT))
--	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
--
--	t->pitch = 0;
--
--	/* Only need to round to nearest 32 for textures, but the blitter
--	 * requires 64-byte aligned pitches, and we may/may not need the
--	 * blitter.   NPOT only!
--	 */
--	if (baseImage->IsCompressed) {
--		t->pitch |=
--		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		unsigned int align = (64 / texelBytes) - 1;
--		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
--			     texelBytes) + 63) & ~(63);
--		t->size |= R300_TX_SIZE_TXPITCH_EN;
-+	if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-+		unsigned int align = (64 / t->mt->bpp) - 1;
-+		t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
- 		if (!t->image_override)
--			t->pitch_reg =
--			    (((tObj->Image[0][t->base.firstLevel]->Width) +
--			      align) & ~align) - 1;
--	} else {
--		t->pitch |=
--		    ((tObj->Image[0][t->base.firstLevel]->Width *
--		      texelBytes) + 63) & ~(63);
-+			t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
- 	}
- 
- 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
--		t->pitch_reg |= R500_TXWIDTH_BIT11;
--	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
--		t->pitch_reg |= R500_TXHEIGHT_BIT11;
-+	    if (firstImage->Width > 2048)
-+		t->pp_txpitch |= R500_TXWIDTH_BIT11;
-+	    if (firstImage->Height > 2048)
-+		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
- 	}
- }
- 
--/* ================================================================
-- * Texture unit state management
-+/**
-+ * Ensure the given texture is ready for rendering.
-+ *
-+ * Mostly this means populating the texture object's mipmap tree.
-  */
--
--static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
-+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
-+	radeonTexObj *t = radeon_tex_obj(texObj);
- 
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override)
--			return GL_FALSE;
--	}
--
--	return GL_TRUE;
--}
--
--static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_3D);
--
--	/* r300 does not support mipmaps for 3D textures. */
--	if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
-+	if (!radeon_validate_texture_miptree(ctx, texObj))
- 		return GL_FALSE;
--	}
- 
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock)
--			return GL_FALSE;
--	}
-+	/* Configure the hardware registers (more precisely, the cached version
-+	 * of the hardware registers). */
-+	setup_hardware_state(rmesa, t);
- 
-+	t->validated = GL_TRUE;
- 	return GL_TRUE;
- }
- 
--static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
-+/**
-+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
-+ */
-+GLboolean r300ValidateBuffers(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	GLuint face;
--
--	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
--
--	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
--	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
--	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
--		/* flush */
--		R300_FIREVERTICES(rmesa);
--		/* layout memory space, once for all faces */
--		r300SetTexImages(rmesa, tObj);
--	}
-+	struct radeon_renderbuffer *rrb;
-+	int i;
- 
--	/* upload (per face) */
--	for (face = 0; face < 6; face++) {
--		if (t->base.dirty_images[face]) {
--			r300UploadTexImages(rmesa,
--					    (r300TexObjPtr) tObj->DriverData,
--					    face);
--		}
--	}
-+	radeon_validate_reset_bos(&rmesa->radeon);
- 
--	if (!t->base.memBlock) {
--		/* texmem alloc failed, use s/w fallback */
--		return GL_FALSE;
-+	rrb = radeon_get_colorbuffer(&rmesa->radeon);
-+	/* color buffer */
-+	if (rrb && rrb->bo) {
-+		radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+				   0, RADEON_GEM_DOMAIN_VRAM);
- 	}
- 
--	return GL_TRUE;
--}
--
--static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
--
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override &&
--		    !rmesa->prefer_gart_client_texturing)
--			return GL_FALSE;
-+	/* depth buffer */
-+	rrb = radeon_get_depthbuffer(&rmesa->radeon);
-+	if (rrb && rrb->bo) {
-+		radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+				   0, RADEON_GEM_DOMAIN_VRAM);
- 	}
-+	
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+		radeonTexObj *t;
- 
--	return GL_TRUE;
--}
--
--static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+			continue;
- 
--	/* Fallback if there's a texture border */
--	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
--		return GL_FALSE;
--
--	/* Update state if this is a different texture object to last
--	 * time.
--	 */
--	if (rmesa->state.texture.unit[unit].texobj != t) {
--		if (rmesa->state.texture.unit[unit].texobj != NULL) {
--			/* The old texture is no longer bound to this texture unit.
--			 * Mark it as such.
--			 */
--
--			rmesa->state.texture.unit[unit].texobj->base.bound &=
--			    ~(1 << unit);
-+		if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
-+			_mesa_warning(ctx,
-+				      "failed to validate texture for unit %d.\n",
-+				      i);
- 		}
--
--		rmesa->state.texture.unit[unit].texobj = t;
--		t->base.bound |= (1 << unit);
--		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
-+		t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
-+		if (t->image_override && t->bo)
-+			radeon_validate_bo(&rmesa->radeon, t->bo,
-+					   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+		else if (t->mt->bo)
-+			radeon_validate_bo(&rmesa->radeon, t->mt->bo,
-+					   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
- 	}
-+	if (rmesa->radeon.dma.current)
-+		radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
- 
--	return !t->border_fallback;
-+	return radeon_revalidate_bos(ctx);
- }
- 
- void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-@@ -591,78 +325,163 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
- 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
--	r300TexObjPtr t;
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 	uint32_t pitch_val;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
- 
--	t->offset = offset;
--	t->pitch_reg &= (1 << 13) -1;
-+	t->bo = NULL;
-+	t->override_offset = offset;
-+	t->pp_txpitch &= (1 << 13) -1;
- 	pitch_val = pitch;
- 
- 	switch (depth) {
- 	case 32:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
--		t->filter |= tx_table[2].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[2].filter;
- 		pitch_val /= 4;
- 		break;
- 	case 24:
- 	default:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
--		t->filter |= tx_table[4].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[4].filter;
- 		pitch_val /= 4;
- 		break;
- 	case 16:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
--		t->filter |= tx_table[5].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
-+		t->pp_txfilter |= tx_table[5].filter;
- 		pitch_val /= 2;
- 		break;
- 	}
- 	pitch_val--;
- 
--	t->pitch_reg |= pitch_val;
-+	t->pp_txpitch |= pitch_val;
- }
- 
--static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
-+void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
- {
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--
--	if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
--		return (r300EnableTextureRect(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
--		return (r300EnableTexture2D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
--		return (r300EnableTexture3D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
--		return (r300EnableTextureCube(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled) {
--		return GL_FALSE;
--	} else {
--		return GL_TRUE;
-+	struct gl_texture_unit *texUnit;
-+	struct gl_texture_object *texObj;
-+	struct gl_texture_image *texImage;
-+	struct radeon_renderbuffer *rb;
-+	radeon_texture_image *rImage;
-+	radeonContextPtr radeon;
-+	r300ContextPtr rmesa;
-+	struct radeon_framebuffer *rfb;
-+	radeonTexObjPtr t;
-+	uint32_t pitch_val;
-+	uint32_t internalFormat, type, format;
-+
-+	type = GL_BGRA;
-+	format = GL_UNSIGNED_BYTE;
-+	internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
-+
-+	radeon = pDRICtx->driverPrivate;
-+	rmesa = pDRICtx->driverPrivate;
-+
-+	rfb = dPriv->driverPrivate;
-+        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
-+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
-+        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
-+
-+	rImage = get_radeon_texture_image(texImage);
-+	t = radeon_tex_obj(texObj);
-+        if (t == NULL) {
-+    	    return;
-+    	}
-+
-+	radeon_update_renderbuffers(pDRICtx, dPriv);
-+	/* back & depth buffer are useless free them right away */
-+	rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+	}
-+	rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
- 	}
-+	rb = rfb->color_rb[0];
-+	if (rb->bo == NULL) {
-+		/* Failed to BO for the buffer */
-+		return;
-+	}
-+	
-+	_mesa_lock_texture(radeon->glCtx, texObj);
-+	if (t->bo) {
-+		radeon_bo_unref(t->bo);
-+		t->bo = NULL;
-+	}
-+	if (rImage->bo) {
-+		radeon_bo_unref(rImage->bo);
-+		rImage->bo = NULL;
-+	}
-+	if (t->mt) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = NULL;
-+	}
-+	if (rImage->mt) {
-+		radeon_miptree_unreference(rImage->mt);
-+		rImage->mt = NULL;
-+	}
-+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
-+				   rb->width, rb->height, 1, 0, rb->cpp);
-+	texImage->RowStride = rb->pitch / rb->cpp;
-+	texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
-+							internalFormat,
-+							type, format, 0);
-+	rImage->bo = rb->bo;
-+	radeon_bo_ref(rImage->bo);
-+	t->bo = rb->bo;
-+	radeon_bo_ref(t->bo);
-+	t->tile_bits = 0;
-+	t->image_override = GL_TRUE;
-+	t->override_offset = 0;
-+	t->pp_txpitch &= (1 << 13) -1;
-+	pitch_val = rb->pitch;
-+	switch (rb->cpp) {
-+	case 4:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[2].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 3:
-+	default:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[4].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 2:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
-+		t->pp_txfilter |= tx_table[5].filter;
-+		pitch_val /= 2;
-+		break;
-+	}
-+	pitch_val--;
-+	t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
-+              ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
-+	t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
-+	t->pp_txpitch |= pitch_val;
-+
-+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-+	    if (rb->width > 2048)
-+		t->pp_txpitch |= R500_TXWIDTH_BIT11;
-+	    if (rb->height > 2048)
-+		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
-+	}
-+	t->validated = GL_TRUE;
-+	_mesa_unlock_texture(radeon->glCtx, texObj);
-+	return;
- }
- 
--void r300UpdateTextureState(GLcontext * ctx)
-+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
- {
--	int i;
--
--	for (i = 0; i < 8; i++) {
--		if (!r300UpdateTextureUnit(ctx, i)) {
--			_mesa_warning(ctx,
--				      "failed to update texture state for unit %d.\n",
--				      i);
--		}
--	}
-+        r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
- }
-diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
-index 292573d..8f0b70a 100644
---- a/src/mesa/drivers/dri/r300/r500_fragprog.c
-+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
-@@ -27,10 +27,6 @@
- 
- #include "r500_fragprog.h"
- 
--#include "radeon_nqssadce.h"
--#include "radeon_program_alu.h"
--
--
- static void reset_srcreg(struct prog_src_register* reg)
- {
- 	_mesa_bzero(reg, sizeof(*reg));
-@@ -58,12 +54,12 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
-  *  - introduce a temporary register when write masks are needed
-  *
-  */
--static GLboolean transform_TEX(
-+GLboolean r500_transform_TEX(
- 	struct radeon_transform_context *t,
- 	struct prog_instruction* orig_inst, void* data)
- {
--	struct r500_fragment_program_compiler *compiler =
--		(struct r500_fragment_program_compiler*)data;
-+	struct r300_fragment_program_compiler *compiler =
-+		(struct r300_fragment_program_compiler*)data;
- 	struct prog_instruction inst = *orig_inst;
- 	struct prog_instruction* tgt;
- 	GLboolean destredirect = GL_FALSE;
-@@ -188,121 +184,7 @@ static GLboolean transform_TEX(
- 	return GL_TRUE;
- }
- 
--
--static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp)
--{
--	struct gl_fragment_program *mp = &fp->mesa_program;
--
--	/* Ask Mesa nicely to fill in ParameterValues for us */
--	if (mp->Base.Parameters)
--		_mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
--}
--
--
--/**
-- * Transform the program to support fragment.position.
-- *
-- * Introduce a small fragment at the start of the program that will be
-- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
-- * All other code pieces that reference that input will be rewritten
-- * to read from a newly allocated temporary.
-- *
-- * \todo if/when r5xx supports the radeon_program architecture, this is a
-- * likely candidate for code sharing.
-- */
--static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
--{
--	GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
--
--	if (!(InputsRead & FRAG_BIT_WPOS))
--		return;
--
--	static gl_state_index tokens[STATE_LENGTH] = {
--		STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
--	};
--	struct prog_instruction *fpi;
--	GLuint window_index;
--	int i = 0;
--	GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
--
--	_mesa_insert_instructions(compiler->program, 0, 3);
--	fpi = compiler->program->Instructions;
--
--	/* perspective divide */
--	fpi[i].Opcode = OPCODE_RCP;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_W;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
--	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
--	fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
--	i++;
--
--	fpi[i].Opcode = OPCODE_MUL;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
--	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
--	fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
--
--	fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
--	fpi[i].SrcReg[1].Index = tempregi;
--	fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
--	i++;
--
--	/* viewport transformation */
--	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
--
--	fpi[i].Opcode = OPCODE_MAD;
--
--	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
--	fpi[i].DstReg.Index = tempregi;
--	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
--	fpi[i].DstReg.CondMask = COND_TR;
--
--	fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
--	fpi[i].SrcReg[0].Index = tempregi;
--	fpi[i].SrcReg[0].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--
--	fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
--	fpi[i].SrcReg[1].Index = window_index;
--	fpi[i].SrcReg[1].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--
--	fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
--	fpi[i].SrcReg[2].Index = window_index;
--	fpi[i].SrcReg[2].Swizzle =
--	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
--	i++;
--
--	for (; i < compiler->program->NumInstructions; ++i) {
--		int reg;
--		for (reg = 0; reg < 3; reg++) {
--			if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
--			    fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
--				fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
--				fpi[i].SrcReg[reg].Index = tempregi;
--			}
--		}
--	}
--}
--
--
--static void nqssadce_init(struct nqssadce_state* s)
--{
--	s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
--	s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
--}
--
--static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
-+GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
- {
- 	GLuint relevant;
- 	int i;
-@@ -367,8 +249,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
-  * The only thing we *cannot* do in an ALU instruction is per-component
-  * negation. Therefore, we split the MOV into two instructions when necessary.
-  */
--static void nqssadce_build_swizzle(struct nqssadce_state *s,
--	struct prog_dst_register dst, struct prog_src_register src)
-+void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
- {
- 	struct prog_instruction *inst;
- 	GLuint negatebase[2] = { 0, 0 };
-@@ -397,124 +278,6 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s,
- 	}
- }
- 
--static GLuint build_dtm(GLuint depthmode)
--{
--	switch(depthmode) {
--	default:
--	case GL_LUMINANCE: return 0;
--	case GL_INTENSITY: return 1;
--	case GL_ALPHA: return 2;
--	}
--}
--
--static GLuint build_func(GLuint comparefunc)
--{
--	return comparefunc - GL_NEVER;
--}
--
--
--/**
-- * Collect all external state that is relevant for compiling the given
-- * fragment program.
-- */
--static void build_state(
--	r300ContextPtr r300,
--	struct r500_fragment_program *fp,
--	struct r500_fragment_program_external_state *state)
--{
--	int unit;
--
--	_mesa_bzero(state, sizeof(*state));
--
--	for(unit = 0; unit < 16; ++unit) {
--		if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
--			struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
--
--			state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
--			state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
--		}
--	}
--}
--
--static void dump_program(struct r500_fragment_program_code *code);
--
--void r500TranslateFragmentShader(r300ContextPtr r300,
--				 struct r500_fragment_program *fp)
--{
--	struct r500_fragment_program_external_state state;
--
--	build_state(r300, fp, &state);
--	if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
--		/* TODO: cache compiled programs */
--		fp->translated = GL_FALSE;
--		_mesa_memcpy(&fp->state, &state, sizeof(state));
--	}
--
--	if (!fp->translated) {
--		struct r500_fragment_program_compiler compiler;
--
--		compiler.r300 = r300;
--		compiler.fp = fp;
--		compiler.code = &fp->code;
--		compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Compiler: Initial program:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		insert_WPOS_trailer(&compiler);
--
--		struct radeon_program_transformation transformations[] = {
--			{ &transform_TEX, &compiler },
--			{ &radeonTransformALU, 0 },
--			{ &radeonTransformDeriv, 0 },
--			{ &radeonTransformTrigScale, 0 }
--		};
--		radeonLocalTransform(r300->radeon.glCtx, compiler.program,
--			4, transformations);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Compiler: after native rewrite:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		struct radeon_nqssadce_descr nqssadce = {
--			.Init = &nqssadce_init,
--			.IsNativeSwizzle = &is_native_swizzle,
--			.BuildSwizzle = &nqssadce_build_swizzle,
--			.RewriteDepthOut = GL_TRUE
--		};
--		radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			_mesa_printf("Compiler: after NqSSA-DCE:\n");
--			_mesa_print_program(compiler.program);
--		}
--
--		fp->translated = r500FragmentProgramEmit(&compiler);
--
--		/* Subtle: Rescue any parameters that have been added during transformations */
--		_mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
--		fp->mesa_program.Base.Parameters = compiler.program->Parameters;
--		compiler.program->Parameters = 0;
--
--		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
--
--		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
--
--		if (RADEON_DEBUG & DEBUG_PIXEL) {
--			if (fp->translated) {
--				_mesa_printf("Machine-readable code:\n");
--				dump_program(&fp->code);
--			}
--		}
--
--	}
--
--	update_params(r300, fp);
--
--}
- 
- static char *toswiz(int swiz_val) {
-   switch(swiz_val) {
-@@ -613,9 +376,9 @@ static char *to_texop(int val)
-   return NULL;
- }
- 
--static void dump_program(struct r500_fragment_program_code *code)
-+void r500FragmentProgramDump(union rX00_fragment_program_code *c)
- {
--
-+  struct r500_fragment_program_code *code = &c->r500;
-   fprintf(stderr, "R500 Fragment Program:\n--------\n");
- 
-   int n;
-diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h
-index 1e45538..9ca2f9b 100644
---- a/src/mesa/drivers/dri/r300/r500_fragprog.h
-+++ b/src/mesa/drivers/dri/r300/r500_fragprog.h
-@@ -33,30 +33,20 @@
- #ifndef __R500_FRAGPROG_H_
- #define __R500_FRAGPROG_H_
- 
--#include "main/glheader.h"
--#include "main/macros.h"
--#include "main/enums.h"
- #include "shader/prog_parameter.h"
--#include "shader/prog_print.h"
--#include "shader/program.h"
- #include "shader/prog_instruction.h"
- 
- #include "r300_context.h"
--#include "r300_state.h"
--#include "radeon_program.h"
-+#include "radeon_nqssadce.h"
- 
--struct r500_fragment_program;
-+extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
- 
--extern void r500TranslateFragmentShader(r300ContextPtr r300,
--					struct r500_fragment_program *fp);
-+extern void r500FragmentProgramDump(union rX00_fragment_program_code *c);
- 
--struct r500_fragment_program_compiler {
--	r300ContextPtr r300;
--	struct r500_fragment_program *fp;
--	struct r500_fragment_program_code *code;
--	struct gl_program *program;
--};
-+extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
- 
--extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler);
-+extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
-+
-+extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
- 
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
-index 4631235..277f801 100644
---- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
-+++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
-@@ -49,8 +49,8 @@
- 
- 
- #define PROG_CODE \
--	struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \
--	struct r500_fragment_program_code *code = c->code
-+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
-+	struct r500_fragment_program_code *code = &c->code->r500
- 
- #define error(fmt, args...) do {			\
- 		fprintf(stderr, "%s::%s(): " fmt "\n",	\
-@@ -72,7 +72,7 @@ static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex
- 	}
- 
- 	if (*hwindex >= code->const_nr) {
--		if (*hwindex >= PFS_NUM_CONST_REGS) {
-+		if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
- 			error("Out of hw constants!\n");
- 			return GL_FALSE;
- 		}
-@@ -299,9 +299,9 @@ static const struct radeon_pair_handler pair_handler = {
- 	.MaxHwTemps = 128
- };
- 
--GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler)
-+GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
- {
--	struct r500_fragment_program_code *code = compiler->code;
-+	struct r500_fragment_program_code *code = &compiler->code->r500;
- 
- 	_mesa_bzero(code, sizeof(*code));
- 	code->max_temp_idx = 1;
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
-deleted file mode 100644
-index 5267fe9..0000000
---- a/src/mesa/drivers/dri/r300/radeon_context.c
-+++ /dev/null
-@@ -1,330 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/**
-- * \file radeon_context.c
-- * Common context initialization.
-- *
-- * \author Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include <dlfcn.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/state.h"
--#include "main/matrix.h"
--#include "main/framebuffer.h"
--
--#include "drivers/common/driverfuncs.h"
--#include "swrast/swrast.h"
--
--#include "radeon_screen.h"
--#include "radeon_ioctl.h"
--#include "radeon_macros.h"
--#include "radeon_reg.h"
--
--#include "radeon_state.h"
--#include "r300_state.h"
--
--#include "utils.h"
--#include "vblank.h"
--#include "xmlpool.h"		/* for symbolic values of enum-type options */
--
--#define DRIVER_DATE "20060815"
--
--
--/* Return various strings for glGetString().
-- */
--static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--	static char buffer[128];
--
--	switch (name) {
--	case GL_VENDOR:
--		if (IS_R300_CLASS(radeon->radeonScreen))
--			return (GLubyte *) "DRI R300 Project";
--		else
--			return (GLubyte *) "Tungsten Graphics, Inc.";
--
--	case GL_RENDERER:
--	{
--		unsigned offset;
--		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
--			radeon->radeonScreen->AGPMode;
--		const char* chipname;
--
--		if (IS_R300_CLASS(radeon->radeonScreen))
--			chipname = "R300";
--		else
--			chipname = "R200";
--
--		offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
--					      agp_mode);
--
--		if (IS_R300_CLASS(radeon->radeonScreen)) {
--		sprintf(&buffer[offset], " %sTCL",
--			(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
--			? "" : "NO-");
--		} else {
--			sprintf(&buffer[offset], " %sTCL",
--			!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
--			? "" : "NO-");
--		}
--
--		return (GLubyte *) buffer;
--	}
--
--	default:
--		return NULL;
--	}
--}
--
--/* Initialize the driver's misc functions.
-- */
--static void radeonInitDriverFuncs(struct dd_function_table *functions)
--{
--	functions->GetString = radeonGetString;
--}
--
--
--/**
-- * Create and initialize all common fields of the context,
-- * including the Mesa context itself.
-- */
--GLboolean radeonInitContext(radeonContextPtr radeon,
--			    struct dd_function_table* functions,
--			    const __GLcontextModes * glVisual,
--			    __DRIcontextPrivate * driContextPriv,
--			    void *sharedContextPrivate)
--{
--	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
--	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
--	GLcontext* ctx;
--	GLcontext* shareCtx;
--	int fthrottle_mode;
--
--	/* Fill in additional standard functions. */
--	radeonInitDriverFuncs(functions);
--
--	radeon->radeonScreen = screen;
--	/* Allocate and initialize the Mesa context */
--	if (sharedContextPrivate)
--		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
--	else
--		shareCtx = NULL;
--	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
--					    functions, (void *)radeon);
--	if (!radeon->glCtx)
--		return GL_FALSE;
--
--	ctx = radeon->glCtx;
--	driContextPriv->driverPrivate = radeon;
--
--	/* DRI fields */
--	radeon->dri.context = driContextPriv;
--	radeon->dri.screen = sPriv;
--	radeon->dri.drawable = NULL;
--	radeon->dri.readable = NULL;
--	radeon->dri.hwContext = driContextPriv->hHWContext;
--	radeon->dri.hwLock = &sPriv->pSAREA->lock;
--	radeon->dri.fd = sPriv->fd;
--	radeon->dri.drmMinor = sPriv->drm_version.minor;
--
--	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
--					       screen->sarea_priv_offset);
--
--	/* Setup IRQs */
--	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
--	radeon->iw.irq_seq = -1;
--	radeon->irqsEmitted = 0;
--	radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
--			  radeon->radeonScreen->irq);
--
--	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
--
--	if (!radeon->do_irqs)
--		fprintf(stderr,
--			"IRQ's not enabled, falling back to %s: %d %d\n",
--			radeon->do_usleeps ? "usleeps" : "busy waits",
--			fthrottle_mode, radeon->radeonScreen->irq);
--
--	(*sPriv->systemTime->getUST) (&radeon->swap_ust);
--
--	return GL_TRUE;
--}
--
--
--/**
-- * Cleanup common context fields.
-- * Called by r200DestroyContext/r300DestroyContext
-- */
--void radeonCleanupContext(radeonContextPtr radeon)
--{
--	/* _mesa_destroy_context() might result in calls to functions that
--	 * depend on the DriverCtx, so don't set it to NULL before.
--	 *
--	 * radeon->glCtx->DriverCtx = NULL;
--	 */
--
--	/* free the Mesa context */
--	_mesa_destroy_context(radeon->glCtx);
--
--	if (radeon->state.scissor.pClipRects) {
--		FREE(radeon->state.scissor.pClipRects);
--		radeon->state.scissor.pClipRects = 0;
--	}
--}
--
--
--/**
-- * Swap front and back buffer.
-- */
--void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
--{
--	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--		radeonContextPtr radeon;
--		GLcontext *ctx;
--
--		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--		ctx = radeon->glCtx;
--
--		if (ctx->Visual.doubleBufferMode) {
--			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--			if (radeon->doPageFlip) {
--				radeonPageFlip(dPriv);
--			} else {
--			    radeonCopyBuffer(dPriv, NULL);
--			}
--		}
--	} else {
--		/* XXX this shouldn't be an error but we can't handle it for now */
--		_mesa_problem(NULL, "%s: drawable has no context!",
--			      __FUNCTION__);
--	}
--}
--
--void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--			 int x, int y, int w, int h )
--{
--    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--	radeonContextPtr radeon;
--	GLcontext *ctx;
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--	ctx = radeon->glCtx;
--
--	if (ctx->Visual.doubleBufferMode) {
--	    drm_clip_rect_t rect;
--	    rect.x1 = x + dPriv->x;
--	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	    rect.x2 = rect.x1 + w;
--	    rect.y2 = rect.y1 + h;
--	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--	    radeonCopyBuffer(dPriv, &rect);
--	}
--    } else {
--	/* XXX this shouldn't be an error but we can't handle it for now */
--	_mesa_problem(NULL, "%s: drawable has no context!",
--		      __FUNCTION__);
--    }
--}
--
--/* Force the context `c' to be the current context and associate with it
-- * buffer `b'.
-- */
--GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
--			    __DRIdrawablePrivate * driDrawPriv,
--			    __DRIdrawablePrivate * driReadPriv)
--{
--	if (driContextPriv) {
--		radeonContextPtr radeon =
--			(radeonContextPtr) driContextPriv->driverPrivate;
--
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--				radeon->glCtx);
--
--		if (radeon->dri.drawable != driDrawPriv) {
--			if (driDrawPriv->swap_interval == (unsigned)-1) {
--				driDrawPriv->vblFlags =
--					(radeon->radeonScreen->irq != 0)
--					? driGetDefaultVBlankFlags(&radeon->
--								   optionCache)
--					: VBLANK_FLAG_NO_IRQ;
--
--				driDrawableInitVBlank(driDrawPriv);
--			}
--		}
--
--		radeon->dri.readable = driReadPriv;
--
--		if (radeon->dri.drawable != driDrawPriv ||
--		    radeon->lastStamp != driDrawPriv->lastStamp) {
--			radeon->dri.drawable = driDrawPriv;
--
--			radeonSetCliprects(radeon);
--			r300UpdateViewportOffset(radeon->glCtx);
--		}
--
--		_mesa_make_current(radeon->glCtx,
--				    (GLframebuffer *) driDrawPriv->
--				    driverPrivate,
--				    (GLframebuffer *) driReadPriv->
--				    driverPrivate);
--
--		_mesa_update_state(radeon->glCtx);		
--
--		radeonUpdatePageFlipping(radeon);
--	} else {
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--		_mesa_make_current(0, 0, 0);
--	}
--
--	if (RADEON_DEBUG & DEBUG_DRI)
--		fprintf(stderr, "End %s\n", __FUNCTION__);
--	return GL_TRUE;
--}
--
--/* Force the context `c' to be unbound from its buffer.
-- */
--GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
--{
--	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_DRI)
--		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--			radeon->glCtx);
--
--	return GL_TRUE;
--}
--
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
-index 47cbc22..250570f 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.h
-+++ b/src/mesa/drivers/dri/r300/radeon_context.h
-@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "dri_util.h"
- 
--struct radeon_context;
--typedef struct radeon_context radeonContextRec;
--typedef struct radeon_context *radeonContextPtr;
--
--/* Rasterizing fallbacks */
--/* See correponding strings in r200_swtcl.c */
--#define RADEON_FALLBACK_TEXTURE		0x0001
--#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
--#define RADEON_FALLBACK_STENCIL		0x0004
--#define RADEON_FALLBACK_RENDER_MODE	0x0008
--#define RADEON_FALLBACK_BLEND_EQ	0x0010
--#define RADEON_FALLBACK_BLEND_FUNC	0x0020
--#define RADEON_FALLBACK_DISABLE		0x0040
--#define RADEON_FALLBACK_BORDER_MODE	0x0080
-+#include "radeon_screen.h"
- 
- #if R200_MERGED
- extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
- /* TCL fallbacks */
- extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
- 
--#define RADEON_TCL_FALLBACK_RASTER		0x0001	/* rasterization */
--#define RADEON_TCL_FALLBACK_UNFILLED		0x0002	/* unfilled tris */
--#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE	0x0004	/* twoside tris */
--#define RADEON_TCL_FALLBACK_MATERIAL		0x0008	/* material in vb */
--#define RADEON_TCL_FALLBACK_TEXGEN_0		0x0010	/* texgen, unit 0 */
--#define RADEON_TCL_FALLBACK_TEXGEN_1		0x0020	/* texgen, unit 1 */
--#define RADEON_TCL_FALLBACK_TEXGEN_2		0x0040	/* texgen, unit 2 */
--#define RADEON_TCL_FALLBACK_TEXGEN_3		0x0080	/* texgen, unit 3 */
--#define RADEON_TCL_FALLBACK_TEXGEN_4		0x0100	/* texgen, unit 4 */
--#define RADEON_TCL_FALLBACK_TEXGEN_5		0x0200	/* texgen, unit 5 */
--#define RADEON_TCL_FALLBACK_TCL_DISABLE		0x0400	/* user disable */
--#define RADEON_TCL_FALLBACK_BITMAP		0x0800	/* draw bitmap with points */
--#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM	0x1000	/* vertex program active */
--
- #if R200_MERGED
- #define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
- #else
- #define TCL_FALLBACK( ctx, bit, mode )	;
- #endif
- 
--struct radeon_dri_mirror {
--	__DRIcontextPrivate *context;	/* DRI context */
--	__DRIscreenPrivate *screen;	/* DRI screen */
--	/**
--	 * DRI drawable bound to this context for drawing.
--	 */
--	__DRIdrawablePrivate *drawable;
--
--	/**
--	 * DRI drawable bound to this context for reading.
--	 */
--	__DRIdrawablePrivate *readable;
--
--	drm_context_t hwContext;
--	drm_hw_lock_t *hwLock;
--	int fd;
--	int drmMinor;
--};
--
--/**
-- * Derived state for internal purposes.
-- */
--struct radeon_scissor_state {
--	drm_clip_rect_t rect;
--	GLboolean enabled;
--
--	GLuint numClipRects;	/* Cliprects active */
--	GLuint numAllocedClipRects;	/* Cliprects available */
--	drm_clip_rect_t *pClipRects;
--};
--
--struct radeon_colorbuffer_state {
--	GLuint clear;
--	GLint drawOffset, drawPitch;
--};
--
--struct radeon_state {
--	struct radeon_colorbuffer_state color;
--	struct radeon_scissor_state scissor;
--};
--
--/**
-- * Common per-context variables shared by R200 and R300.
-- * R200- and R300-specific code "derive" their own context from this
-- * structure.
-- */
--struct radeon_context {
--	GLcontext *glCtx;	/* Mesa context */
--	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
--
--	/* Fallback state */
--	GLuint Fallback;
--	GLuint TclFallback;
--
--	/* Page flipping */
--	GLuint doPageFlip;
--
--	/* Drawable, cliprect and scissor information */
--	GLuint numClipRects;	/* Cliprects for the draw buffer */
--	drm_clip_rect_t *pClipRects;
--	unsigned int lastStamp;
--	GLboolean lost_context;
--	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
--
--	/* Mirrors of some DRI state */
--	struct radeon_dri_mirror dri;
--
--	/* Busy waiting */
--	GLuint do_usleeps;
--	GLuint do_irqs;
--	GLuint irqsEmitted;
--	drm_radeon_irq_wait_t iw;
--
--	/* buffer swap */
--	int64_t swap_ust;
--	int64_t swap_missed_ust;
--
--	GLuint swap_count;
--	GLuint swap_missed_count;
--
--	/* Derived state */
--	struct radeon_state state;
--
--	/* Configuration cache
--	 */
--	driOptionCache optionCache;
--};
--
--#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
--
--extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
--extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--				int x, int y, int w, int h);
--extern GLboolean radeonInitContext(radeonContextPtr radeon,
--				   struct dd_function_table *functions,
--				   const __GLcontextModes * glVisual,
--				   __DRIcontextPrivate * driContextPriv,
--				   void *sharedContextPrivate);
--extern void radeonCleanupContext(radeonContextPtr radeon);
--extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
--				   __DRIdrawablePrivate * driDrawPriv,
--				   __DRIdrawablePrivate * driReadPriv);
--extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
--
--/* ================================================================
-- * Debugging:
-- */
--#define DO_DEBUG		1
--
--#if DO_DEBUG
--extern int RADEON_DEBUG;
--#else
--#define RADEON_DEBUG		0
--#endif
--
--#define DEBUG_TEXTURE	0x0001
--#define DEBUG_STATE	0x0002
--#define DEBUG_IOCTL	0x0004
--#define DEBUG_PRIMS	0x0008
--#define DEBUG_VERTS	0x0010
--#define DEBUG_FALLBACKS	0x0020
--#define DEBUG_VFMT	0x0040
--#define DEBUG_CODEGEN	0x0080
--#define DEBUG_VERBOSE	0x0100
--#define DEBUG_DRI       0x0200
--#define DEBUG_DMA       0x0400
--#define DEBUG_SANITY    0x0800
--#define DEBUG_SYNC      0x1000
--#define DEBUG_PIXEL     0x2000
--#define DEBUG_MEMORY    0x4000
- 
- #endif				/* __RADEON_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-deleted file mode 100644
-index f042a7b..0000000
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
-+++ /dev/null
-@@ -1,396 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include <sched.h>
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/macros.h"
--#include "main/context.h"
--#include "swrast/swrast.h"
--#include "r300_context.h"
--#include "radeon_ioctl.h"
--#include "r300_ioctl.h"
--#include "r300_state.h"
--#include "radeon_reg.h"
--
--#include "drirenderbuffer.h"
--#include "vblank.h"
--
--static void radeonWaitForIdle(radeonContextPtr radeon);
--
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
--{
--	drm_radeon_getparam_t gp;
--	int ret;
--	uint32_t frame = 0;
--
--	gp.param = RADEON_PARAM_LAST_FRAME;
--	gp.value = (int *)&frame;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
--				  &gp, sizeof(gp));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--
--	return frame;
--}
--
--uint32_t radeonGetAge(radeonContextPtr radeon)
--{
--	drm_radeon_getparam_t gp;
--	int ret;
--	uint32_t age = 0;
--
--	gp.param = RADEON_PARAM_LAST_CLEAR;
--	gp.value = (int *)&age;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
--				  &gp, sizeof(gp));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--
--	return age;
--}
--
--static void radeonEmitIrqLocked(radeonContextPtr radeon)
--{
--	drm_radeon_irq_emit_t ie;
--	int ret;
--
--	ie.irq_seq = &radeon->iw.irq_seq;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
--				  &ie, sizeof(ie));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--}
--
--static void radeonWaitIrq(radeonContextPtr radeon)
--{
--	int ret;
--
--	do {
--		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
--				      &radeon->iw, sizeof(radeon->iw));
--	} while (ret && (errno == EINTR || errno == EBUSY));
--
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--}
--
--static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
--{
--	drm_radeon_sarea_t *sarea = radeon->sarea;
--
--	if (radeon->do_irqs) {
--		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
--			if (!radeon->irqsEmitted) {
--				while (radeonGetLastFrame(radeon) <
--				       sarea->last_frame) ;
--			} else {
--				UNLOCK_HARDWARE(radeon);
--				radeonWaitIrq(radeon);
--				LOCK_HARDWARE(radeon);
--			}
--			radeon->irqsEmitted = 10;
--		}
--
--		if (radeon->irqsEmitted) {
--			radeonEmitIrqLocked(radeon);
--			radeon->irqsEmitted--;
--		}
--	} else {
--		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
--			UNLOCK_HARDWARE(radeon);
--			if (radeon->do_usleeps)
--				DO_USLEEP(1);
--			LOCK_HARDWARE(radeon);
--		}
--	}
--}
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
--		      const drm_clip_rect_t	 * rect)
--{
--	radeonContextPtr radeon;
--	GLint nbox, i, ret;
--	GLboolean missed_target;
--	int64_t ust;
--	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--	assert(dPriv);
--	assert(dPriv->driContextPriv);
--	assert(dPriv->driContextPriv->driverPrivate);
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
--			(void *)radeon->glCtx);
--	}
--
--	r300Flush(radeon->glCtx);
--
--	LOCK_HARDWARE(radeon);
--
--	/* Throttle the frame rate -- only allow one pending swap buffers
--	 * request at a time.
--	 */
--	radeonWaitForFrameCompletion(radeon);
--	if (!rect)
--	{
--	    UNLOCK_HARDWARE(radeon);
--	    driWaitForVBlank(dPriv, &missed_target);
--	    LOCK_HARDWARE(radeon);
--	}
--
--	nbox = dPriv->numClipRects;	/* must be in locked region */
--
--	for (i = 0; i < nbox;) {
--		GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
--		drm_clip_rect_t *box = dPriv->pClipRects;
--		drm_clip_rect_t *b = radeon->sarea->boxes;
--		GLint n = 0;
--
--		for ( ; i < nr ; i++ ) {
--
--		    *b = box[i];
--
--		    if (rect)
--		    {
--			if (rect->x1 > b->x1)
--			    b->x1 = rect->x1;
--			if (rect->y1 > b->y1)
--			    b->y1 = rect->y1;
--			if (rect->x2 < b->x2)
--			    b->x2 = rect->x2;
--			if (rect->y2 < b->y2)
--			    b->y2 = rect->y2;
--
--			if (b->x1 >= b->x2 || b->y1 >= b->y2)
--			    continue;
--		    }
--
--		    b++;
--		    n++;
--		}
--		radeon->sarea->nbox = n;
--
--		if (!n)
--		   continue;
--
--		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
--
--		if (ret) {
--			fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
--				ret);
--			UNLOCK_HARDWARE(radeon);
--			exit(1);
--		}
--	}
--
--	UNLOCK_HARDWARE(radeon);
--	if (!rect)
--	{
--	    ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
--
--	    radeon->swap_count++;
--	    (*psp->systemTime->getUST) (&ust);
--	    if (missed_target) {
--		radeon->swap_missed_count++;
--		radeon->swap_missed_ust = ust - radeon->swap_ust;
--	    }
--
--	    radeon->swap_ust = ust;
--
--	    sched_yield();
--	}
--}
--
--void radeonPageFlip(__DRIdrawablePrivate * dPriv)
--{
--	radeonContextPtr radeon;
--	GLint ret;
--	GLboolean missed_target;
--	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--	assert(dPriv);
--	assert(dPriv->driContextPriv);
--	assert(dPriv->driContextPriv->driverPrivate);
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--			radeon->sarea->pfCurrentPage);
--	}
--
--	r300Flush(radeon->glCtx);
--	LOCK_HARDWARE(radeon);
--
--	if (!dPriv->numClipRects) {
--		UNLOCK_HARDWARE(radeon);
--		usleep(10000);	/* throttle invisible client 10ms */
--		return;
--	}
--
--	/* Need to do this for the perf box placement:
--	 */
--	{
--		drm_clip_rect_t *box = dPriv->pClipRects;
--		drm_clip_rect_t *b = radeon->sarea->boxes;
--		b[0] = box[0];
--		radeon->sarea->nbox = 1;
--	}
--
--	/* Throttle the frame rate -- only allow a few pending swap buffers
--	 * request at a time.
--	 */
--	radeonWaitForFrameCompletion(radeon);
--	UNLOCK_HARDWARE(radeon);
--	driWaitForVBlank(dPriv, &missed_target);
--	if (missed_target) {
--		radeon->swap_missed_count++;
--		(void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
--	}
--	LOCK_HARDWARE(radeon);
--
--	ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
--
--	UNLOCK_HARDWARE(radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
--		exit(1);
--	}
--
--	radeon->swap_count++;
--	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
--
--        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
--                             radeon->sarea->pfCurrentPage);
--
--	if (radeon->sarea->pfCurrentPage == 1) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	}
--
--	if (IS_R300_CLASS(radeon->radeonScreen)) {
--		r300ContextPtr r300 = (r300ContextPtr)radeon;
--		R300_STATECHANGE(r300, cb);
--		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
--						r300->radeon.radeonScreen->fbLocation;
--		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--		
--		if (r300->radeon.radeonScreen->cpp == 4)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--		else
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--	
--		if (r300->radeon.sarea->tiling_enabled)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--	}
--}
--
--void radeonWaitForIdleLocked(radeonContextPtr radeon)
--{
--	int ret;
--	int i = 0;
--
--	do {
--		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
--		if (ret)
--			DO_USLEEP(1);
--	} while (ret && ++i < 100);
--
--	if (ret < 0) {
--		UNLOCK_HARDWARE(radeon);
--		fprintf(stderr, "Error: R300 timed out... exiting\n");
--		exit(-1);
--	}
--}
--
--static void radeonWaitForIdle(radeonContextPtr radeon)
--{
--	LOCK_HARDWARE(radeon);
--	radeonWaitForIdleLocked(radeon);
--	UNLOCK_HARDWARE(radeon);
--}
--
--void radeonFlush(GLcontext * ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	if (IS_R300_CLASS(radeon->radeonScreen))
--		r300Flush(ctx);
--}
--
--
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void radeonFinish(GLcontext * ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	radeonFlush(ctx);
--
--	if (radeon->do_irqs) {
--		LOCK_HARDWARE(radeon);
--		radeonEmitIrqLocked(radeon);
--		UNLOCK_HARDWARE(radeon);
--		radeonWaitIrq(radeon);
--	} else
--		radeonWaitForIdle(radeon);
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h
-deleted file mode 100644
-index 3add775..0000000
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.h
-+++ /dev/null
-@@ -1,57 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __RADEON_IOCTL_H__
--#define __RADEON_IOCTL_H__
--
--#include "main/simple_list.h"
--#include "radeon_dri.h"
--#include "radeon_lock.h"
--
--#include "xf86drm.h"
--#include "drm.h"
--#if 0
--#include "r200context.h"
--#endif
--#include "radeon_drm.h"
--
--extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
--			     const drm_clip_rect_t	* rect);
--extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
--extern void radeonFlush(GLcontext * ctx);
--extern void radeonFinish(GLcontext * ctx);
--extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
--extern uint32_t radeonGetAge(radeonContextPtr radeon);
--
--#endif				/* __RADEON_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
-deleted file mode 100644
-index 4f47afd..0000000
---- a/src/mesa/drivers/dri/r300/radeon_lock.c
-+++ /dev/null
-@@ -1,137 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *   Kevin E. Martin <martin@valinux.com>
-- */
--
--#include "radeon_lock.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
--#include "r300_context.h"
--#include "r300_state.h"
--
--#include "main/framebuffer.h"
--
--#include "drirenderbuffer.h"
--
--#if DEBUG_LOCKING
--char *prevLockFile = NULL;
--int prevLockLine = 0;
--#endif
--
--/* Turn on/off page flipping according to the flags in the sarea:
-- */
--void radeonUpdatePageFlipping(radeonContextPtr rmesa)
--{
--	int use_back;
--
--	rmesa->doPageFlip = rmesa->sarea->pfState;
--	if (rmesa->glCtx->WinSysDrawBuffer) {
--		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--				     rmesa->sarea->pfCurrentPage);
--		r300UpdateDrawBuffer(rmesa->glCtx);
--	}
--
--	use_back = rmesa->glCtx->DrawBuffer ?
--	    (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
--	     BUFFER_BACK_LEFT) : 1;
--	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
--
--	if (use_back) {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->backOffset;
--		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
--	} else {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->frontOffset;
--		rmesa->state.color.drawPitch =
--		    rmesa->radeonScreen->frontPitch;
--	}
--}
--
--/* Update the hardware state.  This is called if another context has
-- * grabbed the hardware lock, which includes the X server.  This
-- * function also updates the driver's window state after the X server
-- * moves, resizes or restacks a window -- the change will be reflected
-- * in the drawable position and clip rects.  Since the X server grabs
-- * the hardware lock when it changes the window state, this routine will
-- * automatically be called after such a change.
-- */
--void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
--{
--	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
--	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
--	drm_radeon_sarea_t *sarea = rmesa->sarea;
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--
--	assert(drawable != NULL);
--
--	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
--
--	/* The window might have moved, so we might need to get new clip
--	 * rects.
--	 *
--	 * NOTE: This releases and regrabs the hw lock to allow the X server
--	 * to respond to the DRI protocol request for new drawable info.
--	 * Since the hardware state depends on having the latest drawable
--	 * clip rects, all state checking must be done _after_ this call.
--	 */
--	DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
--	if (drawable != readable) {
--		DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
--	}
--
--	if (rmesa->lastStamp != drawable->lastStamp) {
--		radeonUpdatePageFlipping(rmesa);
--		radeonSetCliprects(rmesa);
--		r300UpdateViewportOffset(rmesa->glCtx);
--		driUpdateFramebufferSize(rmesa->glCtx, drawable);
--	}
--
--	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--
--		sarea->ctx_owner = rmesa->dri.hwContext;
--		for (i = 0; i < r300->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
--		}
--	}
--
--	rmesa->lost_context = GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
-deleted file mode 100644
-index a344837..0000000
---- a/src/mesa/drivers/dri/r300/radeon_lock.h
-+++ /dev/null
-@@ -1,115 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *   Kevin E. Martin <martin@valinux.com>
-- */
--
--#ifndef __RADEON_LOCK_H__
--#define __RADEON_LOCK_H__
--
--#include "radeon_context.h"
--
--extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
--extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
--
--/* Turn DEBUG_LOCKING on to find locking conflicts.
-- */
--#define DEBUG_LOCKING	0
--
--#if DEBUG_LOCKING
--extern char *prevLockFile;
--extern int prevLockLine;
--
--#define DEBUG_LOCK()							\
--   do {									\
--      prevLockFile = (__FILE__);					\
--      prevLockLine = (__LINE__);					\
--   } while (0)
--
--#define DEBUG_RESET()							\
--   do {									\
--      prevLockFile = 0;							\
--      prevLockLine = 0;							\
--   } while (0)
--
--#define DEBUG_CHECK_LOCK()						\
--   do {									\
--      if (prevLockFile) {						\
--	 fprintf(stderr,						\
--		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
--		  prevLockFile, prevLockLine, __FILE__, __LINE__);	\
--	 exit(1);							\
--      }									\
--   } while (0)
--
--#else
--
--#define DEBUG_LOCK()
--#define DEBUG_RESET()
--#define DEBUG_CHECK_LOCK()
--
--#endif
--
--/*
-- * !!! We may want to separate locks from locks with validation.  This
-- * could be used to improve performance for those things commands that
-- * do not do any drawing !!!
-- */
--
--/* Lock the hardware and validate our state.
-- */
--#define LOCK_HARDWARE( rmesa )						\
--	do {								\
--		char __ret = 0;						\
--		DEBUG_CHECK_LOCK();					\
--		DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext,	\
--			(DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
--		if (__ret)						\
--			radeonGetLock((rmesa), 0);			\
--		DEBUG_LOCK();						\
--	} while (0)
--
--#define UNLOCK_HARDWARE( rmesa )					\
--	do {								\
--		DRM_UNLOCK((rmesa)->dri.fd,				\
--			(rmesa)->dri.hwLock,				\
--			(rmesa)->dri.hwContext);			\
--		DEBUG_RESET();						\
--	} while (0)
--
--#endif				/* __RADEON_LOCK_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
-index 4aa2319..906d36e 100644
---- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
-+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
-@@ -35,7 +35,7 @@
- 
- #include "radeon_program_pair.h"
- 
--#include "radeon_context.h"
-+#include "radeon_common.h"
- 
- #include "shader/prog_print.h"
- 
-@@ -609,6 +609,7 @@ static void emit_all_tex(struct pair_state *s)
- 		if (s->Debug) {
- 			_mesa_printf("   ");
- 			_mesa_print_instruction(inst);
-+			fflush(stdout);
- 		}
- 		s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
- 	}
-diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
-deleted file mode 100644
-index 16f9fb9..0000000
---- a/src/mesa/drivers/dri/r300/radeon_span.c
-+++ /dev/null
-@@ -1,349 +0,0 @@
--/**************************************************************************
--
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *
-- */
--
--#include "main/glheader.h"
--#include "swrast/swrast.h"
--
--#include "r300_state.h"
--#include "radeon_ioctl.h"
--#include "r300_ioctl.h"
--#include "radeon_span.h"
--
--#include "drirenderbuffer.h"
--
--#define DBG 0
--
--/*
-- * Note that all information needed to access pixels in a renderbuffer
-- * should be obtained through the gl_renderbuffer parameter, not per-context
-- * information.
-- */
--#define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
--   const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
--
--#define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
--
--#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
--
--#define Y_FLIP(Y) (bottom - (Y))
--
--#define HW_LOCK()
--
--#define HW_UNLOCK()
--
--/* ================================================================
-- * Color buffer
-- */
--
--/* 16 bit, RGB565 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_RGB
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
--
--#define TAG(x)    radeon##x##_RGB565
--#define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
--#include "spantmp2.h"
--
--/* 32 bit, ARGB8888 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_BGRA
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
--
--#define TAG(x)    radeon##x##_ARGB8888
--#define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
--#include "spantmp2.h"
--
--/* ================================================================
-- * Depth buffer
-- */
--
--/* The Radeon family has depth tiling on all the time, so we have to convert
-- * the x,y coordinates into the memory bus address (mba) in the same
-- * manner as the engine.  In each case, the linear block address (ba)
-- * is calculated, and then wired with x and y to produce the final
-- * memory address.
-- * The chip will do address translation on its own if the surface registers
-- * are set up correctly. It is not quite enough to get it working with hyperz
-- * too...
-- */
--
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 4 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0..1] = 0           */
--
--#ifdef COMPILE_R300
--		ba = (y / 8) * (pitch / 8) + (x / 8);
--#else
--		ba = (y / 16) * (pitch / 16) + (x / 16);
--#endif
--
--		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
--		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
--		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 2 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0]    = 0           */
--
--		ba = (y / 16) * (pitch / 32) + (x / 32);
--
--		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
--		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
--		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--/* 16-bit depth buffer functions
-- */
--#define VALUE_TYPE GLushort
--
--#define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
--
--#define TAG(x) radeon##x##_z16
--#include "depthtmp.h"
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- *
-- * Careful: It looks like the R300 uses ZZZS byte order while the R200
-- * uses SZZZ for 24 bit depth, 8 bit stencil mode.
-- */
--#define VALUE_TYPE GLuint
--
--#ifdef COMPILE_R300
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x000000ff;							\
--   tmp |= ((d << 8) & 0xffffff00);					\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#else
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#endif
--
--#ifdef COMPILE_R300
--#define READ_DEPTH( d, _x, _y )						\
--  do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
--					 _y + yo )) & 0xffffff00) >> 8; \
--  }while(0)
--#else
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
--#endif
--
--#define TAG(x) radeon##x##_z24_s8
--#include "depthtmp.h"
--
--/* ================================================================
-- * Stencil buffer
-- */
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#ifdef COMPILE_R300
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xffffff00;							\
--   tmp |= (d) & 0xff;							\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#else
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x00ffffff;							\
--   tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#endif
--
--#ifdef COMPILE_R300
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   d = tmp & 0x000000ff;						\
--} while (0)
--#else
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   d = (tmp & 0xff000000) >> 24;					\
--} while (0)
--#endif
--
--#define TAG(x) radeon##x##_z24_s8
--#include "stenciltmp.h"
--
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
--
--static void radeonSpanRenderStart(GLcontext * ctx)
--{
--	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--#ifdef COMPILE_R300
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--	R300_FIREVERTICES(r300);
--#else
--	RADEON_FIREVERTICES(rmesa);
--#endif
--	LOCK_HARDWARE(rmesa);
--	radeonWaitForIdleLocked(rmesa);
--
--	/* Read the first pixel in the frame buffer.  This should
--	 * be a noop, right?  In fact without this conform fails as reading
--	 * from the framebuffer sometimes produces old results -- the
--	 * on-card read cache gets mixed up and doesn't notice that the
--	 * framebuffer has been updated.
--	 *
--	 * Note that we should probably be reading some otherwise unused
--	 * region of VRAM, otherwise we might get incorrect results when
--	 * reading pixels from the top left of the screen.
--	 *
--	 * I found this problem on an R420 with glean's texCube test.
--	 * Note that the R200 span code also *writes* the first pixel in the
--	 * framebuffer, but I've found this to be unnecessary.
--	 *  -- Nicolai Hähnle, June 2008
--	 */
--	{
--		int p;
--		driRenderbuffer *drb =
--			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
--		volatile int *buf =
--			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
--		p = *buf;
--	}
--}
--
--static void radeonSpanRenderFinish(GLcontext * ctx)
--{
--	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--	_swrast_flush(ctx);
--	UNLOCK_HARDWARE(rmesa);
--}
--
--void radeonInitSpanFuncs(GLcontext * ctx)
--{
--	struct swrast_device_driver *swdd =
--	    _swrast_GetDeviceDriverReference(ctx);
--	swdd->SpanRenderStart = radeonSpanRenderStart;
--	swdd->SpanRenderFinish = radeonSpanRenderFinish;
--}
--
--/**
-- * Plug in the Get/Put routines for the given driRenderbuffer.
-- */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
--{
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
--	}
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
-deleted file mode 100644
-index c401da6..0000000
---- a/src/mesa/drivers/dri/r300/radeon_state.c
-+++ /dev/null
-@@ -1,244 +0,0 @@
--/**************************************************************************
--
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/api_arrayelt.h"
--#include "main/enums.h"
--#include "main/framebuffer.h"
--#include "main/colormac.h"
--#include "main/light.h"
--
--#include "swrast/swrast.h"
--#include "vbo/vbo.h"
--#include "tnl/tnl.h"
--#include "tnl/t_pipeline.h"
--#include "swrast_setup/swrast_setup.h"
--
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
--#include "r300_ioctl.h"
--
--
--/* =============================================================
-- * Scissoring
-- */
--
--static GLboolean intersect_rect(drm_clip_rect_t * out,
--				drm_clip_rect_t * a, drm_clip_rect_t * b)
--{
--	*out = *a;
--	if (b->x1 > out->x1)
--		out->x1 = b->x1;
--	if (b->y1 > out->y1)
--		out->y1 = b->y1;
--	if (b->x2 < out->x2)
--		out->x2 = b->x2;
--	if (b->y2 < out->y2)
--		out->y2 = b->y2;
--	if (out->x1 >= out->x2)
--		return GL_FALSE;
--	if (out->y1 >= out->y2)
--		return GL_FALSE;
--	return GL_TRUE;
--}
--
--void radeonRecalcScissorRects(radeonContextPtr radeon)
--{
--	drm_clip_rect_t *out;
--	int i;
--
--	/* Grow cliprect store?
--	 */
--	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
--		while (radeon->state.scissor.numAllocedClipRects <
--		       radeon->numClipRects) {
--			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
--			radeon->state.scissor.numAllocedClipRects *= 2;
--		}
--
--		if (radeon->state.scissor.pClipRects)
--			FREE(radeon->state.scissor.pClipRects);
--
--		radeon->state.scissor.pClipRects =
--		    MALLOC(radeon->state.scissor.numAllocedClipRects *
--			   sizeof(drm_clip_rect_t));
--
--		if (radeon->state.scissor.pClipRects == NULL) {
--			radeon->state.scissor.numAllocedClipRects = 0;
--			return;
--		}
--	}
--
--	out = radeon->state.scissor.pClipRects;
--	radeon->state.scissor.numClipRects = 0;
--
--	for (i = 0; i < radeon->numClipRects; i++) {
--		if (intersect_rect(out,
--				   &radeon->pClipRects[i],
--				   &radeon->state.scissor.rect)) {
--			radeon->state.scissor.numClipRects++;
--			out++;
--		}
--	}
--}
--
--void radeonUpdateScissor(GLcontext* ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	if (radeon->dri.drawable) {
--		__DRIdrawablePrivate *dPriv = radeon->dri.drawable;
--		int x1 = dPriv->x + ctx->Scissor.X;
--		int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
--
--		radeon->state.scissor.rect.x1 = x1;
--		radeon->state.scissor.rect.y1 = y1;
--		radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
--		radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
--
--		radeonRecalcScissorRects(radeon);
--	}
--}
--
--static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
--{
--	if (ctx->Scissor.Enabled) {
--		/* We don't pipeline cliprect changes */
--		r300Flush(ctx);
--		radeonUpdateScissor(ctx);
--	}
--}
--
--
--/**
-- * Update cliprects and scissors.
-- */
--void radeonSetCliprects(radeonContextPtr radeon)
--{
--	__DRIdrawablePrivate *const drawable = radeon->dri.drawable;
--	__DRIdrawablePrivate *const readable = radeon->dri.readable;
--	GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
--	GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
--
--	if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--		/* Can't ignore 2d windows if we are page flipping. */
--		if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
--		    radeon->sarea->pfCurrentPage == 1) {
--			radeon->numClipRects = drawable->numClipRects;
--			radeon->pClipRects = drawable->pClipRects;
--		} else {
--			radeon->numClipRects = drawable->numBackClipRects;
--			radeon->pClipRects = drawable->pBackClipRects;
--		}
--	} else {
--		/* front buffer (or none, or multiple buffers */
--		radeon->numClipRects = drawable->numClipRects;
--		radeon->pClipRects = drawable->pClipRects;
--	}
--
--	if ((draw_fb->Width != drawable->w) ||
--	    (draw_fb->Height != drawable->h)) {
--		_mesa_resize_framebuffer(radeon->glCtx, draw_fb,
--					 drawable->w, drawable->h);
--		draw_fb->Initialized = GL_TRUE;
--	}
--
--	if (drawable != readable) {
--		if ((read_fb->Width != readable->w) ||
--		    (read_fb->Height != readable->h)) {
--			_mesa_resize_framebuffer(radeon->glCtx, read_fb,
--						 readable->w, readable->h);
--			read_fb->Initialized = GL_TRUE;
--		}
--	}
--
--	if (radeon->state.scissor.enabled)
--		radeonRecalcScissorRects(radeon);
--
--	radeon->lastStamp = drawable->lastStamp;
--}
--
--
--/**
-- * Handle common enable bits.
-- * Called as a fallback by r200Enable/r300Enable.
-- */
--void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	switch(cap) {
--	case GL_SCISSOR_TEST:
--		/* We don't pipeline cliprect & scissor changes */
--		r300Flush(ctx);
--
--		radeon->state.scissor.enabled = state;
--		radeonUpdateScissor(ctx);
--		break;
--
--	default:
--		return;
--	}
--}
--
--
--/**
-- * Initialize default state.
-- * This function is called once at context init time from
-- * r200InitState/r300InitState
-- */
--void radeonInitState(radeonContextPtr radeon)
--{
--	radeon->Fallback = 0;
--
--	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	}
--}
--
--
--/**
-- * Initialize common state functions.
-- * Called by r200InitStateFuncs/r300InitStateFuncs
-- */
--void radeonInitStateFuncs(struct dd_function_table *functions)
--{
--	functions->Scissor = radeonScissor;
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h
-deleted file mode 100644
-index 821cb40..0000000
---- a/src/mesa/drivers/dri/r300/radeon_state.h
-+++ /dev/null
-@@ -1,43 +0,0 @@
--/*
--Copyright (C) 2004 Nicolai Haehnle.  All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Nicolai Haehnle <prefect_@gmx.net>
-- */
--
--#ifndef __RADEON_STATE_H__
--#define __RADEON_STATE_H__
--
--extern void radeonRecalcScissorRects(radeonContextPtr radeon);
--extern void radeonSetCliprects(radeonContextPtr radeon);
--extern void radeonUpdateScissor(GLcontext* ctx);
--
--extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
--
--extern void radeonInitState(radeonContextPtr radeon);
--extern void radeonInitStateFuncs(struct dd_function_table* functions);
--
--#endif
-diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
-index f223b2d..ba409ba 100644
---- a/src/mesa/drivers/dri/radeon/Makefile
-+++ b/src/mesa/drivers/dri/radeon/Makefile
-@@ -4,25 +4,37 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = radeon_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c 
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c \
-+	radeon_fbo.c
-+
- DRIVER_SOURCES = \
- 	radeon_context.c \
- 	radeon_ioctl.c \
--	radeon_lock.c \
- 	radeon_screen.c \
- 	radeon_state.c \
- 	radeon_state_init.c \
- 	radeon_tex.c \
--	radeon_texmem.c \
- 	radeon_texstate.c \
- 	radeon_tcl.c \
- 	radeon_swtcl.c \
--	radeon_span.c \
- 	radeon_maos.c \
--	radeon_sanity.c 
-+	radeon_sanity.c \
-+	$(RADEON_COMMON_SOURCES)
- 
- C_SOURCES = \
- 	$(COMMON_SOURCES) \
-@@ -30,6 +42,8 @@ C_SOURCES = \
- 
- DRIVER_DEFINES = -DRADEON_COMMON=0
- 
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
-+
- X86_SOURCES = 
- 
- include ../Makefile.template
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
-new file mode 100644
-index 0000000..1ed13f1
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
-@@ -0,0 +1,182 @@
-+/* 
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
-+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_BO_H
-+#define RADEON_BO_H
-+
-+#include <stdio.h>
-+#include <stdint.h>
-+//#include "radeon_track.h"
-+
-+/* bo object */
-+#define RADEON_BO_FLAGS_MACRO_TILE  1
-+#define RADEON_BO_FLAGS_MICRO_TILE  2
-+
-+struct radeon_bo_manager;
-+
-+struct radeon_bo {
-+    uint32_t                    alignment;
-+    uint32_t                    handle;
-+    uint32_t                    size;
-+    uint32_t                    domains;
-+    uint32_t                    flags;
-+    unsigned                    cref;
-+#ifdef RADEON_BO_TRACK
-+    struct radeon_track         *track;
-+#endif
-+    void                        *ptr;
-+    struct radeon_bo_manager    *bom;
-+    uint32_t                    space_accounted;
-+};
-+
-+/* bo functions */
-+struct radeon_bo_funcs {
-+    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
-+                                 uint32_t handle,
-+                                 uint32_t size,
-+                                 uint32_t alignment,
-+                                 uint32_t domains,
-+                                 uint32_t flags);
-+    void (*bo_ref)(struct radeon_bo *bo);
-+    struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);
-+    int (*bo_map)(struct radeon_bo *bo, int write);
-+    int (*bo_unmap)(struct radeon_bo *bo);
-+    int (*bo_wait)(struct radeon_bo *bo);
-+};
-+
-+struct radeon_bo_manager {
-+    struct radeon_bo_funcs  *funcs;
-+    int                     fd;
-+
-+#ifdef RADEON_BO_TRACK
-+    struct radeon_tracker   tracker;
-+#endif
-+};
-+    
-+static inline void _radeon_bo_debug(struct radeon_bo *bo,
-+                                    const char *op,
-+                                    const char *file,
-+                                    const char *func,
-+                                    int line)
-+{
-+    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n",
-+            op, bo, bo->handle, bo->size, bo->cref, file, func, line);
-+}
-+
-+static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom,
-+                                                uint32_t handle,
-+                                                uint32_t size,
-+                                                uint32_t alignment,
-+                                                uint32_t domains,
-+                                                uint32_t flags,
-+                                                const char *file,
-+                                                const char *func,
-+                                                int line)
-+{
-+    struct radeon_bo *bo;
-+
-+    bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
-+#ifdef RADEON_BO_TRACK
-+    if (bo) {
-+        bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle);
-+        radeon_track_add_event(bo->track, file, func, "open", line);
-+    }
-+#endif
-+    return bo;
-+}
-+
-+static inline void _radeon_bo_ref(struct radeon_bo *bo,
-+                                  const char *file,
-+                                  const char *func,
-+                                  int line)
-+{
-+    bo->cref++;
-+#ifdef RADEON_BO_TRACK
-+    radeon_track_add_event(bo->track, file, func, "ref", line); 
-+#endif
-+    bo->bom->funcs->bo_ref(bo);
-+}
-+
-+static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
-+                                                 const char *file,
-+                                                 const char *func,
-+                                                 int line)
-+{
-+    bo->cref--;
-+#ifdef RADEON_BO_TRACK
-+    radeon_track_add_event(bo->track, file, func, "unref", line);
-+    if (bo->cref <= 0) {
-+        radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
-+        bo->track = NULL;
-+    }
-+#endif
-+    return bo->bom->funcs->bo_unref(bo);
-+}
-+
-+static inline int _radeon_bo_map(struct radeon_bo *bo,
-+                                 int write,
-+                                 const char *file,
-+                                 const char *func,
-+                                 int line)
-+{
-+    return bo->bom->funcs->bo_map(bo, write);
-+}
-+
-+static inline int _radeon_bo_unmap(struct radeon_bo *bo,
-+                                   const char *file,
-+                                   const char *func,
-+                                   int line)
-+{
-+    return bo->bom->funcs->bo_unmap(bo);
-+}
-+
-+static inline int _radeon_bo_wait(struct radeon_bo *bo,
-+                                  const char *file,
-+                                  const char *func,
-+                                  int line)
-+{
-+    return bo->bom->funcs->bo_wait(bo);
-+}
-+
-+#define radeon_bo_open(bom, h, s, a, d, f)\
-+    _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_ref(bo)\
-+    _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_unref(bo)\
-+    _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_map(bo, w)\
-+    _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_unmap(bo)\
-+    _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_debug(bo, opcode)\
-+    _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__)
-+#define radeon_bo_wait(bo) \
-+    _radeon_bo_wait(bo, __FILE__, __func__, __LINE__)
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
-new file mode 100644
-index 0000000..03a6299
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
-@@ -0,0 +1,825 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Dave Airlie
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Dave Airlie
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#include <stdio.h>
-+#include <stddef.h>
-+#include <stdint.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <errno.h>
-+#include <unistd.h>
-+#include <sys/mman.h>
-+#include <sys/ioctl.h>
-+#include "xf86drm.h"
-+#include "texmem.h"
-+#include "main/simple_list.h"
-+
-+#include "drm.h"
-+#include "radeon_drm.h"
-+#include "radeon_common.h"
-+#include "radeon_bocs_wrapper.h"
-+
-+/* no seriously texmem.c is this screwed up */
-+struct bo_legacy_texture_object {
-+    driTextureObject    base;
-+    struct bo_legacy *parent;
-+};
-+
-+struct bo_legacy {
-+    struct radeon_bo    base;
-+    int                 map_count;
-+    uint32_t            pending;
-+    int                 is_pending;
-+    int                 static_bo;
-+    uint32_t            offset;
-+    struct bo_legacy_texture_object *tobj;
-+    int                 validated;
-+    int                 dirty;
-+    void                *ptr;
-+    struct bo_legacy    *next, *prev;
-+    struct bo_legacy    *pnext, *pprev;
-+};
-+
-+struct bo_manager_legacy {
-+    struct radeon_bo_manager    base;
-+    unsigned                    nhandle;
-+    unsigned                    nfree_handles;
-+    unsigned                    cfree_handles;
-+    uint32_t                    current_age;
-+    struct bo_legacy            bos;
-+    struct bo_legacy            pending_bos;
-+    uint32_t                    fb_location;
-+    uint32_t                    texture_offset;
-+    unsigned                    dma_alloc_size;
-+    uint32_t                    dma_buf_count;
-+    unsigned                    cpendings;
-+    driTextureObject            texture_swapped;
-+    driTexHeap                  *texture_heap;
-+    struct radeon_screen        *screen;
-+    unsigned                    *free_handles;
-+};
-+
-+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
-+{
-+    struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t;
-+    
-+    if (tobj->parent) {
-+        tobj->parent->tobj = NULL;
-+        tobj->parent->validated = 0;
-+    }
-+}
-+
-+static void inline clean_handles(struct bo_manager_legacy *bom)
-+{
-+  while (bom->cfree_handles > 0 &&
-+	 !bom->free_handles[bom->cfree_handles - 1])
-+    bom->cfree_handles--;
-+
-+}
-+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
-+{
-+    uint32_t tmp;
-+
-+    *handle = 0;
-+    if (bom->nhandle == 0xFFFFFFFF) {
-+        return -EINVAL;
-+    }
-+    if (bom->cfree_handles > 0) {
-+        tmp = bom->free_handles[--bom->cfree_handles];
-+	clean_handles(bom);
-+    } else {
-+        bom->cfree_handles = 0;
-+        tmp = bom->nhandle++;
-+    }
-+    assert(tmp);
-+    *handle = tmp;
-+    return 0;
-+}
-+
-+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
-+{
-+    uint32_t *handles;
-+
-+    if (!handle) {
-+        return 0;
-+    }
-+    if (handle == (bom->nhandle - 1)) {
-+        int i;
-+
-+        bom->nhandle--;
-+        for (i = bom->cfree_handles - 1; i >= 0; i--) {
-+            if (bom->free_handles[i] == (bom->nhandle - 1)) {
-+                bom->nhandle--;
-+                bom->free_handles[i] = 0;
-+            }
-+        }
-+        clean_handles(bom);
-+        return 0;
-+    }
-+    if (bom->cfree_handles < bom->nfree_handles) {
-+        bom->free_handles[bom->cfree_handles++] = handle;
-+        return 0;
-+    }
-+    bom->nfree_handles += 0x100;
-+    handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4);
-+    if (handles == NULL) {
-+        bom->nfree_handles -= 0x100;
-+        return -ENOMEM;
-+    }
-+    bom->free_handles = handles;
-+    bom->free_handles[bom->cfree_handles++] = handle;
-+    return 0;
-+}
-+
-+static void legacy_get_current_age(struct bo_manager_legacy *boml)
-+{
-+    drm_radeon_getparam_t gp;
-+    int r;
-+
-+    if (IS_R300_CLASS(boml->screen)) {
-+    	gp.param = RADEON_PARAM_LAST_CLEAR;
-+    	gp.value = (int *)&boml->current_age;
-+    	r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM,
-+       	                     &gp, sizeof(gp));
-+    	if (r) {
-+       	 fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
-+         exit(1);
-+       }
-+    } else
-+	boml->current_age = boml->screen->scratch[3];
-+}
-+
-+static int legacy_is_pending(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo_legacy->is_pending <= 0) {
-+        bo_legacy->is_pending = 0;
-+        return 0;
-+    }
-+    if (boml->current_age >= bo_legacy->pending) {
-+        if (boml->pending_bos.pprev == bo_legacy) {
-+            boml->pending_bos.pprev = bo_legacy->pprev;
-+        }
-+        bo_legacy->pprev->pnext = bo_legacy->pnext;
-+        if (bo_legacy->pnext) {
-+            bo_legacy->pnext->pprev = bo_legacy->pprev;
-+        }
-+	assert(bo_legacy->is_pending <= bo->cref);
-+        while (bo_legacy->is_pending--) {
-+	    bo = radeon_bo_unref(bo);
-+	    if (!bo)
-+	      break;
-+        }
-+	if (bo)
-+	  bo_legacy->is_pending = 0;
-+        boml->cpendings--;
-+        return 0;
-+    }
-+    return 1;
-+}
-+
-+static int legacy_wait_pending(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (!bo_legacy->is_pending) {
-+        return 0;
-+    }
-+    /* FIXME: lockup and userspace busy looping that's all the folks */
-+    legacy_get_current_age(boml);
-+    while (legacy_is_pending(bo)) {
-+        usleep(10);
-+        legacy_get_current_age(boml);
-+    }
-+    return 0;
-+}
-+
-+static void legacy_track_pending(struct bo_manager_legacy *boml, int debug)
-+{
-+    struct bo_legacy *bo_legacy;
-+    struct bo_legacy *next;
-+
-+    legacy_get_current_age(boml);
-+    bo_legacy = boml->pending_bos.pnext;
-+    while (bo_legacy) {
-+        if (debug)
-+	  fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size,
-+		  boml->current_age, bo_legacy->pending);
-+        next = bo_legacy->pnext;
-+        if (legacy_is_pending(&(bo_legacy->base))) {
-+        }
-+        bo_legacy = next;
-+    } 
-+}
-+
-+static int legacy_wait_any_pending(struct bo_manager_legacy *boml)
-+{
-+    struct bo_legacy *bo_legacy;
-+
-+    legacy_get_current_age(boml);
-+    bo_legacy = boml->pending_bos.pnext;
-+    if (!bo_legacy)
-+      return -1;
-+    legacy_wait_pending(&bo_legacy->base);
-+    return 0;
-+}
-+
-+static void legacy_kick_all_buffers(struct bo_manager_legacy *boml)
-+{
-+    struct bo_legacy *legacy;
-+
-+    legacy = boml->bos.next;
-+    while (legacy != &boml->bos) {
-+	if (legacy->tobj) {
-+	    if (legacy->validated) {
-+		driDestroyTextureObject(&legacy->tobj->base);
-+		legacy->tobj = 0;
-+		legacy->validated = 0;
-+	    }
-+	}
-+	legacy = legacy->next;
-+    }
-+}
-+
-+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
-+                                     uint32_t size,
-+                                     uint32_t alignment,
-+                                     uint32_t domains,
-+                                     uint32_t flags)
-+{
-+    struct bo_legacy *bo_legacy;
-+    static int pgsize;
-+
-+    if (pgsize == 0)
-+        pgsize = getpagesize() - 1;
-+
-+    size = (size + pgsize) & ~pgsize;
-+
-+    bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
-+    if (bo_legacy == NULL) {
-+        return NULL;
-+    }
-+    bo_legacy->base.bom = (struct radeon_bo_manager*)boml;
-+    bo_legacy->base.handle = 0;
-+    bo_legacy->base.size = size;
-+    bo_legacy->base.alignment = alignment;
-+    bo_legacy->base.domains = domains;
-+    bo_legacy->base.flags = flags;
-+    bo_legacy->base.ptr = NULL;
-+    bo_legacy->map_count = 0;
-+    bo_legacy->next = NULL;
-+    bo_legacy->prev = NULL;
-+    bo_legacy->pnext = NULL;
-+    bo_legacy->pprev = NULL;
-+    bo_legacy->next = boml->bos.next;
-+    bo_legacy->prev = &boml->bos;
-+    boml->bos.next = bo_legacy;
-+    if (bo_legacy->next) {
-+        bo_legacy->next->prev = bo_legacy;
-+    }
-+    return bo_legacy;
-+}
-+
-+static int bo_dma_alloc(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    drm_radeon_mem_alloc_t alloc;
-+    unsigned size;
-+    int base_offset;
-+    int r;
-+
-+    /* align size on 4Kb */
-+    size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
-+    alloc.region = RADEON_MEM_REGION_GART;
-+    alloc.alignment = bo_legacy->base.alignment;
-+    alloc.size = size;
-+    alloc.region_offset = &base_offset;
-+    r = drmCommandWriteRead(bo->bom->fd,
-+                            DRM_RADEON_ALLOC,
-+                            &alloc,
-+                            sizeof(alloc));
-+    if (r) {
-+        /* ptr is set to NULL if dma allocation failed */
-+        bo_legacy->ptr = NULL;
-+        return r;
-+    }
-+    bo_legacy->ptr = boml->screen->gartTextures.map + base_offset;
-+    bo_legacy->offset = boml->screen->gart_texture_offset + base_offset;
-+    bo->size = size;
-+    boml->dma_alloc_size += size;
-+    boml->dma_buf_count++;
-+    return 0;
-+}
-+
-+static int bo_dma_free(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    drm_radeon_mem_free_t memfree;
-+    int r;
-+
-+    if (bo_legacy->ptr == NULL) {
-+        /* ptr is set to NULL if dma allocation failed */
-+        return 0;
-+    }
-+    legacy_get_current_age(boml);
-+    memfree.region = RADEON_MEM_REGION_GART;
-+    memfree.region_offset  = bo_legacy->offset;
-+    memfree.region_offset -= boml->screen->gart_texture_offset;
-+    r = drmCommandWrite(boml->base.fd,
-+                        DRM_RADEON_FREE,
-+                        &memfree,
-+                        sizeof(memfree));
-+    if (r) {
-+        fprintf(stderr, "Failed to free bo[%p] at %08x\n",
-+                &bo_legacy->base, memfree.region_offset);
-+        fprintf(stderr, "ret = %s\n", strerror(-r));
-+        return r;
-+    }
-+    boml->dma_alloc_size -= bo_legacy->base.size;
-+    boml->dma_buf_count--;
-+    return 0;
-+}
-+
-+static void bo_free(struct bo_legacy *bo_legacy)
-+{
-+    struct bo_manager_legacy *boml;
-+
-+    if (bo_legacy == NULL) {
-+        return;
-+    }
-+    boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
-+    bo_legacy->prev->next = bo_legacy->next;
-+    if (bo_legacy->next) {
-+        bo_legacy->next->prev = bo_legacy->prev;
-+    }
-+    if (!bo_legacy->static_bo) {
-+        legacy_free_handle(boml, bo_legacy->base.handle);
-+        if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
-+            /* dma buffers */
-+            bo_dma_free(&bo_legacy->base);
-+        } else {
-+  	    driDestroyTextureObject(&bo_legacy->tobj->base);
-+	    bo_legacy->tobj = NULL;
-+            /* free backing store */
-+            free(bo_legacy->ptr);
-+        }
-+    }
-+    memset(bo_legacy, 0 , sizeof(struct bo_legacy));
-+    free(bo_legacy);
-+}
-+
-+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
-+                                 uint32_t handle,
-+                                 uint32_t size,
-+                                 uint32_t alignment,
-+                                 uint32_t domains,
-+                                 uint32_t flags)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    struct bo_legacy *bo_legacy;
-+    int r;
-+
-+    if (handle) {
-+        bo_legacy = boml->bos.next;
-+        while (bo_legacy) {
-+            if (bo_legacy->base.handle == handle) {
-+                radeon_bo_ref(&(bo_legacy->base));
-+                return (struct radeon_bo*)bo_legacy;
-+            }
-+            bo_legacy = bo_legacy->next;
-+        }
-+        return NULL;
-+    }
-+
-+    bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
-+    bo_legacy->static_bo = 0;
-+    r = legacy_new_handle(boml, &bo_legacy->base.handle);
-+    if (r) {
-+        bo_free(bo_legacy);
-+        return NULL;
-+    }
-+    if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
-+    retry:
-+        legacy_track_pending(boml, 0);
-+        /* dma buffers */
-+
-+        r = bo_dma_alloc(&(bo_legacy->base));
-+        if (r) {
-+	  if (legacy_wait_any_pending(boml) == -1) {
-+            bo_free(bo_legacy);
-+	    return NULL;
-+	  }
-+	  goto retry;
-+	  return NULL;
-+        }
-+    } else {
-+        bo_legacy->ptr = malloc(bo_legacy->base.size);
-+        if (bo_legacy->ptr == NULL) {
-+            bo_free(bo_legacy);
-+            return NULL;
-+        }
-+    }
-+    radeon_bo_ref(&(bo_legacy->base));
-+    return (struct radeon_bo*)bo_legacy;
-+}
-+
-+static void bo_ref(struct radeon_bo *bo)
-+{
-+}
-+
-+static struct radeon_bo *bo_unref(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo->cref <= 0) {
-+        bo_legacy->prev->next = bo_legacy->next;
-+        if (bo_legacy->next) {
-+            bo_legacy->next->prev = bo_legacy->prev;
-+        }
-+        if (!bo_legacy->is_pending) {
-+            bo_free(bo_legacy);
-+        }
-+        return NULL;
-+    }
-+    return bo;
-+}
-+
-+static int bo_map(struct radeon_bo *bo, int write)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    
-+    legacy_wait_pending(bo);
-+    bo_legacy->validated = 0;
-+    bo_legacy->dirty = 1;
-+    bo_legacy->map_count++;
-+    bo->ptr = bo_legacy->ptr;
-+    /* Read the first pixel in the frame buffer.  This should
-+     * be a noop, right?  In fact without this conform fails as reading
-+     * from the framebuffer sometimes produces old results -- the
-+     * on-card read cache gets mixed up and doesn't notice that the
-+     * framebuffer has been updated.
-+     *
-+     * Note that we should probably be reading some otherwise unused
-+     * region of VRAM, otherwise we might get incorrect results when
-+     * reading pixels from the top left of the screen.
-+     *
-+     * I found this problem on an R420 with glean's texCube test.
-+     * Note that the R200 span code also *writes* the first pixel in the
-+     * framebuffer, but I've found this to be unnecessary.
-+     *  -- Nicolai Hähnle, June 2008
-+     */
-+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
-+        int p;
-+        volatile int *buf = (int*)boml->screen->driScreen->pFB;
-+        p = *buf;
-+    }
-+    return 0;
-+}
-+
-+static int bo_unmap(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (--bo_legacy->map_count > 0) {
-+        return 0;
-+    }
-+    bo->ptr = NULL;
-+    return 0;
-+}
-+
-+static struct radeon_bo_funcs bo_legacy_funcs = {
-+    bo_open,
-+    bo_ref,
-+    bo_unref,
-+    bo_map,
-+    bo_unmap
-+};
-+
-+static int bo_vram_validate(struct radeon_bo *bo,
-+                            uint32_t *soffset,
-+                            uint32_t *eoffset)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    int r;
-+    int retry_count = 0, pending_retry = 0;
-+    
-+    if (!bo_legacy->tobj) {
-+	bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object));
-+	bo_legacy->tobj->parent = bo_legacy;
-+	make_empty_list(&bo_legacy->tobj->base);
-+	bo_legacy->tobj->base.totalSize = bo->size;
-+    retry:
-+        r = driAllocateTexture(&boml->texture_heap, 1,
-+                               &bo_legacy->tobj->base);
-+        if (r) {
-+		pending_retry = 0;
-+		while(boml->cpendings && pending_retry++ < 10000) {
-+			legacy_track_pending(boml, 0);
-+			retry_count++;
-+			if (retry_count > 2) {
-+				free(bo_legacy->tobj);
-+				bo_legacy->tobj = NULL;
-+				fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
-+				return -1;
-+			}
-+			goto retry;
-+		}
-+	}
-+        bo_legacy->offset = boml->texture_offset +
-+                            bo_legacy->tobj->base.memBlock->ofs;
-+        bo_legacy->dirty = 1;
-+    }
-+
-+    assert(bo_legacy->tobj->base.memBlock);
-+
-+    if (bo_legacy->tobj)
-+	driUpdateTextureLRU(&bo_legacy->tobj->base);
-+
-+    if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) {
-+        /* Copy to VRAM using a blit.
-+         * All memory is 4K aligned. We're using 1024 pixels wide blits.
-+         */
-+        drm_radeon_texture_t tex;
-+        drm_radeon_tex_image_t tmp;
-+        int ret;
-+
-+        tex.offset = bo_legacy->offset;
-+        tex.image = &tmp;
-+        assert(!(tex.offset & 1023));
-+
-+        tmp.x = 0;
-+        tmp.y = 0;
-+        if (bo->size < 4096) {
-+            tmp.width = (bo->size + 3) / 4;
-+            tmp.height = 1;
-+        } else {
-+            tmp.width = 1024;
-+            tmp.height = (bo->size + 4095) / 4096;
-+        }
-+        tmp.data = bo_legacy->ptr;
-+        tex.format = RADEON_TXFORMAT_ARGB8888;
-+        tex.width = tmp.width;
-+        tex.height = tmp.height;
-+        tex.pitch = MAX2(tmp.width / 16, 1);
-+        do {
-+            ret = drmCommandWriteRead(bo->bom->fd,
-+                                      DRM_RADEON_TEXTURE,
-+                                      &tex,
-+                                      sizeof(drm_radeon_texture_t));
-+            if (ret) {
-+                if (RADEON_DEBUG & DEBUG_IOCTL)
-+                    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
-+                usleep(1);
-+            }
-+        } while (ret == -EAGAIN);
-+        bo_legacy->dirty = 0;
-+	bo_legacy->tobj->base.dirty_images[0] = 0;
-+    }
-+    return 0;
-+}
-+
-+/* 
-+ *  radeon_bo_legacy_validate -
-+ *  returns:
-+ *  0 - all good
-+ *  -EINVAL - mapped buffer can't be validated
-+ *  -EAGAIN - restart validation we've kicked all the buffers out
-+ */
-+int radeon_bo_legacy_validate(struct radeon_bo *bo,
-+                              uint32_t *soffset,
-+                              uint32_t *eoffset)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    int r;
-+    int retries = 0;
-+
-+    if (bo_legacy->map_count) {
-+        fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
-+                bo, bo->size, bo_legacy->map_count);
-+        return -EINVAL;
-+    }
-+    if (bo_legacy->static_bo || bo_legacy->validated) {
-+        *soffset = bo_legacy->offset;
-+        *eoffset = bo_legacy->offset + bo->size;
-+        return 0;
-+    }
-+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
-+
-+        r = bo_vram_validate(bo, soffset, eoffset);
-+        if (r) {
-+	    legacy_track_pending(boml, 0);
-+	    legacy_kick_all_buffers(boml);
-+	    retries++;
-+	    if (retries == 2) {
-+		fprintf(stderr,"legacy bo: failed to get relocations into aperture\n");
-+		assert(0);
-+		exit(-1);
-+	    }
-+	    return -EAGAIN;
-+        }
-+    }
-+    *soffset = bo_legacy->offset;
-+    *eoffset = bo_legacy->offset + bo->size;
-+    bo_legacy->validated = 1;
-+    return 0;
-+}
-+
-+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    bo_legacy->pending = pending;
-+    bo_legacy->is_pending++;
-+    /* add to pending list */
-+    radeon_bo_ref(bo);
-+    if (bo_legacy->is_pending > 1) {
-+        return;    
-+    }
-+    bo_legacy->pprev = boml->pending_bos.pprev;
-+    bo_legacy->pnext = NULL;
-+    bo_legacy->pprev->pnext = bo_legacy;
-+    boml->pending_bos.pprev = bo_legacy;
-+    boml->cpendings++;
-+}
-+
-+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    struct bo_legacy *bo_legacy;
-+
-+    if (bom == NULL) {
-+        return;
-+    }
-+    bo_legacy = boml->bos.next;
-+    while (bo_legacy) {
-+        struct bo_legacy *next;
-+
-+        next = bo_legacy->next;
-+        bo_free(bo_legacy);
-+        bo_legacy = next;
-+    }
-+    driDestroyTextureHeap(boml->texture_heap);
-+    free(boml->free_handles);
-+    free(boml);
-+}
-+
-+static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom,
-+						       int size, uint32_t offset)
-+{
-+    struct bo_legacy *bo;
-+
-+    bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+    if (bo == NULL)
-+	return NULL;
-+    bo->static_bo = 1;
-+    bo->offset = offset + bom->fb_location;
-+    bo->base.handle = bo->offset;
-+    bo->ptr = bom->screen->driScreen->pFB + offset;
-+    if (bo->base.handle > bom->nhandle) {
-+        bom->nhandle = bo->base.handle + 1;
-+    }
-+    radeon_bo_ref(&(bo->base));
-+    return bo;
-+}
-+
-+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
-+{
-+    struct bo_manager_legacy *bom;
-+    struct bo_legacy *bo;
-+    unsigned size;
-+
-+    bom = (struct bo_manager_legacy*)
-+          calloc(1, sizeof(struct bo_manager_legacy));
-+    if (bom == NULL) {
-+        return NULL;
-+    }
-+
-+    make_empty_list(&bom->texture_swapped);
-+
-+    bom->texture_heap = driCreateTextureHeap(0,
-+                                             bom,
-+                                             scrn->texSize[0],
-+                                             12,
-+                                             RADEON_NR_TEX_REGIONS,
-+                                             (drmTextureRegionPtr)scrn->sarea->tex_list[0],
-+                                             &scrn->sarea->tex_age[0],
-+                                             &bom->texture_swapped,
-+                                             sizeof(struct bo_legacy_texture_object),
-+                                             &bo_legacy_tobj_destroy);
-+    bom->texture_offset = scrn->texOffset[0];
-+
-+    bom->base.funcs = &bo_legacy_funcs;
-+    bom->base.fd = scrn->driScreen->fd;
-+    bom->bos.next = NULL;
-+    bom->bos.prev = NULL;
-+    bom->pending_bos.pprev = &bom->pending_bos;
-+    bom->pending_bos.pnext = NULL;
-+    bom->screen = scrn;
-+    bom->fb_location = scrn->fbLocation;
-+    bom->nhandle = 1;
-+    bom->cfree_handles = 0;
-+    bom->nfree_handles = 0x400;
-+    bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
-+    if (bom->free_handles == NULL) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+
-+    /* biggest framebuffer size */
-+    size = 4096*4096*4; 
-+
-+    /* allocate front */
-+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset);
-+    if (!bo) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
-+    }
-+
-+    /* allocate back */
-+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset);
-+    if (!bo) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
-+    }
-+
-+    /* allocate depth */
-+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset);
-+    if (!bo) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    bo->base.flags = 0;
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
-+        bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
-+    }
-+    return (struct radeon_bo_manager*)bom;
-+}
-+
-+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    DRI_AGE_TEXTURES(boml->texture_heap);
-+}
-+
-+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
-+        return 0;
-+    }
-+    return bo->size;
-+}
-+
-+int radeon_legacy_bo_is_static(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    return bo_legacy->static_bo;
-+}
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
-new file mode 100644
-index 0000000..9187cd7
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
-@@ -0,0 +1,47 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_BO_LEGACY_H
-+#define RADEON_BO_LEGACY_H
-+
-+#include "radeon_screen.h"
-+
-+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending);
-+int radeon_bo_legacy_validate(struct radeon_bo *bo,
-+                              uint32_t *soffset,
-+                              uint32_t *eoffset);
-+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn);
-+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom);
-+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom);
-+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo);
-+
-+int radeon_legacy_bo_is_static(struct radeon_bo *bo);
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
-new file mode 100644
-index 0000000..6f1a0b4
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
-@@ -0,0 +1,85 @@
-+#ifndef RADEON_CS_WRAPPER_H
-+#define RADEON_CS_WRAPPER_H
-+
-+#ifndef RADEON_PARAM_DEVICE_ID
-+#define RADEON_PARAM_DEVICE_ID 16
-+#endif
-+
-+#ifndef RADEON_INFO_DEVICE_ID
-+#define RADEON_INFO_DEVICE_ID 0
-+#endif
-+#ifndef RADEON_INFO_NUM_GB_PIPES
-+#define RADEON_INFO_NUM_GB_PIPES 0
-+#endif
-+
-+#ifndef DRM_RADEON_INFO
-+#define DRM_RADEON_INFO 0x1
-+#endif
-+
-+#ifdef HAVE_LIBDRM_RADEON
-+
-+#include "radeon_bo.h"
-+#include "radeon_bo_gem.h"
-+#include "radeon_cs.h"
-+#include "radeon_cs_gem.h"
-+
-+#else
-+#include <stdint.h>
-+
-+#define RADEON_GEM_DOMAIN_CPU 0x1   // Cached CPU domain
-+#define RADEON_GEM_DOMAIN_GTT 0x2   // GTT or cache flushed
-+#define RADEON_GEM_DOMAIN_VRAM 0x4  // VRAM domain
-+
-+/* to be used to build locally in mesa with no libdrm bits */
-+#include "../radeon/radeon_bo_drm.h"
-+#include "../radeon/radeon_cs_drm.h"
-+
-+#ifndef DRM_RADEON_GEM_INFO
-+#define DRM_RADEON_GEM_INFO 0x1c
-+
-+struct drm_radeon_gem_info {
-+        uint64_t gart_size;
-+        uint64_t vram_size;
-+        uint64_t vram_visible;
-+};
-+
-+struct drm_radeon_info {
-+	uint32_t request;
-+	uint32_t pad;
-+	uint32_t value;
-+};
-+#endif
-+
-+
-+static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
-+{
-+  return 0;
-+}
-+
-+static inline void *radeon_bo_manager_gem_ctor(int fd)
-+{
-+  return NULL;
-+}
-+
-+static inline void radeon_bo_manager_gem_dtor(void *dummy)
-+{
-+}
-+
-+static inline void *radeon_cs_manager_gem_ctor(int fd)
-+{
-+  return NULL;
-+}
-+
-+static inline void radeon_cs_manager_gem_dtor(void *dummy)
-+{
-+}
-+
-+static inline void radeon_tracker_print(void *ptr, int io)
-+{
-+}
-+#endif
-+
-+#include "radeon_bo_legacy.h"
-+#include "radeon_cs_legacy.h"
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
-new file mode 100644
-index 0000000..4b5116c
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
-@@ -0,0 +1,143 @@
-+#ifndef COMMON_CMDBUF_H
-+#define COMMON_CMDBUF_H
-+
-+#include "radeon_bocs_wrapper.h"
-+
-+void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
-+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
-+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
-+void rcommonInitCmdBuf(radeonContextPtr rmesa);
-+void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
-+
-+void rcommonBeginBatch(radeonContextPtr rmesa,
-+		       int n,
-+		       int dostate,
-+		       const char *file,
-+		       const char *function,
-+		       int line);
-+
-+#define RADEON_CP_PACKET3_NOP                       0xC0001000
-+#define RADEON_CP_PACKET3_NEXT_CHAR                 0xC0001900
-+#define RADEON_CP_PACKET3_PLY_NEXTSCAN              0xC0001D00
-+#define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
-+#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
-+#define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
-+#define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
-+#define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
-+#define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
-+#define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
-+#define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
-+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
-+#define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
-+#define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
-+#define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
-+#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT         0xC0009400
-+#define RADEON_CP_PACKET3_CNTL_POLYLINE             0xC0009500
-+#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES        0xC0009800
-+#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI          0xC0009A00
-+#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI         0xC0009B00
-+#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT         0xC0009C00
-+
-+#define CP_PACKET2  (2 << 30)
-+#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
-+#define CP_PACKET0_ONE(reg, n)	(RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
-+#define CP_PACKET3( pkt, n )						\
-+	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
-+
-+/**
-+ * Every function writing to the command buffer needs to declare this
-+ * to get the necessary local variables.
-+ */
-+#define BATCH_LOCALS(rmesa) \
-+	const radeonContextPtr b_l_rmesa = rmesa
-+
-+/**
-+ * Prepare writing n dwords to the command buffer,
-+ * including producing any necessary state emits on buffer wraparound.
-+ */
-+#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
-+
-+/**
-+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
-+ */
-+#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
-+
-+/**
-+ * Write one dword to the command buffer.
-+ */
-+#define OUT_BATCH(data) \
-+	do { \
-+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
-+	} while(0)
-+
-+/**
-+ * Write a relocated dword to the command buffer.
-+ */
-+#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) 	\
-+	do { 							\
-+        if (0 && offset) {					\
-+            fprintf(stderr, "(%s:%s:%d) offset : %d\n",		\
-+            __FILE__, __FUNCTION__, __LINE__, offset);		\
-+        }							\
-+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset);	\
-+        radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, 		\
-+                              bo, rd, wd, flags);		\
-+	if (!b_l_rmesa->radeonScreen->kernel_mm) 		\
-+		b_l_rmesa->cmdbuf.cs->section_cdw += 2;		\
-+	} while(0)
-+
-+
-+/**
-+ * Write n dwords from ptr to the command buffer.
-+ */
-+#define OUT_BATCH_TABLE(ptr,n) \
-+	do { \
-+		int _i; \
-+        for (_i=0; _i < n; _i++) {\
-+            radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, ptr[_i]);\
-+        }\
-+	} while(0)
-+
-+/**
-+ * Finish writing dwords to the command buffer.
-+ * The number of (direct or indirect) OUT_BATCH calls between the previous
-+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
-+ */
-+#define END_BATCH() \
-+	do { \
-+        radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
-+	} while(0)
-+
-+/**
-+ * After the last END_BATCH() of rendering, this indicates that flushing
-+ * the command buffer now is okay.
-+ */
-+#define COMMIT_BATCH() \
-+	do { \
-+	} while(0)
-+
-+
-+/** Single register write to command buffer; requires 2 dwords. */
-+#define OUT_BATCH_REGVAL(reg, val) \
-+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
-+	OUT_BATCH((val))
-+
-+/** Continuous register range write to command buffer; requires 1 dword,
-+ * expects count dwords afterwards for register contents. */
-+#define OUT_BATCH_REGSEQ(reg, count) \
-+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)));
-+
-+/** Write a 32 bit float to the ring; requires 1 dword. */
-+#define OUT_BATCH_FLOAT32(f) \
-+	OUT_BATCH(radeonPackFloat32((f)));
-+
-+
-+/* Fire the buffered vertices no matter what.
-+ */
-+static INLINE void radeon_firevertices(radeonContextPtr radeon)
-+{
-+   if (radeon->cmdbuf.cs->cdw || radeon->dma.flush )
-+      radeonFlush(radeon->glCtx);
-+}
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
-new file mode 100644
-index 0000000..8b5b892
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
-@@ -0,0 +1,1456 @@
-+/**************************************************************************
-+
-+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+
-+The Weather Channel (TM) funded Tungsten Graphics to develop the
-+initial release of the Radeon 8500 driver under the XFree86 license.
-+This notice must be preserved.
-+
-+Permission is hereby granted, free of charge, to any person obtaining
-+a copy of this software and associated documentation files (the
-+"Software"), to deal in the Software without restriction, including
-+without limitation the rights to use, copy, modify, merge, publish,
-+distribute, sublicense, and/or sell copies of the Software, and to
-+permit persons to whom the Software is furnished to do so, subject to
-+the following conditions:
-+
-+The above copyright notice and this permission notice (including the
-+next paragraph) shall be included in all copies or substantial
-+portions of the Software.
-+
-+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+
-+**************************************************************************/
-+
-+/*
-+ * Authors:
-+ *   Keith Whitwell <keith@tungstengraphics.com>
-+ */
-+
-+/*
-+   - Scissor implementation
-+   - buffer swap/copy ioctls
-+   - finish/flush
-+   - state emission
-+   - cmdbuffer management
-+*/
-+
-+#include <errno.h>
-+#include "main/glheader.h"
-+#include "main/imports.h"
-+#include "main/context.h"
-+#include "main/api_arrayelt.h"
-+#include "main/enums.h"
-+#include "main/colormac.h"
-+#include "main/light.h"
-+#include "main/framebuffer.h"
-+#include "main/simple_list.h"
-+#include "main/renderbuffer.h"
-+#include "swrast/swrast.h"
-+#include "vbo/vbo.h"
-+#include "tnl/tnl.h"
-+#include "tnl/t_pipeline.h"
-+#include "swrast_setup/swrast_setup.h"
-+
-+#include "main/blend.h"
-+#include "main/bufferobj.h"
-+#include "main/buffers.h"
-+#include "main/depth.h"
-+#include "main/shaders.h"
-+#include "main/texstate.h"
-+#include "main/varray.h"
-+#include "glapi/dispatch.h"
-+#include "swrast/swrast.h"
-+#include "main/stencil.h"
-+#include "main/matrix.h"
-+#include "main/attrib.h"
-+#include "main/enable.h"
-+#include "main/viewport.h"
-+
-+#include "dri_util.h"
-+#include "vblank.h"
-+
-+#include "radeon_common.h"
-+#include "radeon_bocs_wrapper.h"
-+#include "radeon_lock.h"
-+#include "radeon_drm.h"
-+#include "radeon_mipmap_tree.h"
-+
-+#define DEBUG_CMDBUF         0
-+
-+/* =============================================================
-+ * Scissoring
-+ */
-+
-+static GLboolean intersect_rect(drm_clip_rect_t * out,
-+				drm_clip_rect_t * a, drm_clip_rect_t * b)
-+{
-+	*out = *a;
-+	if (b->x1 > out->x1)
-+		out->x1 = b->x1;
-+	if (b->y1 > out->y1)
-+		out->y1 = b->y1;
-+	if (b->x2 < out->x2)
-+		out->x2 = b->x2;
-+	if (b->y2 < out->y2)
-+		out->y2 = b->y2;
-+	if (out->x1 >= out->x2)
-+		return GL_FALSE;
-+	if (out->y1 >= out->y2)
-+		return GL_FALSE;
-+	return GL_TRUE;
-+}
-+
-+void radeonRecalcScissorRects(radeonContextPtr radeon)
-+{
-+	drm_clip_rect_t *out;
-+	int i;
-+
-+	/* Grow cliprect store?
-+	 */
-+	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
-+		while (radeon->state.scissor.numAllocedClipRects <
-+		       radeon->numClipRects) {
-+			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
-+			radeon->state.scissor.numAllocedClipRects *= 2;
-+		}
-+
-+		if (radeon->state.scissor.pClipRects)
-+			FREE(radeon->state.scissor.pClipRects);
-+
-+		radeon->state.scissor.pClipRects =
-+			MALLOC(radeon->state.scissor.numAllocedClipRects *
-+			       sizeof(drm_clip_rect_t));
-+
-+		if (radeon->state.scissor.pClipRects == NULL) {
-+			radeon->state.scissor.numAllocedClipRects = 0;
-+			return;
-+		}
-+	}
-+
-+	out = radeon->state.scissor.pClipRects;
-+	radeon->state.scissor.numClipRects = 0;
-+
-+	for (i = 0; i < radeon->numClipRects; i++) {
-+		if (intersect_rect(out,
-+				   &radeon->pClipRects[i],
-+				   &radeon->state.scissor.rect)) {
-+			radeon->state.scissor.numClipRects++;
-+			out++;
-+		}
-+	}
-+}
-+
-+void radeon_get_cliprects(radeonContextPtr radeon,
-+			  struct drm_clip_rect **cliprects,
-+			  unsigned int *num_cliprects,
-+			  int *x_off, int *y_off)
-+{
-+	__DRIdrawablePrivate *dPriv = radeon->dri.drawable;
-+	struct radeon_framebuffer *rfb = dPriv->driverPrivate;
-+
-+	if (radeon->constant_cliprect) {
-+		radeon->fboRect.x1 = 0;
-+		radeon->fboRect.y1 = 0;
-+		radeon->fboRect.x2 = radeon->glCtx->DrawBuffer->Width;
-+		radeon->fboRect.y2 = radeon->glCtx->DrawBuffer->Height;
-+
-+		*cliprects = &radeon->fboRect;
-+		*num_cliprects = 1;
-+		*x_off = 0;
-+		*y_off = 0;
-+	} else if (radeon->front_cliprects ||
-+		   rfb->pf_active || dPriv->numBackClipRects == 0) {
-+		*cliprects = dPriv->pClipRects;
-+		*num_cliprects = dPriv->numClipRects;
-+		*x_off = dPriv->x;
-+		*y_off = dPriv->y;
-+	} else {
-+		*num_cliprects = dPriv->numBackClipRects;
-+		*cliprects = dPriv->pBackClipRects;
-+		*x_off = dPriv->backX;
-+		*y_off = dPriv->backY;
-+	}
-+}
-+
-+/**
-+ * Update cliprects and scissors.
-+ */
-+void radeonSetCliprects(radeonContextPtr radeon)
-+{
-+	__DRIdrawablePrivate *const drawable = radeon->dri.drawable;
-+	__DRIdrawablePrivate *const readable = radeon->dri.readable;
-+	struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate;
-+	struct radeon_framebuffer *const read_rfb = readable->driverPrivate;
-+	int x_off, y_off;
-+
-+	radeon_get_cliprects(radeon, &radeon->pClipRects,
-+			     &radeon->numClipRects, &x_off, &y_off);
-+	
-+	if ((draw_rfb->base.Width != drawable->w) ||
-+	    (draw_rfb->base.Height != drawable->h)) {
-+		_mesa_resize_framebuffer(radeon->glCtx, &draw_rfb->base,
-+					 drawable->w, drawable->h);
-+		draw_rfb->base.Initialized = GL_TRUE;
-+	}
-+
-+	if (drawable != readable) {
-+		if ((read_rfb->base.Width != readable->w) ||
-+		    (read_rfb->base.Height != readable->h)) {
-+			_mesa_resize_framebuffer(radeon->glCtx, &read_rfb->base,
-+						 readable->w, readable->h);
-+			read_rfb->base.Initialized = GL_TRUE;
-+		}
-+	}
-+
-+	if (radeon->state.scissor.enabled)
-+		radeonRecalcScissorRects(radeon);
-+
-+}
-+
-+
-+
-+void radeonUpdateScissor( GLcontext *ctx )
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+
-+	if ( rmesa->dri.drawable ) {
-+		__DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+      
-+		int x = ctx->Scissor.X;
-+		int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
-+		int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
-+		int h = dPriv->h - ctx->Scissor.Y - 1;
-+
-+		rmesa->state.scissor.rect.x1 = x + dPriv->x;
-+		rmesa->state.scissor.rect.y1 = y + dPriv->y;
-+		rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
-+		rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-+
-+		radeonRecalcScissorRects( rmesa );
-+	}
-+}
-+
-+/* =============================================================
-+ * Scissoring
-+ */
-+
-+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	if (ctx->Scissor.Enabled) {
-+		/* We don't pipeline cliprect changes */
-+		radeon_firevertices(radeon);
-+		radeonUpdateScissor(ctx);
-+	}
-+}
-+
-+
-+/* ================================================================
-+ * SwapBuffers with client-side throttling
-+ */
-+
-+static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
-+{
-+	drm_radeon_getparam_t gp;
-+	int ret;
-+	uint32_t frame = 0;
-+
-+	gp.param = RADEON_PARAM_LAST_FRAME;
-+	gp.value = (int *)&frame;
-+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
-+				  &gp, sizeof(gp));
-+	if (ret) {
-+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
-+			ret);
-+		exit(1);
-+	}
-+
-+	return frame;
-+}
-+
-+uint32_t radeonGetAge(radeonContextPtr radeon)
-+{
-+	drm_radeon_getparam_t gp;
-+	int ret;
-+	uint32_t age;
-+
-+	gp.param = RADEON_PARAM_LAST_CLEAR;
-+	gp.value = (int *)&age;
-+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
-+				  &gp, sizeof(gp));
-+	if (ret) {
-+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
-+			ret);
-+		exit(1);
-+	}
-+
-+	return age;
-+}
-+
-+static void radeonEmitIrqLocked(radeonContextPtr radeon)
-+{
-+	drm_radeon_irq_emit_t ie;
-+	int ret;
-+
-+	ie.irq_seq = &radeon->iw.irq_seq;
-+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
-+				  &ie, sizeof(ie));
-+	if (ret) {
-+		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
-+			ret);
-+		exit(1);
-+	}
-+}
-+
-+static void radeonWaitIrq(radeonContextPtr radeon)
-+{
-+	int ret;
-+
-+	do {
-+		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
-+				      &radeon->iw, sizeof(radeon->iw));
-+	} while (ret && (errno == EINTR || errno == EBUSY));
-+
-+	if (ret) {
-+		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
-+			ret);
-+		exit(1);
-+	}
-+}
-+
-+static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
-+{
-+	drm_radeon_sarea_t *sarea = radeon->sarea;
-+
-+	if (radeon->do_irqs) {
-+		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
-+			if (!radeon->irqsEmitted) {
-+				while (radeonGetLastFrame(radeon) <
-+				       sarea->last_frame) ;
-+			} else {
-+				UNLOCK_HARDWARE(radeon);
-+				radeonWaitIrq(radeon);
-+				LOCK_HARDWARE(radeon);
-+			}
-+			radeon->irqsEmitted = 10;
-+		}
-+
-+		if (radeon->irqsEmitted) {
-+			radeonEmitIrqLocked(radeon);
-+			radeon->irqsEmitted--;
-+		}
-+	} else {
-+		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
-+			UNLOCK_HARDWARE(radeon);
-+			if (radeon->do_usleeps)
-+				DO_USLEEP(1);
-+			LOCK_HARDWARE(radeon);
-+		}
-+	}
-+}
-+
-+/* wait for idle */
-+void radeonWaitForIdleLocked(radeonContextPtr radeon)
-+{
-+	int ret;
-+	int i = 0;
-+
-+	do {
-+		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
-+		if (ret)
-+			DO_USLEEP(1);
-+	} while (ret && ++i < 100);
-+
-+	if (ret < 0) {
-+		UNLOCK_HARDWARE(radeon);
-+		fprintf(stderr, "Error: R300 timed out... exiting\n");
-+		exit(-1);
-+	}
-+}
-+
-+static void radeonWaitForIdle(radeonContextPtr radeon)
-+{
-+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
-+        LOCK_HARDWARE(radeon);
-+	    radeonWaitForIdleLocked(radeon);
-+	    UNLOCK_HARDWARE(radeon);
-+    }
-+}
-+
-+static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb)
-+{
-+	int current_page = rfb->pf_current_page;
-+	int next_page = (current_page + 1) % rfb->pf_num_pages;
-+	struct gl_renderbuffer *tmp_rb;
-+
-+	/* Exchange renderbuffers if necessary but make sure their
-+	 * reference counts are preserved.
-+	 */
-+	if (rfb->color_rb[current_page] &&
-+	    rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
-+	    &rfb->color_rb[current_page]->base) {
-+		tmp_rb = NULL;
-+		_mesa_reference_renderbuffer(&tmp_rb,
-+					     rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
-+		tmp_rb = &rfb->color_rb[current_page]->base;
-+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
-+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
-+	}
-+
-+	if (rfb->color_rb[next_page] &&
-+	    rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
-+	    &rfb->color_rb[next_page]->base) {
-+		tmp_rb = NULL;
-+		_mesa_reference_renderbuffer(&tmp_rb,
-+					     rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
-+		tmp_rb = &rfb->color_rb[next_page]->base;
-+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
-+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
-+	}
-+}
-+
-+/* Copy the back color buffer to the front color buffer.
-+ */
-+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
-+		       const drm_clip_rect_t	  *rect)
-+{
-+	radeonContextPtr rmesa;
-+	struct radeon_framebuffer *rfb;
-+	GLint nbox, i, ret;
-+   
-+	assert(dPriv);
-+	assert(dPriv->driContextPriv);
-+	assert(dPriv->driContextPriv->driverPrivate);
-+   
-+	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-+
-+	LOCK_HARDWARE(rmesa);
-+
-+	rfb = dPriv->driverPrivate;
-+
-+	if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-+		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
-+	}
-+
-+	nbox = dPriv->numClipRects; /* must be in locked region */
-+
-+	for ( i = 0 ; i < nbox ; ) {
-+		GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
-+		drm_clip_rect_t *box = dPriv->pClipRects;
-+		drm_clip_rect_t *b = rmesa->sarea->boxes;
-+		GLint n = 0;
-+
-+		for ( ; i < nr ; i++ ) {
-+
-+			*b = box[i];
-+
-+			if (rect)
-+			{
-+				if (rect->x1 > b->x1)
-+					b->x1 = rect->x1;
-+				if (rect->y1 > b->y1)
-+					b->y1 = rect->y1;
-+				if (rect->x2 < b->x2)
-+					b->x2 = rect->x2;
-+				if (rect->y2 < b->y2)
-+					b->y2 = rect->y2;
-+
-+				if (b->x1 >= b->x2 || b->y1 >= b->y2)
-+					continue;
-+			}
-+
-+			b++;
-+			n++;
-+		}
-+		rmesa->sarea->nbox = n;
-+
-+		if (!n)
-+			continue;
-+
-+		ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
-+
-+		if ( ret ) {
-+			fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
-+			UNLOCK_HARDWARE( rmesa );
-+			exit( 1 );
-+		}
-+	}
-+
-+	UNLOCK_HARDWARE( rmesa );
-+}
-+
-+static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_target)
-+{
-+	radeonContextPtr rmesa;
-+
-+	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-+	radeon_firevertices(rmesa);
-+
-+	LOCK_HARDWARE( rmesa );
-+
-+	if (!dPriv->numClipRects) {
-+		UNLOCK_HARDWARE(rmesa);
-+		usleep(10000);	/* throttle invisible client 10ms */
-+		return 0;
-+	}
-+
-+	radeonWaitForFrameCompletion(rmesa);
-+
-+	UNLOCK_HARDWARE(rmesa);
-+	driWaitForVBlank(dPriv, missed_target);
-+	
-+	return 0;
-+}
-+
-+static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv )
-+{
-+	radeonContextPtr radeon;
-+	GLint ret;
-+	__DRIscreenPrivate *psp;
-+	struct radeon_renderbuffer *rrb;
-+	struct radeon_framebuffer *rfb;
-+
-+	assert(dPriv);
-+	assert(dPriv->driContextPriv);
-+	assert(dPriv->driContextPriv->driverPrivate);
-+
-+	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-+	rfb = dPriv->driverPrivate;
-+	rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
-+	psp = dPriv->driScreenPriv;
-+
-+	LOCK_HARDWARE(radeon);
-+
-+	if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-+		fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__,
-+			radeon->sarea->pfCurrentPage, radeon->sarea->pfState);
-+	}
-+	drm_clip_rect_t *box = dPriv->pClipRects;
-+	drm_clip_rect_t *b = radeon->sarea->boxes;
-+	b[0] = box[0];
-+	radeon->sarea->nbox = 1;
-+
-+	ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP );
-+	
-+	UNLOCK_HARDWARE(radeon);
-+
-+	if ( ret ) {
-+		fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
-+		return GL_FALSE;
-+	}
-+
-+	if (!rfb->pf_active)
-+		return GL_FALSE;
-+
-+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
-+	radeon_flip_renderbuffers(rfb);
-+	radeon_draw_buffer(radeon->glCtx, &rfb->base);
-+
-+	return GL_TRUE;
-+}
-+
-+
-+/**
-+ * Swap front and back buffer.
-+ */
-+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
-+{
-+	int64_t ust;
-+	__DRIscreenPrivate *psp;
-+
-+	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-+		radeonContextPtr radeon;
-+		GLcontext *ctx;
-+
-+		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-+		ctx = radeon->glCtx;
-+
-+		if (ctx->Visual.doubleBufferMode) {
-+			GLboolean missed_target;
-+			struct radeon_framebuffer *rfb = dPriv->driverPrivate;
-+			_mesa_notifySwapBuffers(ctx);/* flush pending rendering comands */
-+
-+			radeonScheduleSwap(dPriv, &missed_target);
-+
-+			if (rfb->pf_active) {
-+				radeonPageFlip(dPriv);
-+			} else {
-+				radeonCopyBuffer(dPriv, NULL);
-+			}
-+
-+			psp = dPriv->driScreenPriv;
-+
-+			rfb->swap_count++;
-+			(*psp->systemTime->getUST)( & ust );
-+			if ( missed_target ) {
-+				rfb->swap_missed_count++;
-+				rfb->swap_missed_ust = ust - rfb->swap_ust;
-+			}
-+
-+			rfb->swap_ust = ust;
-+			radeon->hw.all_dirty = GL_TRUE;
-+		}
-+	} else {
-+		/* XXX this shouldn't be an error but we can't handle it for now */
-+		_mesa_problem(NULL, "%s: drawable has no context!",
-+			      __FUNCTION__);
-+	}
-+}
-+
-+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-+			 int x, int y, int w, int h )
-+{
-+	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-+		radeonContextPtr radeon;
-+		GLcontext *ctx;
-+
-+		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-+		ctx = radeon->glCtx;
-+
-+		if (ctx->Visual.doubleBufferMode) {
-+			drm_clip_rect_t rect;
-+			rect.x1 = x + dPriv->x;
-+			rect.y1 = (dPriv->h - y - h) + dPriv->y;
-+			rect.x2 = rect.x1 + w;
-+			rect.y2 = rect.y1 + h;
-+			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
-+			radeonCopyBuffer(dPriv, &rect);
-+		}
-+	} else {
-+		/* XXX this shouldn't be an error but we can't handle it for now */
-+		_mesa_problem(NULL, "%s: drawable has no context!",
-+			      __FUNCTION__);
-+	}
-+}
-+
-+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL,
-+		*rrbColor = NULL;
-+	uint32_t offset = 0;
-+       
-+
-+	if (!fb) {
-+		/* this can happen during the initial context initialization */
-+		return;
-+	}
-+
-+	/* radeons only handle 1 color draw so far */
-+	if (fb->_NumColorDrawBuffers != 1) {
-+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
-+		return;
-+	}
-+		
-+	/* Do this here, note core Mesa, since this function is called from
-+	 * many places within the driver.
-+	 */
-+	if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
-+		/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
-+		_mesa_update_framebuffer(ctx);
-+		/* this updates the DrawBuffer's Width/Height if it's a FBO */
-+		_mesa_update_draw_buffer_bounds(ctx);
-+	}
-+
-+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
-+		/* this may occur when we're called by glBindFrameBuffer() during
-+		 * the process of someone setting up renderbuffers, etc.
-+		 */
-+		/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
-+		return;
-+	}
-+
-+	if (fb->Name)
-+		;/* do something depthy/stencily TODO */
-+
-+
-+		/* none */
-+	if (fb->Name == 0) {
-+		if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
-+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
-+			radeon->front_cliprects = GL_TRUE;
-+		} else {
-+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
-+			radeon->front_cliprects = GL_FALSE;
-+		}
-+	} else {
-+		/* user FBO in theory */
-+		struct radeon_renderbuffer *rrb;
-+		rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
-+		if (rrb) {
-+			offset = rrb->draw_offset;
-+			rrbColor = rrb;
-+		}
-+		radeon->constant_cliprect = GL_TRUE;
-+	}
-+
-+	if (rrbColor == NULL)
-+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
-+	else
-+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE);
-+
-+
-+	if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
-+		rrbDepth = radeon_renderbuffer(fb->_DepthBuffer->Wrapped);
-+		if (rrbDepth && rrbDepth->bo) {
-+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
-+		} else {
-+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE);
-+		}
-+	} else {
-+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
-+		rrbDepth = NULL;
-+	}
-+
-+	if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
-+		rrbStencil = radeon_renderbuffer(fb->_DepthBuffer->Wrapped);
-+		if (rrbStencil && rrbStencil->bo) {
-+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
-+			/* need to re-compute stencil hw state */
-+			if (!rrbDepth)
-+				rrbDepth = rrbStencil;
-+		} else {
-+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE);
-+		}
-+	} else {
-+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
-+		if (ctx->Driver.Enable != NULL)
-+			ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
-+		else
-+			ctx->NewState |= _NEW_STENCIL;
-+	}
-+
-+	/* Update culling direction which changes depending on the
-+	 * orientation of the buffer:
-+	 */
-+	if (ctx->Driver.FrontFace)
-+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
-+	else
-+		ctx->NewState |= _NEW_POLYGON;
-+	
-+	/*
-+	 * Update depth test state
-+	 */
-+	if (ctx->Driver.Enable) {
-+		ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
-+				   (ctx->Depth.Test && fb->Visual.depthBits > 0));
-+		ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
-+				   (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0));
-+	} else {
-+		ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
-+	}
-+	
-+	radeon->state.depth.rrb = rrbDepth;
-+	radeon->state.color.rrb = rrbColor;
-+	radeon->state.color.draw_offset = offset;
-+
-+#if 0
-+	/* update viewport since it depends on window size */
-+	if (ctx->Driver.Viewport) {
-+		ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
-+				     ctx->Viewport.Width, ctx->Viewport.Height);
-+	} else {
-+	
-+	}
-+#endif
-+	ctx->NewState |= _NEW_VIEWPORT;
-+
-+	/* Set state we know depends on drawable parameters:
-+	 */
-+	if (ctx->Driver.Scissor)
-+		ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-+				    ctx->Scissor.Width, ctx->Scissor.Height);
-+	radeon->NewGLState |= _NEW_SCISSOR;
-+
-+	if (ctx->Driver.DepthRange)
-+		ctx->Driver.DepthRange(ctx,
-+				       ctx->Viewport.Near,
-+				       ctx->Viewport.Far);
-+
-+	/* Update culling direction which changes depending on the
-+	 * orientation of the buffer:
-+	 */
-+	if (ctx->Driver.FrontFace)
-+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
-+	else
-+		ctx->NewState |= _NEW_POLYGON;
-+}
-+
-+/**
-+ * Called via glDrawBuffer.
-+ */
-+void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
-+{
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s %s\n", __FUNCTION__,
-+			_mesa_lookup_enum_by_nr( mode ));
-+	
-+	radeon_draw_buffer(ctx, ctx->DrawBuffer);
-+}
-+
-+void radeonReadBuffer( GLcontext *ctx, GLenum mode )
-+{
-+	/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-+	if (ctx->ReadBuffer == ctx->DrawBuffer) {
-+		/* This will update FBO completeness status.
-+		 * A framebuffer will be incomplete if the GL_READ_BUFFER setting
-+		 * refers to a missing renderbuffer.  Calling glReadBuffer can set
-+		 * that straight and can make the drawing buffer complete.
-+		 */
-+		radeon_draw_buffer(ctx, ctx->DrawBuffer);
-+	}
-+}
-+
-+
-+/* Turn on/off page flipping according to the flags in the sarea:
-+ */
-+void radeonUpdatePageFlipping(radeonContextPtr radeon)
-+{
-+	struct radeon_framebuffer *rfb = radeon->dri.drawable->driverPrivate;
-+
-+	rfb->pf_active = radeon->sarea->pfState;
-+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
-+	rfb->pf_num_pages = 2;
-+	radeon_flip_renderbuffers(rfb);
-+	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
-+}
-+
-+void radeon_window_moved(radeonContextPtr radeon)
-+{
-+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
-+		radeonUpdatePageFlipping(radeon);
-+	}
-+	radeonSetCliprects(radeon);
-+}
-+
-+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	__DRIcontext *driContext = radeon->dri.context;
-+	void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
-+			     GLsizei w, GLsizei h);
-+
-+	if (!driContext->driScreenPriv->dri2.enabled)
-+		return;
-+
-+	radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
-+	if (driContext->driDrawablePriv != driContext->driReadablePriv)
-+		radeon_update_renderbuffers(driContext, driContext->driReadablePriv);
-+
-+	old_viewport = ctx->Driver.Viewport;
-+	ctx->Driver.Viewport = NULL;
-+	radeon->dri.drawable = driContext->driDrawablePriv;
-+	radeon_window_moved(radeon);
-+	radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer);
-+	ctx->Driver.Viewport = old_viewport;
-+}
-+
-+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
-+{
-+	int i, j, reg;
-+	int dwords = (*state->check) (radeon->glCtx, state);
-+	drm_r300_cmd_header_t cmd;
-+
-+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
-+
-+	if (RADEON_DEBUG & DEBUG_VERBOSE) {
-+		for (i = 0; i < dwords;) {
-+			cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
-+			reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
-+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
-+					state->name, i, reg, cmd.packet0.count);
-+			++i;
-+			for (j = 0; j < cmd.packet0.count && i < dwords; j++) {
-+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
-+						state->name, i, reg, state->cmd[i]);
-+				reg += 4;
-+				++i;
-+			}
-+		}
-+	}
-+}
-+
-+static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state)
-+{
-+	int i, j, reg, count;
-+	int dwords = (*state->check) (radeon->glCtx, state);
-+	uint32_t packet0;
-+
-+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
-+
-+	if (RADEON_DEBUG & DEBUG_VERBOSE) {
-+		for (i = 0; i < dwords;) {
-+			packet0 = state->cmd[i];
-+			reg = (packet0 & 0x1FFF) << 2;
-+			count = ((packet0 & 0x3FFF0000) >> 16) + 1;
-+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
-+					state->name, i, reg, count);
-+			++i;
-+			for (j = 0; j < count && i < dwords; j++) {
-+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
-+						state->name, i, reg, state->cmd[i]);
-+				reg += 4;
-+				++i;
-+			}
-+		}
-+	}
-+}
-+
-+static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
-+{
-+	BATCH_LOCALS(radeon);
-+	struct radeon_state_atom *atom;
-+	int dwords;
-+
-+	if (radeon->vtbl.pre_emit_atoms)
-+		radeon->vtbl.pre_emit_atoms(radeon);
-+
-+	/* Emit actual atoms */
-+	foreach(atom, &radeon->hw.atomlist) {
-+		if ((atom->dirty || radeon->hw.all_dirty) == dirty) {
-+			dwords = (*atom->check) (radeon->glCtx, atom);
-+			if (dwords) {
-+				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
-+					if (radeon->radeonScreen->kernel_mm)
-+						radeon_print_state_atom_kmm(radeon, atom);
-+					else
-+						radeon_print_state_atom(radeon, atom);
-+				}
-+				if (atom->emit) {
-+					(*atom->emit)(radeon->glCtx, atom);
-+				} else {
-+					BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+					OUT_BATCH_TABLE(atom->cmd, dwords);
-+					END_BATCH();
-+				}
-+				atom->dirty = GL_FALSE;
-+			} else {
-+				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
-+					fprintf(stderr, "  skip state %s\n",
-+						atom->name);
-+				}
-+			}
-+		}
-+	}
-+   
-+	COMMIT_BATCH();
-+}
-+
-+GLboolean radeon_revalidate_bos(GLcontext *ctx)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	int flushed = 0;
-+	int ret;
-+again:
-+	ret = radeon_cs_space_check(radeon->cmdbuf.cs, radeon->state.bos, radeon->state.validated_bo_count);
-+	if (ret == RADEON_CS_SPACE_OP_TO_BIG)
-+		return GL_FALSE;
-+	if (ret == RADEON_CS_SPACE_FLUSH) {
-+		radeonFlush(ctx);
-+		if (flushed)
-+			return GL_FALSE;
-+		flushed = 1;
-+		goto again;
-+	}
-+	return GL_TRUE;
-+}
-+
-+void radeon_validate_reset_bos(radeonContextPtr radeon)
-+{
-+	int i;
-+
-+	for (i = 0; i < radeon->state.validated_bo_count; i++) {
-+		radeon_bo_unref(radeon->state.bos[i].bo);
-+		radeon->state.bos[i].bo = NULL;
-+		radeon->state.bos[i].read_domains = 0;
-+		radeon->state.bos[i].write_domain = 0;
-+		radeon->state.bos[i].new_accounted = 0;
-+	}
-+	radeon->state.validated_bo_count = 0;
-+}
-+
-+void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
-+{
-+	radeon_bo_ref(bo);
-+	radeon->state.bos[radeon->state.validated_bo_count].bo = bo;
-+	radeon->state.bos[radeon->state.validated_bo_count].read_domains = read_domains;
-+	radeon->state.bos[radeon->state.validated_bo_count].write_domain = write_domain;
-+	radeon->state.bos[radeon->state.validated_bo_count].new_accounted = 0;
-+	radeon->state.validated_bo_count++;
-+
-+	assert(radeon->state.validated_bo_count < RADEON_MAX_BOS);
-+}
-+
-+void radeonEmitState(radeonContextPtr radeon)
-+{
-+	if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
-+		fprintf(stderr, "%s\n", __FUNCTION__);
-+
-+	if (radeon->vtbl.pre_emit_state)
-+		radeon->vtbl.pre_emit_state(radeon);
-+
-+	/* this code used to return here but now it emits zbs */
-+	if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
-+		return;
-+
-+	/* To avoid going across the entire set of states multiple times, just check
-+	 * for enough space for the case of emitting all state, and inline the
-+	 * radeonAllocCmdBuf code here without all the checks.
-+	 */
-+	rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__);
-+
-+	if (!radeon->cmdbuf.cs->cdw) {
-+		if (RADEON_DEBUG & DEBUG_STATE)
-+			fprintf(stderr, "Begin reemit state\n");
-+		
-+		radeonEmitAtoms(radeon, GL_FALSE);
-+	}
-+
-+	if (RADEON_DEBUG & DEBUG_STATE)
-+		fprintf(stderr, "Begin dirty state\n");
-+
-+	radeonEmitAtoms(radeon, GL_TRUE);
-+	radeon->hw.is_dirty = GL_FALSE;
-+	radeon->hw.all_dirty = GL_FALSE;
-+
-+}
-+
-+
-+void radeonFlush(GLcontext *ctx)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	if (RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
-+
-+	/* okay if we have no cmds in the buffer &&
-+	   we have no DMA flush &&
-+	   we have no DMA buffer allocated.
-+	   then no point flushing anything at all.
-+	*/
-+	if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current)
-+		return;
-+
-+	if (radeon->dma.flush)
-+		radeon->dma.flush( ctx );
-+
-+	radeonEmitState(radeon);
-+   
-+	if (radeon->cmdbuf.cs->cdw)
-+		rcommonFlushCmdBuf(radeon, __FUNCTION__);
-+}
-+
-+/* Make sure all commands have been sent to the hardware and have
-+ * completed processing.
-+ */
-+void radeonFinish(GLcontext * ctx)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	struct gl_framebuffer *fb = ctx->DrawBuffer;
-+	int i;
-+
-+	radeonFlush(ctx);
-+
-+	if (radeon->radeonScreen->kernel_mm) {
-+		for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
-+			struct radeon_renderbuffer *rrb;
-+			rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]);
-+			if (rrb && rrb->bo)
-+				radeon_bo_wait(rrb->bo);
-+		}
-+		{
-+			struct radeon_renderbuffer *rrb;
-+			rrb = radeon_get_depthbuffer(radeon);
-+			if (rrb && rrb->bo)
-+				radeon_bo_wait(rrb->bo);
-+		}
-+	} else if (radeon->do_irqs) {
-+		LOCK_HARDWARE(radeon);
-+		radeonEmitIrqLocked(radeon);
-+		UNLOCK_HARDWARE(radeon);
-+		radeonWaitIrq(radeon);
-+	} else {
-+		radeonWaitForIdle(radeon);
-+	}
-+}
-+
-+/* cmdbuffer */
-+/**
-+ * Send the current command buffer via ioctl to the hardware.
-+ */
-+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
-+{
-+	int ret = 0;
-+
-+	if (rmesa->cmdbuf.flushing) {
-+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
-+		exit(-1);
-+	}
-+	rmesa->cmdbuf.flushing = 1;
-+
-+	if (RADEON_DEBUG & DEBUG_IOCTL) {
-+		fprintf(stderr, "%s from %s - %i cliprects\n",
-+			__FUNCTION__, caller, rmesa->numClipRects);
-+	}
-+
-+	if (rmesa->cmdbuf.cs->cdw) {
-+		ret = radeon_cs_emit(rmesa->cmdbuf.cs);
-+		rmesa->hw.all_dirty = GL_TRUE;
-+	}
-+	radeon_cs_erase(rmesa->cmdbuf.cs);
-+	rmesa->cmdbuf.flushing = 0;
-+
-+	if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) {
-+		fprintf(stderr,"failed to revalidate buffers\n");
-+	}
-+
-+	return ret;
-+}
-+
-+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
-+{
-+	int ret;
-+
-+	radeonReleaseDmaRegion(rmesa);
-+	
-+	LOCK_HARDWARE(rmesa);
-+	ret = rcommonFlushCmdBufLocked(rmesa, caller);
-+	UNLOCK_HARDWARE(rmesa);
-+
-+	if (ret) {
-+		fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
-+		_mesa_exit(ret);
-+	}
-+
-+	return ret;
-+}
-+
-+/**
-+ * Make sure that enough space is available in the command buffer
-+ * by flushing if necessary.
-+ *
-+ * \param dwords The number of dwords we need to be free on the command buffer
-+ */
-+void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
-+{
-+	if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size ||
-+	    radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
-+		rcommonFlushCmdBuf(rmesa, caller);
-+	}
-+}
-+
-+void rcommonInitCmdBuf(radeonContextPtr rmesa)
-+{
-+	GLuint size;
-+	/* Initialize command buffer */
-+	size = 256 * driQueryOptioni(&rmesa->optionCache,
-+				     "command_buffer_size");
-+	if (size < 2 * rmesa->hw.max_state_size) {
-+		size = 2 * rmesa->hw.max_state_size + 65535;
-+	}
-+	if (size > 64 * 256)
-+		size = 64 * 256;
-+
-+	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
-+		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
-+			sizeof(drm_r300_cmd_header_t));
-+		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
-+			sizeof(drm_radeon_cmd_buffer_t));
-+		fprintf(stderr,
-+			"Allocating %d bytes command buffer (max state is %d bytes)\n",
-+			size * 4, rmesa->hw.max_state_size * 4);
-+	}
-+
-+	if (rmesa->radeonScreen->kernel_mm) {
-+		int fd = rmesa->radeonScreen->driScreen->fd;
-+		rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
-+	} else {
-+		rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa);
-+	}
-+	if (rmesa->cmdbuf.csm == NULL) {
-+		/* FIXME: fatal error */
-+		return;
-+	}
-+	rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
-+	assert(rmesa->cmdbuf.cs != NULL);
-+	rmesa->cmdbuf.size = size;
-+	
-+	if (!rmesa->radeonScreen->kernel_mm) {
-+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
-+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
-+	} else {
-+		struct drm_radeon_gem_info mminfo = { 0 };
-+
-+		if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
-+		{
-+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
-+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
-+		}
-+	}
-+
-+}
-+/**
-+ * Destroy the command buffer
-+ */
-+void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
-+{
-+	radeon_cs_destroy(rmesa->cmdbuf.cs);
-+	if (rmesa->radeonScreen->driScreen->dri2.enabled || rmesa->radeonScreen->kernel_mm) {
-+		radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
-+	} else {
-+		radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm);
-+	}
-+}
-+
-+void rcommonBeginBatch(radeonContextPtr rmesa, int n,
-+		       int dostate,
-+		       const char *file,
-+		       const char *function,
-+		       int line)
-+{
-+	rcommonEnsureCmdBufSpace(rmesa, n, function);
-+	if (!rmesa->cmdbuf.cs->cdw && dostate) {
-+		if (RADEON_DEBUG & DEBUG_IOCTL)
-+			fprintf(stderr, "Reemit state after flush (from %s)\n", function);
-+		radeonEmitState(rmesa);
-+	}
-+	radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
-+
-+        if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL)
-+                fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
-+                        n, rmesa->cmdbuf.cs->cdw, function, line);
-+
-+}
-+
-+
-+
-+static void
-+radeon_meta_set_passthrough_transform(radeonContextPtr radeon)
-+{
-+   GLcontext *ctx = radeon->glCtx;
-+
-+   radeon->meta.saved_vp_x = ctx->Viewport.X;
-+   radeon->meta.saved_vp_y = ctx->Viewport.Y;
-+   radeon->meta.saved_vp_width = ctx->Viewport.Width;
-+   radeon->meta.saved_vp_height = ctx->Viewport.Height;
-+   radeon->meta.saved_matrix_mode = ctx->Transform.MatrixMode;
-+
-+   _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
-+
-+   _mesa_MatrixMode(GL_PROJECTION);
-+   _mesa_PushMatrix();
-+   _mesa_LoadIdentity();
-+   _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1);
-+
-+   _mesa_MatrixMode(GL_MODELVIEW);
-+   _mesa_PushMatrix();
-+   _mesa_LoadIdentity();
-+}
-+
-+static void
-+radeon_meta_restore_transform(radeonContextPtr radeon)
-+{
-+   _mesa_MatrixMode(GL_PROJECTION);
-+   _mesa_PopMatrix();
-+   _mesa_MatrixMode(GL_MODELVIEW);
-+   _mesa_PopMatrix();
-+
-+   _mesa_MatrixMode(radeon->meta.saved_matrix_mode);
-+
-+   _mesa_Viewport(radeon->meta.saved_vp_x, radeon->meta.saved_vp_y,
-+		  radeon->meta.saved_vp_width, radeon->meta.saved_vp_height);
-+}
-+
-+
-+/**
-+ * Perform glClear where mask contains only color, depth, and/or stencil.
-+ *
-+ * The implementation is based on calling into Mesa to set GL state and
-+ * performing normal triangle rendering.  The intent of this path is to
-+ * have as generic a path as possible, so that any driver could make use of
-+ * it.
-+ */
-+
-+
-+void radeon_clear_tris(GLcontext *ctx, GLbitfield mask)
-+{
-+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   GLfloat vertices[4][3];
-+   GLfloat color[4][4];
-+   GLfloat dst_z;
-+   struct gl_framebuffer *fb = ctx->DrawBuffer;
-+   int i;
-+   GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE;
-+   GLboolean saved_shader_program = 0;
-+   unsigned int saved_active_texture;
-+
-+   assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH |
-+		    BUFFER_BIT_STENCIL)) == 0);   
-+
-+   _mesa_PushAttrib(GL_COLOR_BUFFER_BIT |
-+		    GL_CURRENT_BIT |
-+		    GL_DEPTH_BUFFER_BIT |
-+		    GL_ENABLE_BIT |
-+		    GL_POLYGON_BIT |
-+		    GL_STENCIL_BUFFER_BIT |
-+		    GL_TRANSFORM_BIT |
-+		    GL_CURRENT_BIT);
-+   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
-+   saved_active_texture = ctx->Texture.CurrentUnit;
-+  
-+  /* Disable existing GL state we don't want to apply to a clear. */
-+   _mesa_Disable(GL_ALPHA_TEST);
-+   _mesa_Disable(GL_BLEND);
-+   _mesa_Disable(GL_CULL_FACE);
-+   _mesa_Disable(GL_FOG);
-+   _mesa_Disable(GL_POLYGON_SMOOTH);
-+   _mesa_Disable(GL_POLYGON_STIPPLE);
-+   _mesa_Disable(GL_POLYGON_OFFSET_FILL);
-+   _mesa_Disable(GL_LIGHTING);
-+   _mesa_Disable(GL_CLIP_PLANE0);
-+   _mesa_Disable(GL_CLIP_PLANE1);
-+   _mesa_Disable(GL_CLIP_PLANE2);
-+   _mesa_Disable(GL_CLIP_PLANE3);
-+   _mesa_Disable(GL_CLIP_PLANE4);
-+   _mesa_Disable(GL_CLIP_PLANE5);
-+   _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
-+   if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) {
-+      saved_fp_enable = GL_TRUE;
-+      _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
-+   }
-+   if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) {
-+      saved_vp_enable = GL_TRUE;
-+      _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
-+   }
-+   if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) {
-+      saved_shader_program = ctx->Shader.CurrentProgram->Name;
-+      _mesa_UseProgramObjectARB(0);
-+   }
-+   
-+   if (ctx->Texture._EnabledUnits != 0) {
-+      int i;
-+      
-+      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-+	 _mesa_ActiveTextureARB(GL_TEXTURE0 + i);
-+	 _mesa_Disable(GL_TEXTURE_1D);
-+	 _mesa_Disable(GL_TEXTURE_2D);
-+	 _mesa_Disable(GL_TEXTURE_3D);
-+	 if (ctx->Extensions.ARB_texture_cube_map)
-+	    _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB);
-+	 if (ctx->Extensions.NV_texture_rectangle)
-+	    _mesa_Disable(GL_TEXTURE_RECTANGLE_NV);
-+	 if (ctx->Extensions.MESA_texture_array) {
-+	    _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT);
-+	    _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT);
-+	 }
-+      }
-+   }
-+  
-+#if FEATURE_ARB_vertex_buffer_object
-+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
-+   _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0);
-+#endif
-+
-+   radeon_meta_set_passthrough_transform(rmesa);
-+   
-+   for (i = 0; i < 4; i++) {
-+      color[i][0] = ctx->Color.ClearColor[0];
-+      color[i][1] = ctx->Color.ClearColor[1];
-+      color[i][2] = ctx->Color.ClearColor[2];
-+      color[i][3] = ctx->Color.ClearColor[3];
-+   }
-+
-+   /* convert clear Z from [0,1] to NDC coord in [-1,1] */
-+
-+   dst_z = -1.0 + 2.0 * ctx->Depth.Clear;
-+   /* Prepare the vertices, which are the same regardless of which buffer we're
-+    * drawing to.
-+    */
-+   vertices[0][0] = fb->_Xmin;
-+   vertices[0][1] = fb->_Ymin;
-+   vertices[0][2] = dst_z;
-+   vertices[1][0] = fb->_Xmax;
-+   vertices[1][1] = fb->_Ymin;
-+   vertices[1][2] = dst_z;
-+   vertices[2][0] = fb->_Xmax;
-+   vertices[2][1] = fb->_Ymax;
-+   vertices[2][2] = dst_z;
-+   vertices[3][0] = fb->_Xmin;
-+   vertices[3][1] = fb->_Ymax;
-+   vertices[3][2] = dst_z;
-+
-+   _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &color);
-+   _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), &vertices);
-+   _mesa_Enable(GL_COLOR_ARRAY);
-+   _mesa_Enable(GL_VERTEX_ARRAY);
-+
-+   while (mask != 0) {
-+      GLuint this_mask = 0;
-+      GLuint color_bit;
-+
-+      color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
-+      if (color_bit != 0)
-+	 this_mask |= (1 << (color_bit - 1));
-+
-+      /* Clear depth/stencil in the same pass as color. */
-+      this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL));
-+
-+      /* Select the current color buffer and use the color write mask if
-+       * we have one, otherwise don't write any color channels.
-+       */
-+      if (this_mask & BUFFER_BIT_FRONT_LEFT)
-+	 _mesa_DrawBuffer(GL_FRONT_LEFT);
-+      else if (this_mask & BUFFER_BIT_BACK_LEFT)
-+	 _mesa_DrawBuffer(GL_BACK_LEFT);
-+      else if (color_bit != 0)
-+	 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 +
-+			  (color_bit - BUFFER_COLOR0 - 1));
-+      else
-+	 _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
-+
-+      /* Control writing of the depth clear value to depth. */
-+      if (this_mask & BUFFER_BIT_DEPTH) {
-+	 _mesa_DepthFunc(GL_ALWAYS);
-+	 _mesa_DepthMask(GL_TRUE);
-+	 _mesa_Enable(GL_DEPTH_TEST);
-+      } else {
-+	 _mesa_Disable(GL_DEPTH_TEST);
-+	 _mesa_DepthMask(GL_FALSE);
-+      }
-+
-+      /* Control writing of the stencil clear value to stencil. */
-+      if (this_mask & BUFFER_BIT_STENCIL) {
-+	 _mesa_Enable(GL_STENCIL_TEST);
-+	 _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE);
-+	 _mesa_StencilFuncSeparate(GL_FRONT, GL_ALWAYS, ctx->Stencil.Clear,
-+				   ctx->Stencil.WriteMask[0]);
-+      } else {
-+	 _mesa_Disable(GL_STENCIL_TEST);
-+      }
-+
-+      CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
-+
-+      mask &= ~this_mask;
-+   }
-+
-+   radeon_meta_restore_transform(rmesa);
-+
-+   _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture);
-+   if (saved_fp_enable)
-+      _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
-+   if (saved_vp_enable)
-+      _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
-+
-+   if (saved_shader_program)
-+      _mesa_UseProgramObjectARB(saved_shader_program);
-+
-+   _mesa_PopClientAttrib();
-+   _mesa_PopAttrib();
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
-new file mode 100644
-index 0000000..c2fbb09
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
-@@ -0,0 +1,100 @@
-+#ifndef COMMON_MISC_H
-+#define COMMON_MISC_H
-+
-+#include "radeon_common_context.h"
-+#include "radeon_dma.h"
-+#include "radeon_texture.h"
-+
-+
-+#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT |			\
-+			      BUFFER_BIT_FRONT_LEFT |			\
-+			      BUFFER_BIT_COLOR0 |			\
-+			      BUFFER_BIT_COLOR1 |			\
-+			      BUFFER_BIT_COLOR2 |			\
-+			      BUFFER_BIT_COLOR3 |			\
-+			      BUFFER_BIT_COLOR4 |			\
-+			      BUFFER_BIT_COLOR5 |			\
-+			      BUFFER_BIT_COLOR6 |			\
-+			      BUFFER_BIT_COLOR7)
-+
-+void radeonRecalcScissorRects(radeonContextPtr radeon);
-+void radeonSetCliprects(radeonContextPtr radeon);
-+void radeonUpdateScissor( GLcontext *ctx );
-+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
-+
-+void radeonWaitForIdleLocked(radeonContextPtr radeon);
-+extern uint32_t radeonGetAge(radeonContextPtr radeon);
-+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
-+		       const drm_clip_rect_t	  *rect);
-+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
-+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-+			 int x, int y, int w, int h );
-+
-+void radeonUpdatePageFlipping(radeonContextPtr rmesa);
-+
-+void radeonFlush(GLcontext *ctx);
-+void radeonFinish(GLcontext * ctx);
-+void radeonEmitState(radeonContextPtr radeon);
-+
-+void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
-+
-+void radeon_window_moved(radeonContextPtr radeon);
-+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb);
-+void radeonDrawBuffer( GLcontext *ctx, GLenum mode );
-+void radeonReadBuffer( GLcontext *ctx, GLenum mode );
-+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height);
-+void radeon_get_cliprects(radeonContextPtr radeon,
-+			  struct drm_clip_rect **cliprects,
-+			  unsigned int *num_cliprects,
-+			  int *x_off, int *y_off);
-+GLboolean radeon_revalidate_bos(GLcontext *ctx);
-+void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain);
-+void radeon_validate_reset_bos(radeonContextPtr radeon);
-+
-+void radeon_fbo_init(struct radeon_context *radeon);
-+void
-+radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
-+			   struct radeon_bo *bo);
-+struct radeon_renderbuffer *
-+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv);
-+static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
-+{
-+	struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb;
-+	if (rrb && rrb->base.ClassID == RADEON_RB_CLASS)
-+		return rrb;
-+	else
-+		return NULL;
-+}
-+
-+static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index)
-+{
-+	if (att_index >= 0)
-+		return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer);
-+	else
-+		return NULL;
-+}
-+
-+static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
-+{
-+	struct radeon_renderbuffer *rrb;
-+	rrb = rmesa->state.depth.rrb;
-+	if (!rrb)
-+		return NULL;
-+
-+	return rrb;
-+}
-+
-+static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
-+{
-+	struct radeon_renderbuffer *rrb;
-+
-+	rrb = rmesa->state.color.rrb;
-+	if (!rrb)
-+		return NULL;
-+	return rrb;
-+}
-+
-+#include "radeon_cmdbuf.h"
-+
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
-new file mode 100644
-index 0000000..3e71362
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
-@@ -0,0 +1,625 @@
-+/**************************************************************************
-+
-+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
-+                     VA Linux Systems Inc., Fremont, California.
-+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+
-+The Weather Channel (TM) funded Tungsten Graphics to develop the
-+initial release of the Radeon 8500 driver under the XFree86 license.
-+This notice must be preserved.
-+
-+All Rights Reserved.
-+
-+Permission is hereby granted, free of charge, to any person obtaining
-+a copy of this software and associated documentation files (the
-+"Software"), to deal in the Software without restriction, including
-+without limitation the rights to use, copy, modify, merge, publish,
-+distribute, sublicense, and/or sell copies of the Software, and to
-+permit persons to whom the Software is furnished to do so, subject to
-+the following conditions:
-+
-+The above copyright notice and this permission notice (including the
-+next paragraph) shall be included in all copies or substantial
-+portions of the Software.
-+
-+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+
-+**************************************************************************/
-+
-+#include "radeon_common.h"
-+#include "xmlpool.h"		/* for symbolic values of enum-type options */
-+#include "utils.h"
-+#include "vblank.h"
-+#include "drirenderbuffer.h"
-+#include "main/context.h"
-+#include "main/framebuffer.h"
-+#include "main/state.h"
-+#include "main/simple_list.h"
-+#include "swrast/swrast.h"
-+#include "swrast_setup/swrast_setup.h"
-+#include "tnl/tnl.h"
-+
-+#define DRIVER_DATE "20090101"
-+
-+#ifndef RADEON_DEBUG
-+int RADEON_DEBUG = (0);
-+#endif
-+
-+/* Return various strings for glGetString().
-+ */
-+static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
-+{
-+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-+	static char buffer[128];
-+
-+	switch (name) {
-+	case GL_VENDOR:
-+		if (IS_R300_CLASS(radeon->radeonScreen))
-+			return (GLubyte *) "DRI R300 Project";
-+		else
-+			return (GLubyte *) "Tungsten Graphics, Inc.";
-+
-+	case GL_RENDERER:
-+	{
-+		unsigned offset;
-+		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
-+			radeon->radeonScreen->AGPMode;
-+		const char* chipname;
-+
-+		if (IS_R300_CLASS(radeon->radeonScreen))
-+			chipname = "R300";
-+		else if (IS_R200_CLASS(radeon->radeonScreen))
-+			chipname = "R200";
-+		else
-+			chipname = "R100";
-+
-+		offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
-+					      agp_mode);
-+
-+		if (IS_R300_CLASS(radeon->radeonScreen)) {
-+			sprintf(&buffer[offset], " %sTCL",
-+				(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
-+				? "" : "NO-");
-+		} else {
-+			sprintf(&buffer[offset], " %sTCL",
-+				!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
-+				? "" : "NO-");
-+		}
-+
-+		if (radeon->radeonScreen->driScreen->dri2.enabled)
-+			strcat(buffer, " DRI2");
-+
-+		return (GLubyte *) buffer;
-+	}
-+
-+	default:
-+		return NULL;
-+	}
-+}
-+
-+/* Initialize the driver's misc functions.
-+ */
-+static void radeonInitDriverFuncs(struct dd_function_table *functions)
-+{
-+	functions->GetString = radeonGetString;
-+}
-+
-+/**
-+ * Create and initialize all common fields of the context,
-+ * including the Mesa context itself.
-+ */
-+GLboolean radeonInitContext(radeonContextPtr radeon,
-+			    struct dd_function_table* functions,
-+			    const __GLcontextModes * glVisual,
-+			    __DRIcontextPrivate * driContextPriv,
-+			    void *sharedContextPrivate)
-+{
-+	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-+	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
-+	GLcontext* ctx;
-+	GLcontext* shareCtx;
-+	int fthrottle_mode;
-+
-+	/* Fill in additional standard functions. */
-+	radeonInitDriverFuncs(functions);
-+
-+	radeon->radeonScreen = screen;
-+	/* Allocate and initialize the Mesa context */
-+	if (sharedContextPrivate)
-+		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
-+	else
-+		shareCtx = NULL;
-+	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
-+					    functions, (void *)radeon);
-+	if (!radeon->glCtx)
-+		return GL_FALSE;
-+
-+	ctx = radeon->glCtx;
-+	driContextPriv->driverPrivate = radeon;
-+
-+	/* DRI fields */
-+	radeon->dri.context = driContextPriv;
-+	radeon->dri.screen = sPriv;
-+	radeon->dri.drawable = NULL;
-+	radeon->dri.readable = NULL;
-+	radeon->dri.hwContext = driContextPriv->hHWContext;
-+	radeon->dri.hwLock = &sPriv->pSAREA->lock;
-+	radeon->dri.fd = sPriv->fd;
-+	radeon->dri.drmMinor = sPriv->drm_version.minor;
-+
-+	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
-+					       screen->sarea_priv_offset);
-+
-+	/* Setup IRQs */
-+	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
-+	radeon->iw.irq_seq = -1;
-+	radeon->irqsEmitted = 0;
-+	radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
-+			  radeon->radeonScreen->irq);
-+
-+	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-+
-+	if (!radeon->do_irqs)
-+		fprintf(stderr,
-+			"IRQ's not enabled, falling back to %s: %d %d\n",
-+			radeon->do_usleeps ? "usleeps" : "busy waits",
-+			fthrottle_mode, radeon->radeonScreen->irq);
-+	
-+        radeon->texture_depth = driQueryOptioni (&radeon->optionCache,
-+					        "texture_depth");
-+        if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-+                radeon->texture_depth = ( glVisual->rgbBits > 16 ) ?
-+	        DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
-+
-+	radeon->texture_row_align = 32;
-+
-+	return GL_TRUE;
-+}
-+
-+
-+
-+/**
-+ * Destroy the command buffer and state atoms.
-+ */
-+static void radeon_destroy_atom_list(radeonContextPtr radeon)
-+{
-+	struct radeon_state_atom *atom;
-+
-+	foreach(atom, &radeon->hw.atomlist) {
-+		FREE(atom->cmd);
-+		if (atom->lastcmd)
-+			FREE(atom->lastcmd);
-+	}
-+
-+}
-+
-+/**
-+ * Cleanup common context fields.
-+ * Called by r200DestroyContext/r300DestroyContext
-+ */
-+void radeonDestroyContext(__DRIcontextPrivate *driContextPriv )
-+{
-+#ifdef RADEON_BO_TRACK
-+	FILE *track;
-+#endif
-+	GET_CURRENT_CONTEXT(ctx);
-+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-+	radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
-+
-+	if (radeon == current) {
-+		radeon_firevertices(radeon);
-+		_mesa_make_current(NULL, NULL, NULL);
-+	}
-+	
-+	assert(radeon);
-+	if (radeon) {
-+
-+		if (radeon->dma.current) {
-+			rcommonFlushCmdBuf( radeon, __FUNCTION__ );
-+		}
-+
-+		radeonReleaseArrays(radeon->glCtx, ~0);
-+
-+		if (radeon->vtbl.free_context)
-+			radeon->vtbl.free_context(radeon->glCtx);
-+		_swsetup_DestroyContext( radeon->glCtx );
-+		_tnl_DestroyContext( radeon->glCtx );
-+		_vbo_DestroyContext( radeon->glCtx );
-+		_swrast_DestroyContext( radeon->glCtx );
-+	
-+		radeonDestroyBuffer(radeon->dri.drawable);
-+		radeonDestroyBuffer(radeon->dri.readable);
-+
-+		/* free atom list */
-+		/* free the Mesa context */
-+		_mesa_destroy_context(radeon->glCtx);
-+		
-+		/* _mesa_destroy_context() might result in calls to functions that
-+		 * depend on the DriverCtx, so don't set it to NULL before.
-+		 *
-+		 * radeon->glCtx->DriverCtx = NULL;
-+		 */
-+		/* free the option cache */
-+		driDestroyOptionCache(&radeon->optionCache);
-+		
-+		rcommonDestroyCmdBuf(radeon);
-+
-+		radeon_destroy_atom_list(radeon);
-+
-+		if (radeon->state.scissor.pClipRects) {
-+			FREE(radeon->state.scissor.pClipRects);
-+			radeon->state.scissor.pClipRects = 0;
-+		}
-+	}
-+#ifdef RADEON_BO_TRACK
-+	track = fopen("/tmp/tracklog", "w");
-+	if (track) {
-+		radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
-+		fclose(track);
-+	}
-+#endif
-+	FREE(radeon);
-+}
-+
-+/* Force the context `c' to be unbound from its buffer.
-+ */
-+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
-+{
-+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-+
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
-+			radeon->glCtx);
-+
-+	return GL_TRUE;
-+}
-+
-+
-+static void
-+radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
-+					struct radeon_framebuffer *draw)
-+{
-+	/* if radeon->fake */
-+	struct radeon_renderbuffer *rb;
-+
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->frontOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->backOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+}
-+
-+static void
-+radeon_make_renderbuffer_current(radeonContextPtr radeon,
-+				 struct radeon_framebuffer *draw)
-+{
-+	int size = 4096*4096*4;
-+	/* if radeon->fake */
-+	struct radeon_renderbuffer *rb;
-+	
-+	if (radeon->radeonScreen->kernel_mm) {
-+		radeon_make_kernel_renderbuffer_current(radeon, draw);
-+		return;
-+	}
-+			
-+
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->frontOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->backOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+}
-+
-+
-+void
-+radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
-+{
-+	unsigned int attachments[10];
-+	__DRIbuffer *buffers;
-+	__DRIscreen *screen;
-+	struct radeon_renderbuffer *rb;
-+	int i, count;
-+	struct radeon_framebuffer *draw;
-+	radeonContextPtr radeon;
-+	char *regname;
-+	struct radeon_bo *depth_bo = NULL, *bo;
-+
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+	    fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-+	
-+	draw = drawable->driverPrivate;
-+	screen = context->driScreenPriv;
-+	radeon = (radeonContextPtr) context->driverPrivate;
-+	i = 0;
-+	if (draw->color_rb[0])
-+		attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
-+	if (draw->color_rb[1])
-+		attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-+	if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH))
-+		attachments[i++] = __DRI_BUFFER_DEPTH;
-+	if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL))
-+		attachments[i++] = __DRI_BUFFER_STENCIL;
-+	
-+	buffers = (*screen->dri2.loader->getBuffers)(drawable,
-+						     &drawable->w,
-+						     &drawable->h,
-+						     attachments, i,
-+						     &count,
-+						     drawable->loaderPrivate);
-+	if (buffers == NULL)
-+		return;
-+
-+	/* set one cliprect to cover the whole drawable */
-+	drawable->x = 0;
-+	drawable->y = 0;
-+	drawable->backX = 0;
-+	drawable->backY = 0;
-+	drawable->numClipRects = 1;
-+	drawable->pClipRects[0].x1 = 0;
-+	drawable->pClipRects[0].y1 = 0;
-+	drawable->pClipRects[0].x2 = drawable->w;
-+	drawable->pClipRects[0].y2 = drawable->h;
-+	drawable->numBackClipRects = 1;
-+	drawable->pBackClipRects[0].x1 = 0;
-+	drawable->pBackClipRects[0].y1 = 0;
-+	drawable->pBackClipRects[0].x2 = drawable->w;
-+	drawable->pBackClipRects[0].y2 = drawable->h;
-+	for (i = 0; i < count; i++) {
-+		switch (buffers[i].attachment) {
-+		case __DRI_BUFFER_FRONT_LEFT:
-+			rb = draw->color_rb[0];
-+			regname = "dri2 front buffer";
-+			break;
-+		case __DRI_BUFFER_BACK_LEFT:
-+			rb = draw->color_rb[1];
-+			regname = "dri2 back buffer";
-+			break;
-+		case __DRI_BUFFER_DEPTH:
-+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
-+			regname = "dri2 depth buffer";
-+			break;
-+		case __DRI_BUFFER_STENCIL:
-+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
-+			regname = "dri2 stencil buffer";
-+			break;
-+		case __DRI_BUFFER_ACCUM:
-+		default:
-+			fprintf(stderr,
-+				"unhandled buffer attach event, attacment type %d\n",
-+				buffers[i].attachment);
-+			return;
-+		}
-+
-+		if (rb == NULL)
-+			continue;
-+
-+		if (rb->bo) {
-+			uint32_t name = radeon_gem_name_bo(rb->bo);
-+			if (name == buffers[i].name)
-+				continue;
-+		}
-+
-+		if (RADEON_DEBUG & DEBUG_DRI)
-+			fprintf(stderr,
-+				"attaching buffer %s, %d, at %d, cpp %d, pitch %d\n",
-+				regname, buffers[i].name, buffers[i].attachment,
-+				buffers[i].cpp, buffers[i].pitch);
-+
-+		rb->cpp = buffers[i].cpp;
-+		rb->pitch = buffers[i].pitch;
-+		rb->width = drawable->w;
-+		rb->height = drawable->h;
-+		rb->has_surface = 0;
-+
-+		if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
-+			if (RADEON_DEBUG & DEBUG_DRI)
-+				fprintf(stderr, "(reusing depth buffer as stencil)\n");
-+			bo = depth_bo;
-+			radeon_bo_ref(bo);
-+		} else {
-+			bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						buffers[i].name,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						buffers[i].flags);
-+			if (bo == NULL) {
-+
-+				fprintf(stderr, "failed to attach %s %d\n",
-+					regname, buffers[i].name);
-+				
-+			}
-+		}
-+
-+		if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
-+			if (draw->base.Visual.depthBits == 16)
-+				rb->cpp = 2;
-+			depth_bo = bo;
-+		}
-+
-+		radeon_renderbuffer_set_bo(rb, bo);
-+		radeon_bo_unref(bo);
-+		    
-+	}
-+
-+	driUpdateFramebufferSize(radeon->glCtx, drawable);
-+}
-+
-+/* Force the context `c' to be the current context and associate with it
-+ * buffer `b'.
-+ */
-+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
-+			    __DRIdrawablePrivate * driDrawPriv,
-+			    __DRIdrawablePrivate * driReadPriv)
-+{
-+	radeonContextPtr radeon;
-+	struct radeon_framebuffer *drfb;
-+	struct gl_framebuffer *readfb;
-+
-+	if (!driContextPriv) {
-+		if (RADEON_DEBUG & DEBUG_DRI)
-+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
-+		_mesa_make_current(NULL, NULL, NULL);
-+		return GL_TRUE;
-+	}
-+
-+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-+	drfb = driDrawPriv->driverPrivate;
-+	readfb = driReadPriv->driverPrivate;
-+
-+	if (driContextPriv->driScreenPriv->dri2.enabled) {    
-+		radeon_update_renderbuffers(driContextPriv, driDrawPriv);
-+		if (driDrawPriv != driReadPriv)
-+			radeon_update_renderbuffers(driContextPriv, driReadPriv);
-+		radeon->state.color.rrb =
-+			radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT);
-+		radeon->state.depth.rrb =
-+			radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH);
-+	} else {
-+		radeon_make_renderbuffer_current(radeon, drfb);
-+	}
-+
-+
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+	     fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb);
-+
-+	if (radeon->dri.readable != driReadPriv)
-+		radeon->dri.readable = driReadPriv;
-+
-+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
-+	if (driReadPriv != driDrawPriv)
-+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
-+
-+	_mesa_make_current(radeon->glCtx, &drfb->base, readfb);
-+
-+	_mesa_update_state(radeon->glCtx);
-+
-+	if (radeon->glCtx->DrawBuffer == &drfb->base) {
-+
-+		if (radeon->dri.drawable != driDrawPriv) {
-+			if (driDrawPriv->swap_interval == (unsigned)-1) {
-+				int i;
-+				driDrawPriv->vblFlags =
-+					(radeon->radeonScreen->irq != 0)
-+					? driGetDefaultVBlankFlags(&radeon->
-+								   optionCache)
-+					: VBLANK_FLAG_NO_IRQ;
-+				    
-+				driDrawableInitVBlank(driDrawPriv);
-+				drfb->vbl_waited = driDrawPriv->vblSeq;
-+
-+				for (i = 0; i < 2; i++) {
-+					if (drfb->color_rb[i])
-+						drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
-+				}
-+				    
-+			}
-+			radeon->dri.drawable = driDrawPriv;
-+			
-+			radeon_window_moved(radeon);
-+		}
-+		radeon_draw_buffer(radeon->glCtx, &drfb->base);
-+	}
-+
-+
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "End %s\n", __FUNCTION__);
-+	return GL_TRUE;
-+}
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
-new file mode 100644
-index 0000000..181688c
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
-@@ -0,0 +1,564 @@
-+
-+#ifndef COMMON_CONTEXT_H
-+#define COMMON_CONTEXT_H
-+
-+#include "main/mm.h"
-+#include "math/m_vector.h"
-+#include "texmem.h"
-+#include "tnl/t_context.h"
-+#include "main/colormac.h"
-+
-+#include "radeon_screen.h"
-+#include "radeon_drm.h"
-+#include "dri_util.h"
-+#include "tnl/t_vertex.h"
-+
-+struct radeon_context;
-+
-+#include "radeon_bocs_wrapper.h"
-+
-+/* This union is used to avoid warnings/miscompilation
-+   with float to uint32_t casts due to strict-aliasing */
-+typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
-+
-+struct radeon_context;
-+typedef struct radeon_context radeonContextRec;
-+typedef struct radeon_context *radeonContextPtr;
-+
-+
-+#define TEX_0   0x1
-+#define TEX_1   0x2
-+#define TEX_2   0x4
-+#define TEX_3	0x8
-+#define TEX_4	0x10
-+#define TEX_5	0x20
-+
-+/* Rasterizing fallbacks */
-+/* See correponding strings in r200_swtcl.c */
-+#define RADEON_FALLBACK_TEXTURE		0x0001
-+#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
-+#define RADEON_FALLBACK_STENCIL		0x0004
-+#define RADEON_FALLBACK_RENDER_MODE	0x0008
-+#define RADEON_FALLBACK_BLEND_EQ	0x0010
-+#define RADEON_FALLBACK_BLEND_FUNC	0x0020
-+#define RADEON_FALLBACK_DISABLE 	0x0040
-+#define RADEON_FALLBACK_BORDER_MODE	0x0080
-+#define RADEON_FALLBACK_DEPTH_BUFFER	0x0100
-+#define RADEON_FALLBACK_STENCIL_BUFFER  0x0200
-+
-+#define R200_FALLBACK_TEXTURE           0x01
-+#define R200_FALLBACK_DRAW_BUFFER       0x02
-+#define R200_FALLBACK_STENCIL           0x04
-+#define R200_FALLBACK_RENDER_MODE       0x08
-+#define R200_FALLBACK_DISABLE           0x10
-+#define R200_FALLBACK_BORDER_MODE       0x20
-+
-+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
-+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
-+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
-+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
-+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
-+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
-+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
-+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
-+#define RADEON_TCL_FALLBACK_FOGCOORDSPEC      0x100 /* fogcoord, sep. spec light */
-+
-+/* The blit width for texture uploads
-+ */
-+#define BLIT_WIDTH_BYTES 1024
-+
-+/* Use the templated vertex format:
-+ */
-+#define COLOR_IS_RGBA
-+#define TAG(x) radeon##x
-+#include "tnl_dd/t_dd_vertex.h"
-+#undef TAG
-+
-+#define RADEON_RB_CLASS 0xdeadbeef
-+
-+struct radeon_renderbuffer
-+{
-+	struct gl_renderbuffer base;
-+	struct radeon_bo *bo;
-+	unsigned int cpp;
-+	/* unsigned int offset; */
-+	unsigned int pitch;
-+	unsigned int width;
-+	unsigned int height;
-+
-+	uint32_t draw_offset; /* FBO */
-+	/* boo Xorg 6.8.2 compat */
-+	int has_surface;
-+
-+	GLuint pf_pending;  /**< sequence number of pending flip */
-+	GLuint vbl_pending;   /**< vblank sequence number of pending flip */
-+	__DRIdrawablePrivate *dPriv;
-+};
-+
-+struct radeon_framebuffer
-+{
-+	struct gl_framebuffer base;
-+
-+	struct radeon_renderbuffer *color_rb[2];
-+
-+	GLuint vbl_waited;
-+
-+	/* buffer swap */
-+	int64_t swap_ust;
-+	int64_t swap_missed_ust;
-+
-+	GLuint swap_count;
-+	GLuint swap_missed_count;
-+
-+	/* Drawable page flipping state */
-+	GLboolean pf_active;
-+	GLint pf_current_page;
-+	GLint pf_num_pages;
-+
-+};
-+
-+ 
-+struct radeon_colorbuffer_state {
-+	GLuint clear;
-+	int roundEnable;
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t draw_offset; /* offset into color renderbuffer - FBOs */
-+};
-+
-+struct radeon_depthbuffer_state {
-+	GLuint clear;
-+	struct radeon_renderbuffer *rrb;
-+};
-+
-+struct radeon_scissor_state {
-+	drm_clip_rect_t rect;
-+	GLboolean enabled;
-+
-+	GLuint numClipRects;	/* Cliprects active */
-+	GLuint numAllocedClipRects;	/* Cliprects available */
-+	drm_clip_rect_t *pClipRects;
-+};
-+
-+struct radeon_stencilbuffer_state {
-+	GLuint clear;		/* rb3d_stencilrefmask value */
-+};
-+
-+struct radeon_stipple_state {
-+	GLuint mask[32];
-+};
-+
-+struct radeon_state_atom {
-+	struct radeon_state_atom *next, *prev;
-+	const char *name;	/* for debug */
-+	int cmd_size;		/* size in bytes */
-+        GLuint idx;
-+	GLuint is_tcl;
-+        GLuint *cmd;		/* one or more cmd's */
-+	GLuint *lastcmd;		/* one or more cmd's */
-+	GLboolean dirty;	/* dirty-mark in emit_state_list */
-+        int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */
-+        void (*emit) (GLcontext *, struct radeon_state_atom *atom);
-+};
-+
-+struct radeon_hw_state {
-+  	/* Head of the linked list of state atoms. */
-+	struct radeon_state_atom atomlist;
-+	int max_state_size;	/* Number of bytes necessary for a full state emit. */
-+	GLboolean is_dirty, all_dirty;
-+};
-+
-+
-+/* Texture related */
-+typedef struct _radeon_texture_image radeon_texture_image;
-+
-+struct _radeon_texture_image {
-+	struct gl_texture_image base;
-+
-+	/**
-+	 * If mt != 0, the image is stored in hardware format in the
-+	 * given mipmap tree. In this case, base.Data may point into the
-+	 * mapping of the buffer object that contains the mipmap tree.
-+	 *
-+	 * If mt == 0, the image is stored in normal memory pointed to
-+	 * by base.Data.
-+	 */
-+	struct _radeon_mipmap_tree *mt;
-+	struct radeon_bo *bo;
-+
-+	int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
-+	int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
-+};
-+
-+
-+static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
-+{
-+	return (radeon_texture_image*)image;
-+}
-+
-+
-+typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
-+
-+#define RADEON_TXO_MICRO_TILE               (1 << 3)
-+
-+/* Texture object in locally shared texture space.
-+ */
-+struct radeon_tex_obj {
-+	struct gl_texture_object base;
-+	struct _radeon_mipmap_tree *mt;
-+
-+	/**
-+	 * This is true if we've verified that the mipmap tree above is complete
-+	 * and so on.
-+	 */
-+	GLboolean validated;
-+
-+	GLuint override_offset;
-+	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
-+	GLuint tile_bits;	/* hw texture tile bits used on this texture */
-+        struct radeon_bo *bo;
-+
-+	GLuint pp_txfilter;	/* hardware register values */
-+	GLuint pp_txformat;
-+	GLuint pp_txformat_x;
-+	GLuint pp_txsize;	/* npot only */
-+	GLuint pp_txpitch;	/* npot only */
-+	GLuint pp_border_color;
-+	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
-+
-+        GLuint pp_txfilter_1;	/*  r300 */
-+
-+	GLboolean border_fallback;
-+
-+
-+};
-+
-+static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
-+{
-+	return (radeonTexObj*)texObj;
-+}
-+
-+/* Need refcounting on dma buffers:
-+ */
-+struct radeon_dma_buffer {
-+	int refcount;		/* the number of retained regions in buf */
-+	drmBufPtr buf;
-+};
-+
-+struct radeon_aos {
-+	struct radeon_bo *bo; /** Buffer object where vertex data is stored */
-+	int offset; /** Offset into buffer object, in bytes */
-+	int components; /** Number of components per vertex */
-+	int stride; /** Stride in dwords (may be 0 for repeating) */
-+	int count; /** Number of vertices */
-+};
-+
-+struct radeon_dma {
-+        /* Active dma region.  Allocations for vertices and retained
-+         * regions come from here.  Also used for emitting random vertices,
-+         * these may be flushed by calling flush_current();
-+         */
-+        struct radeon_bo *current; /** Buffer that DMA memory is allocated from */
-+        int current_used; /** Number of bytes allocated and forgotten about */
-+        int current_vertexptr; /** End of active vertex region */
-+
-+        /**
-+         * If current_vertexptr != current_used then flush must be non-zero.
-+         * flush must be called before non-active vertex allocations can be
-+         * performed.
-+         */
-+        void (*flush) (GLcontext *);
-+
-+        /* Number of "in-flight" DMA buffers, i.e. the number of buffers
-+         * for which a DISCARD command is currently queued in the command buffer
-+.
-+         */
-+        GLuint nr_released_bufs;
-+};
-+
-+/* radeon_swtcl.c
-+ */
-+struct radeon_swtcl_info {
-+
-+	GLuint RenderIndex;
-+	GLuint vertex_size;
-+	GLubyte *verts;
-+
-+	/* Fallback rasterization functions
-+	 */
-+	GLuint hw_primitive;
-+	GLenum render_primitive;
-+	GLuint numverts;
-+
-+	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
-+	GLuint vertex_attr_count;
-+
-+};
-+
-+#define RADEON_MAX_AOS_ARRAYS		16
-+struct radeon_tcl_info {
-+	struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS];
-+	GLuint aos_count;
-+	struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
-+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
-+};
-+
-+struct radeon_ioctl {
-+	GLuint vertex_offset;
-+        struct radeon_bo *bo;
-+	GLuint vertex_size;
-+};
-+
-+#define RADEON_MAX_PRIMS 64
-+
-+struct radeon_prim {
-+	GLuint start;
-+	GLuint end;
-+	GLuint prim;
-+};
-+
-+static INLINE GLuint radeonPackColor(GLuint cpp,
-+                                     GLubyte r, GLubyte g,
-+                                     GLubyte b, GLubyte a)
-+{
-+	switch (cpp) {
-+	case 2:
-+		return PACK_COLOR_565(r, g, b);
-+	case 4:
-+		return PACK_COLOR_8888(a, r, g, b);
-+	default:
-+		return 0;
-+	}
-+}
-+
-+#define MAX_CMD_BUF_SZ (16*1024)
-+
-+#define MAX_DMA_BUF_SZ (64*1024)
-+
-+struct radeon_store {
-+	GLuint statenr;
-+	GLuint primnr;
-+	char cmd_buf[MAX_CMD_BUF_SZ];
-+	int cmd_used;
-+	int elts_start;
-+};
-+
-+struct radeon_dri_mirror {
-+	__DRIcontextPrivate *context;	/* DRI context */
-+	__DRIscreenPrivate *screen;	/* DRI screen */
-+
-+   /**
-+    * DRI drawable bound to this context for drawing.
-+    */
-+	__DRIdrawablePrivate *drawable;
-+
-+   /**
-+    * DRI drawable bound to this context for reading.
-+    */
-+	__DRIdrawablePrivate *readable;
-+
-+	drm_context_t hwContext;
-+	drm_hw_lock_t *hwLock;
-+	int fd;
-+	int drmMinor;
-+};
-+
-+#define DEBUG_TEXTURE	0x001
-+#define DEBUG_STATE	0x002
-+#define DEBUG_IOCTL	0x004
-+#define DEBUG_PRIMS	0x008
-+#define DEBUG_VERTS	0x010
-+#define DEBUG_FALLBACKS	0x020
-+#define DEBUG_VFMT	0x040
-+#define DEBUG_CODEGEN	0x080
-+#define DEBUG_VERBOSE	0x100
-+#define DEBUG_DRI       0x200
-+#define DEBUG_DMA       0x400
-+#define DEBUG_SANITY    0x800
-+#define DEBUG_SYNC      0x1000
-+#define DEBUG_PIXEL     0x2000
-+#define DEBUG_MEMORY    0x4000
-+
-+
-+typedef void (*radeon_tri_func) (radeonContextPtr,
-+				 radeonVertex *,
-+				 radeonVertex *, radeonVertex *);
-+
-+typedef void (*radeon_line_func) (radeonContextPtr,
-+				  radeonVertex *, radeonVertex *);
-+
-+typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
-+
-+#define RADEON_MAX_BOS 24
-+struct radeon_state {
-+	struct radeon_colorbuffer_state color;
-+	struct radeon_depthbuffer_state depth;
-+	struct radeon_scissor_state scissor;
-+	struct radeon_stencilbuffer_state stencil;
-+
-+	struct radeon_cs_space_check bos[RADEON_MAX_BOS];
-+	int validated_bo_count;
-+};
-+
-+/**
-+ * This structure holds the command buffer while it is being constructed.
-+ *
-+ * The first batch of commands in the buffer is always the state that needs
-+ * to be re-emitted when the context is lost. This batch can be skipped
-+ * otherwise.
-+ */
-+struct radeon_cmdbuf {
-+	struct radeon_cs_manager    *csm;
-+	struct radeon_cs            *cs;
-+	int size; /** # of dwords total */
-+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
-+};
-+
-+struct radeon_context {
-+   GLcontext *glCtx;
-+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
-+  
-+   /* Texture object bookkeeping
-+    */
-+   int                   texture_depth;
-+   float                 initialMaxAnisotropy;
-+   uint32_t              texture_row_align;
-+
-+  struct radeon_dma dma;
-+  struct radeon_hw_state hw;
-+   /* Rasterization and vertex state:
-+    */
-+   GLuint TclFallback;
-+   GLuint Fallback;
-+   GLuint NewGLState;
-+   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
-+
-+   /* Drawable, cliprect and scissor information */
-+   GLuint numClipRects;	/* Cliprects for the draw buffer */
-+   drm_clip_rect_t *pClipRects;
-+   unsigned int lastStamp;
-+   GLboolean lost_context;
-+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
-+
-+   /* Mirrors of some DRI state */
-+   struct radeon_dri_mirror dri;
-+
-+   /* Busy waiting */
-+   GLuint do_usleeps;
-+   GLuint do_irqs;
-+   GLuint irqsEmitted;
-+   drm_radeon_irq_wait_t iw;
-+
-+   /* Derived state - for r300 only */
-+   struct radeon_state state;
-+
-+   struct radeon_swtcl_info swtcl;
-+   struct radeon_tcl_info tcl;
-+   /* Configuration cache
-+    */
-+   driOptionCache optionCache;
-+
-+   struct radeon_cmdbuf cmdbuf;
-+	
-+  drm_clip_rect_t fboRect;
-+  GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */
-+  GLboolean front_cliprects;
-+
-+  struct {
-+      struct gl_fragment_program *bitmap_fp;
-+      struct gl_vertex_program *passthrough_vp;
-+
-+      struct gl_fragment_program *saved_fp;
-+      GLboolean saved_fp_enable;
-+      struct gl_vertex_program *saved_vp;
-+      GLboolean saved_vp_enable;
-+
-+      GLint saved_vp_x, saved_vp_y;
-+      GLsizei saved_vp_width, saved_vp_height;
-+      GLenum saved_matrix_mode;
-+   } meta;
-+
-+   struct {
-+	   void (*get_lock)(radeonContextPtr radeon);
-+	   void (*update_viewport_offset)(GLcontext *ctx);
-+	   void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
-+	   void (*swtcl_flush)(GLcontext *ctx, uint32_t offset);
-+	   void (*pre_emit_atoms)(radeonContextPtr rmesa);
-+	   void (*pre_emit_state)(radeonContextPtr rmesa);
-+	   void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode);
-+	   void (*free_context)(GLcontext *ctx);
-+   } vtbl;
-+};
-+
-+#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
-+
-+/**
-+ * This function takes a float and packs it into a uint32_t
-+ */
-+static INLINE uint32_t radeonPackFloat32(float fl)
-+{
-+	union {
-+		float fl;
-+		uint32_t u;
-+	} u;
-+
-+	u.fl = fl;
-+	return u.u;
-+}
-+
-+/* This is probably wrong for some values, I need to test this
-+ * some more.  Range checking would be a good idea also..
-+ *
-+ * But it works for most things.  I'll fix it later if someone
-+ * else with a better clue doesn't
-+ */
-+static INLINE uint32_t radeonPackFloat24(float f)
-+{
-+	float mantissa;
-+	int exponent;
-+	uint32_t float24 = 0;
-+
-+	if (f == 0.0)
-+		return 0;
-+
-+	mantissa = frexpf(f, &exponent);
-+
-+	/* Handle -ve */
-+	if (mantissa < 0) {
-+		float24 |= (1 << 23);
-+		mantissa = mantissa * -1.0;
-+	}
-+	/* Handle exponent, bias of 63 */
-+	exponent += 62;
-+	float24 |= (exponent << 16);
-+	/* Kill 7 LSB of mantissa */
-+	float24 |= (radeonPackFloat32(mantissa) & 0x7FFFFF) >> 7;
-+
-+	return float24;
-+}
-+
-+GLboolean radeonInitContext(radeonContextPtr radeon,
-+			    struct dd_function_table* functions,
-+			    const __GLcontextModes * glVisual,
-+			    __DRIcontextPrivate * driContextPriv,
-+			    void *sharedContextPrivate);
-+
-+void radeonCleanupContext(radeonContextPtr radeon);
-+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
-+void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable);
-+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
-+			    __DRIdrawablePrivate * driDrawPriv,
-+			    __DRIdrawablePrivate * driReadPriv);
-+extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
-+
-+/* ================================================================
-+ * Debugging:
-+ */
-+#define DO_DEBUG		1
-+
-+#if DO_DEBUG
-+extern int RADEON_DEBUG;
-+#else
-+#define RADEON_DEBUG		0
-+#endif
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
-deleted file mode 100644
-index 46b490d..0000000
---- a/src/mesa/drivers/dri/radeon/radeon_compat.c
-+++ /dev/null
-@@ -1,301 +0,0 @@
--/**************************************************************************
--
--Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
--               Tungsten Graphics Inc., Austin, Texas.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining a
--copy of this software and associated documentation files (the "Software"),
--to deal in the Software without restriction, including without limitation
--on the rights to use, copy, modify, merge, publish, distribute, sub
--license, and/or sell copies of the Software, and to permit persons to whom
--the Software is furnished to do so, subject to the following conditions:
--
--The above copyright notice and this permission notice (including the next
--paragraph) shall be included in all copies or substantial portions of the
--Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
--ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
--DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
--OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
--USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--
--#include "radeon_context.h"
--#include "radeon_state.h"
--#include "radeon_ioctl.h"
--
--
--static struct { 
--	int start; 
--	int len; 
--	const char *name;
--} packet[RADEON_MAX_STATE_PACKETS] = {
--	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
--	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
--	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
--	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
--	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
--	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
--	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
--	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
--	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
--	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
--	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
--	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
--	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
--	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
--	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
--	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
--	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
--	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
--	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
--	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
--	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
--};
--
--
--static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
--				    struct radeon_state_atom *state )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--   drm_radeon_context_regs_t *ctx = &sarea->context_state;
--   drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
--   drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
--   int i;
--   int *buf = state->cmd;
--
--   for ( i = 0 ; i < state->cmd_size ; ) {
--      drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
--
--      if (RADEON_DEBUG & DEBUG_STATE)
--	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
--		 packet[(int)header->packet.packet_id].name);
--
--      switch (header->packet.packet_id) {
--      case RADEON_EMIT_PP_MISC:
--	 ctx->pp_misc = buf[i++]; 
--	 ctx->pp_fog_color = buf[i++];
--	 ctx->re_solid_color = buf[i++];
--	 ctx->rb3d_blendcntl = buf[i++];
--	 ctx->rb3d_depthoffset = buf[i++];
--	 ctx->rb3d_depthpitch = buf[i++];
--	 ctx->rb3d_zstencilcntl = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_PP_CNTL:
--	 ctx->pp_cntl = buf[i++];
--	 ctx->rb3d_cntl = buf[i++];
--	 ctx->rb3d_coloroffset = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_RB3D_COLORPITCH:
--	 ctx->rb3d_colorpitch = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_RE_LINE_PATTERN:
--	 ctx->re_line_pattern = buf[i++];
--	 ctx->re_line_state = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_LINE;
--	 break;
--      case RADEON_EMIT_SE_LINE_WIDTH:
--	 ctx->se_line_width = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_LINE;
--	 break;
--      case RADEON_EMIT_PP_LUM_MATRIX:
--	 ctx->pp_lum_matrix = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
--	 break;
--      case RADEON_EMIT_PP_ROT_MATRIX_0:
--	 ctx->pp_rot_matrix_0 = buf[i++];
--	 ctx->pp_rot_matrix_1 = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
--	 break;
--      case RADEON_EMIT_RB3D_STENCILREFMASK:
--	 ctx->rb3d_stencilrefmask = buf[i++];
--	 ctx->rb3d_ropcntl = buf[i++];
--	 ctx->rb3d_planemask = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_MASKS;
--	 break;
--      case RADEON_EMIT_SE_VPORT_XSCALE:
--	 ctx->se_vport_xscale = buf[i++];
--	 ctx->se_vport_xoffset = buf[i++];
--	 ctx->se_vport_yscale = buf[i++];
--	 ctx->se_vport_yoffset = buf[i++];
--	 ctx->se_vport_zscale = buf[i++];
--	 ctx->se_vport_zoffset = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
--	 break;
--      case RADEON_EMIT_SE_CNTL:
--	 ctx->se_cntl = buf[i++];
--	 ctx->se_coord_fmt = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
--	 break;
--      case RADEON_EMIT_SE_CNTL_STATUS:
--	 ctx->se_cntl_status = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_SETUP;
--	 break;
--      case RADEON_EMIT_RE_MISC:
--	 ctx->re_misc = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_MISC;
--	 break;
--      case RADEON_EMIT_PP_TXFILTER_0:
--	 tex0->pp_txfilter = buf[i++];
--	 tex0->pp_txformat = buf[i++];
--	 tex0->pp_txoffset = buf[i++];
--	 tex0->pp_txcblend = buf[i++];
--	 tex0->pp_txablend = buf[i++];
--	 tex0->pp_tfactor = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX0;
--	 break;
--      case RADEON_EMIT_PP_BORDER_COLOR_0:
--	 tex0->pp_border_color = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX0;
--	 break;
--      case RADEON_EMIT_PP_TXFILTER_1:
--	 tex1->pp_txfilter = buf[i++];
--	 tex1->pp_txformat = buf[i++];
--	 tex1->pp_txoffset = buf[i++];
--	 tex1->pp_txcblend = buf[i++];
--	 tex1->pp_txablend = buf[i++];
--	 tex1->pp_tfactor = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX1;
--	 break;
--      case RADEON_EMIT_PP_BORDER_COLOR_1:
--	 tex1->pp_border_color = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX1;
--	 break;
--
--      case RADEON_EMIT_SE_ZBIAS_FACTOR:
--	 i++;
--	 i++;
--	 break;
--
--      case RADEON_EMIT_PP_TXFILTER_2:
--      case RADEON_EMIT_PP_BORDER_COLOR_2:
--      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
--      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
--      default:
--	 /* These states aren't understood by radeon drm 1.1 */
--	 fprintf(stderr, "Tried to emit unsupported state\n");
--	 return;
--      }
--   }
--}
--
--
--
--static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--
--   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
--      return;
--
--   foreach(atom, &rmesa->hw.atomlist) {
--      if (rmesa->hw.all_dirty)
--	 atom->dirty = GL_TRUE;
--      if (atom->is_tcl)
--	 atom->dirty = GL_FALSE;
--      if (atom->dirty)
--	 radeonCompatEmitPacket(rmesa, atom);
--   }
-- 
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
--}
--
--
--static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
--					     GLuint hw_primitive,
--					     GLuint nverts,
--					     drm_clip_rect_t *pbox,
--					     GLuint nbox )
--{
--   int i;
--
--   for ( i = 0 ; i < nbox ; ) {
--      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      drm_radeon_vertex_t vtx;
--      
--      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
--      rmesa->sarea->nbox = nr - i;
--
--      for ( ; i < nr ; i++) 
--	 *b++ = pbox[i];
--      
--      if (RADEON_DEBUG & DEBUG_IOCTL)
--	 fprintf(stderr, 
--		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
--		 "disc %d nbox %d\n",
--		 hw_primitive, 
--		 rmesa->dma.current.buf->buf->idx, 
--		 nverts, 
--		 nr == nbox,
--		 rmesa->sarea->nbox );
--
--      vtx.prim = hw_primitive;
--      vtx.idx = rmesa->dma.current.buf->buf->idx;
--      vtx.count = nverts;
--      vtx.discard = (nr == nbox);      
--
--      drmCommandWrite( rmesa->dri.fd, 
--		       DRM_RADEON_VERTEX,
--		       &vtx, sizeof(vtx));
--   }
--}
--
--
--
--/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!  
-- */
--void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
--				GLuint vertex_format,
--				GLuint hw_primitive,
--				GLuint nrverts )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   LOCK_HARDWARE( rmesa );
--
--   radeonCompatEmitStateLocked( rmesa );
--   rmesa->sarea->vc_format = vertex_format;
--   
--   if (rmesa->state.scissor.enabled) {
--      radeonCompatEmitPrimitiveLocked( rmesa, 
--				       hw_primitive,
--				       nrverts,
--				       rmesa->state.scissor.pClipRects,
--				       rmesa->state.scissor.numClipRects );
--   }
--   else {
--      radeonCompatEmitPrimitiveLocked( rmesa, 
--				       hw_primitive,
--				       nrverts,
--				       rmesa->pClipRects,
--				       rmesa->numClipRects );
--   }
--
--
--   UNLOCK_HARDWARE( rmesa );
--}
--
-diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
-index ea81a32..2600c78 100644
---- a/src/mesa/drivers/dri/radeon/radeon_context.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
-@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drivers/common/driverfuncs.h"
- 
-+#include "radeon_common.h"
- #include "radeon_context.h"
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
-@@ -65,6 +66,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define need_GL_EXT_blend_minmax
- #define need_GL_EXT_fog_coord
- #define need_GL_EXT_secondary_color
-+#define need_GL_EXT_framebuffer_object
- #include "extension_helper.h"
- 
- #define DRIVER_DATE	"20061018"
-@@ -72,40 +74,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vblank.h"
- #include "utils.h"
- #include "xmlpool.h" /* for symbolic values of enum-type options */
--#ifndef RADEON_DEBUG
--int RADEON_DEBUG = (0);
--#endif
--
--
--/* Return various strings for glGetString().
-- */
--static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   static char buffer[128];
--   unsigned   offset;
--   GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
--      rmesa->radeonScreen->AGPMode;
--
--   switch ( name ) {
--   case GL_VENDOR:
--      return (GLubyte *)"Tungsten Graphics, Inc.";
--
--   case GL_RENDERER:
--      offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
--				     agp_mode );
--
--      sprintf( & buffer[ offset ], " %sTCL",
--	       !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
--	       ? "" : "NO-" );
--
--      return (GLubyte *)buffer;
--
--   default:
--      return NULL;
--   }
--}
--
- 
- /* Extension strings exported by the R100 driver.
-  */
-@@ -121,6 +89,7 @@ const struct dri_extension card_extensions[] =
-     { "GL_EXT_blend_logic_op",             NULL },
-     { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
-     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
-+    { "GL_EXT_packed_depth_stencil",	   NULL},
-     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
-     { "GL_EXT_stencil_wrap",               NULL },
-     { "GL_EXT_texture_edge_clamp",         NULL },
-@@ -137,6 +106,11 @@ const struct dri_extension card_extensions[] =
-     { NULL,                                NULL }
- };
- 
-+const struct dri_extension mm_extensions[] = {
-+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
-+  { NULL, NULL }
-+};
-+
- extern const struct tnl_pipeline_stage _radeon_render_stage;
- extern const struct tnl_pipeline_stage _radeon_tcl_stage;
- 
-@@ -160,15 +134,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
-    NULL,
- };
- 
--
--
--/* Initialize the driver's misc functions.
-- */
--static void radeonInitDriverFuncs( struct dd_function_table *functions )
--{
--    functions->GetString	= radeonGetString;
--}
--
- static const struct dri_debug_control debug_control[] =
- {
-     { "fall",  DEBUG_FALLBACKS },
-@@ -188,19 +153,69 @@ static const struct dri_debug_control debug_control[] =
-     { NULL,    0 }
- };
- 
-+static void r100_get_lock(radeonContextPtr radeon)
-+{
-+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
-+   drm_radeon_sarea_t *sarea = radeon->sarea;
-+
-+   RADEON_STATECHANGE(rmesa, ctx);
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
-+	 RADEON_COLOR_TILE_ENABLE;
-+   } else {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
-+	 ~RADEON_COLOR_TILE_ENABLE;
-+   }
-+   
-+   if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
-+      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
-+      
-+      if (!radeon->radeonScreen->kernel_mm)
-+         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
-+   }
-+}
-+
-+static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+}
-+
-+static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
-+{
-+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
-+   
-+   /* r100 always needs to emit ZBS to avoid TCL lockups */
-+   rmesa->hw.zbs.dirty = 1;
-+   radeon->hw.is_dirty = 1;
-+}
-+
-+static void r100_vtbl_free_context(GLcontext *ctx)
-+{
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   _mesa_vector4f_free( &rmesa->tcl.ObjClean );
-+}
-+
-+static void r100_init_vtbl(radeonContextPtr radeon)
-+{
-+   radeon->vtbl.get_lock = r100_get_lock;
-+   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
-+   radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
-+   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
-+   radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
-+   radeon->vtbl.fallback = radeonFallback;
-+}
- 
- /* Create the device specific context.
-  */
- GLboolean
--radeonCreateContext( const __GLcontextModes *glVisual,
-+r100CreateContext( const __GLcontextModes *glVisual,
-                      __DRIcontextPrivate *driContextPriv,
-                      void *sharedContextPrivate)
- {
-    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
-    struct dd_function_table functions;
--   radeonContextPtr rmesa;
--   GLcontext *ctx, *shareCtx;
-+   r100ContextPtr rmesa;
-+   GLcontext *ctx;
-    int i;
-    int tcl_mode, fthrottle_mode;
- 
-@@ -209,10 +224,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-    assert(screen);
- 
-    /* Allocate the Radeon context */
--   rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
-+   rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
-    if ( !rmesa )
-       return GL_FALSE;
- 
-+   r100_init_vtbl(&rmesa->radeon);
-+
-    /* init exp fog table data */
-    radeonInitStaticFogData();
-    
-@@ -220,12 +237,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * Do this here so that initialMaxAnisotropy is set before we create
-     * the default textures.
-     */
--   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
-+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
- 			screen->driScreen->myNum, "radeon");
--   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
-+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
-                                                  "def_max_anisotropy");
- 
--   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
-+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-       if ( sPriv->drm_version.minor < 13 )
- 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- 			  "disabling.\n", sPriv->drm_version.minor );
-@@ -240,65 +257,17 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * (the texture functions are especially important)
-     */
-    _mesa_init_driver_functions( &functions );
--   radeonInitDriverFuncs( &functions );
-    radeonInitTextureFuncs( &functions );
- 
--   /* Allocate the Mesa context */
--   if (sharedContextPrivate)
--      shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
--   else
--      shareCtx = NULL;
--   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
--                                       &functions, (void *) rmesa);
--   if (!rmesa->glCtx) {
--      FREE(rmesa);
--      return GL_FALSE;
--   }
--   driContextPriv->driverPrivate = rmesa;
--
--   /* Init radeon context data */
--   rmesa->dri.context = driContextPriv;
--   rmesa->dri.screen = sPriv;
--   rmesa->dri.drawable = NULL;
--   rmesa->dri.readable = NULL;
--   rmesa->dri.hwContext = driContextPriv->hHWContext;
--   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
--   rmesa->dri.fd = sPriv->fd;
--   rmesa->dri.drmMinor = sPriv->drm_version.minor;
--
--   rmesa->radeonScreen = screen;
--   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
--				       screen->sarea_priv_offset);
--
--
--   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
--
--   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
--   make_empty_list( & rmesa->swapped );
--
--   rmesa->nr_heaps = screen->numTexHeaps;
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
--	    screen->texSize[i],
--	    12,
--	    RADEON_NR_TEX_REGIONS,
--	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
--	    & rmesa->sarea->tex_age[i],
--	    & rmesa->swapped,
--	    sizeof( radeonTexObj ),
--	    (destroy_texture_object_t *) radeonDestroyTexObj );
--
--      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
--					& rmesa->c_textureSwaps );
-+   if (!radeonInitContext(&rmesa->radeon, &functions,
-+			  glVisual, driContextPriv,
-+			  sharedContextPrivate)) {
-+     FREE(rmesa);
-+     return GL_FALSE;
-    }
--   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
--					   "texture_depth");
--   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--      rmesa->texture_depth = ( screen->cpp == 4 ) ?
--	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- 
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
- 
-    /* Set the maximum texture size small enough that we can guarentee that
-     * all texture units can bind a maximal texture and have all of them in
-@@ -306,26 +275,13 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * setting allow larger textures.
-     */
- 
--   ctx = rmesa->glCtx;
--   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
-+   ctx = rmesa->radeon.glCtx;
-+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
- 						 "texture_units");
-    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
-    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- 
--   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
--
--   driCalculateMaxTextureLevels( rmesa->texture_heaps,
--				 rmesa->nr_heaps,
--				 & ctx->Const,
--				 4,
--				 11, /* max 2D texture size is 2048x2048 */
--				 8,  /* 256^3 */
--				 9,  /* \todo: max cube texture size seems to be 512x512(x6) */
--				 11, /* max rect texture size is 2048x2048. */
--				 12,
--				 GL_FALSE,
--				 i );
--
-+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
- 
-    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- 
-@@ -388,38 +344,39 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-    }
- 
-    driInitExtensions( ctx, card_extensions, GL_TRUE );
--   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     driInitExtensions(ctx, mm_extensions, GL_FALSE);
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
-       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
--   if (rmesa->glCtx->Mesa_DXTn) {
-+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-    }
--   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
-+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-    }
- 
--   if (rmesa->dri.drmMinor >= 9)
-+   if (rmesa->radeon.dri.drmMinor >= 9)
-       _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
- 
-    /* XXX these should really go right after _mesa_init_driver_functions() */
-+   radeon_fbo_init(&rmesa->radeon);
-+   radeonInitSpanFuncs( ctx );
-    radeonInitIoctlFuncs( ctx );
-    radeonInitStateFuncs( ctx );
--   radeonInitSpanFuncs( ctx );
-    radeonInitState( rmesa );
-    radeonInitSwtcl( ctx );
- 
-    _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
- 			 ctx->Const.MaxArrayLockSize, 32 );
- 
--   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
--   rmesa->iw.irq_seq = -1;
--   rmesa->irqsEmitted = 0;
--   rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
--		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
--
--   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-+   fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
-+   rmesa->radeon.iw.irq_seq = -1;
-+   rmesa->radeon.irqsEmitted = 0;
-+   rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
-+			    fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
- 
--   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
-+   rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
- 
- 
- #if DO_DEBUG
-@@ -427,206 +384,21 @@ radeonCreateContext( const __GLcontextModes *glVisual,
- 				       debug_control );
- #endif
- 
--   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
--   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
-+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
-+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
-       fprintf(stderr, "disabling 3D acceleration\n");
-       FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
-    } else if (tcl_mode == DRI_CONF_TCL_SW ||
--	      !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
--      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
--	 rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
-+	      !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
- 	 fprintf(stderr, "Disabling HW TCL support\n");
-       }
--      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
-+      TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
-    }
- 
--   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- /*       _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
-    }
-    return GL_TRUE;
- }
--
--
--/* Destroy the device specific context.
-- */
--/* Destroy the Mesa and driver specific context data.
-- */
--void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
--{
--   GET_CURRENT_CONTEXT(ctx);
--   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
--   radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
--
--   /* check if we're deleting the currently bound context */
--   if (rmesa == current) {
--      RADEON_FIREVERTICES( rmesa );
--      _mesa_make_current(NULL, NULL, NULL);
--   }
--
--   /* Free radeon context resources */
--   assert(rmesa); /* should never be null */
--   if ( rmesa ) {
--      GLboolean   release_texture_heaps;
--
--
--      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
--      _swsetup_DestroyContext( rmesa->glCtx );
--      _tnl_DestroyContext( rmesa->glCtx );
--      _vbo_DestroyContext( rmesa->glCtx );
--      _swrast_DestroyContext( rmesa->glCtx );
--
--      radeonDestroySwtcl( rmesa->glCtx );
--      radeonReleaseArrays( rmesa->glCtx, ~0 );
--      if (rmesa->dma.current.buf) {
--	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--      }
--
--      _mesa_vector4f_free( &rmesa->tcl.ObjClean );
--
--      if (rmesa->state.scissor.pClipRects) {
--	 FREE(rmesa->state.scissor.pClipRects);
--	 rmesa->state.scissor.pClipRects = NULL;
--      }
--
--      if ( release_texture_heaps ) {
--         /* This share group is about to go away, free our private
--          * texture object data.
--          */
--         int i;
--
--         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
--	    rmesa->texture_heaps[ i ] = NULL;
--         }
--
--	 assert( is_empty_list( & rmesa->swapped ) );
--      }
--
--      /* free the Mesa context */
--      rmesa->glCtx->DriverCtx = NULL;
--      _mesa_destroy_context( rmesa->glCtx );
--
--      /* free the option cache */
--      driDestroyOptionCache (&rmesa->optionCache);
--
--      FREE( rmesa );
--   }
--}
--
--
--
--
--void
--radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
--{
--
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      radeonContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--
--         if ( rmesa->doPageFlip ) {
--            radeonPageFlip( dPriv );
--         }
--         else {
--	     radeonCopyBuffer( dPriv, NULL );
--         }
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--			 int x, int y, int w, int h )
--{
--    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--	radeonContextPtr radeon;
--	GLcontext *ctx;
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--	ctx = radeon->glCtx;
--
--	if (ctx->Visual.doubleBufferMode) {
--	    drm_clip_rect_t rect;
--	    rect.x1 = x + dPriv->x;
--	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	    rect.x2 = rect.x1 + w;
--	    rect.y2 = rect.y1 + h;
--	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--	    radeonCopyBuffer(dPriv, &rect);
--	}
--    } else {
--	/* XXX this shouldn't be an error but we can't handle it for now */
--	_mesa_problem(NULL, "%s: drawable has no context!",
--		      __FUNCTION__);
--    }
--}
--
--/* Make context `c' the current context and bind it to the given
-- * drawing and reading surfaces.
-- */
--GLboolean
--radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
--                   __DRIdrawablePrivate *driDrawPriv,
--                   __DRIdrawablePrivate *driReadPriv )
--{
--   if ( driContextPriv ) {
--      radeonContextPtr newCtx = 
--	 (radeonContextPtr) driContextPriv->driverPrivate;
--
--      if (RADEON_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
--
--      newCtx->dri.readable = driReadPriv;
--
--      if ( (newCtx->dri.drawable != driDrawPriv) ||
--           newCtx->lastStamp != driDrawPriv->lastStamp ) {
--	 if (driDrawPriv->swap_interval == (unsigned)-1) {
--	    driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
--	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
--	       : VBLANK_FLAG_NO_IRQ;
--
--	    driDrawableInitVBlank( driDrawPriv );
--	 }
--
--	 newCtx->dri.drawable = driDrawPriv;
--
--	 radeonSetCliprects(newCtx);
--	 radeonUpdateViewportOffset( newCtx->glCtx );
--      }
--
--      _mesa_make_current( newCtx->glCtx,
--			  (GLframebuffer *) driDrawPriv->driverPrivate,
--			  (GLframebuffer *) driReadPriv->driverPrivate );
--
--      _mesa_update_state( newCtx->glCtx );
--   } else {
--      if (RADEON_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--      _mesa_make_current( NULL, NULL, NULL );
--   }
--
--   if (RADEON_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "End %s\n", __FUNCTION__);
--   return GL_TRUE;
--}
--
--/* Force the context `c' to be unbound from its buffer.
-- */
--GLboolean
--radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
--{
--   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
--
--   if (RADEON_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
--
--   return GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
-index 53df766..1795d8b 100644
---- a/src/mesa/drivers/dri/radeon/radeon_context.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
-@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "radeon_drm.h"
- #include "texmem.h"
--
- #include "main/macros.h"
- #include "main/mtypes.h"
- #include "main/colormac.h"
--
--struct radeon_context;
--typedef struct radeon_context radeonContextRec;
--typedef struct radeon_context *radeonContextPtr;
--
--/* This union is used to avoid warnings/miscompilation
--   with float to uint32_t casts due to strict-aliasing */
--typedef union {
--	GLfloat f;
--	uint32_t ui32;
--} float_ui32_type;
--
--#include "radeon_lock.h"
- #include "radeon_screen.h"
--#include "main/mm.h"
--
--#include "math/m_vector.h"
--
--#define TEX_0   0x1
--#define TEX_1   0x2
--#define TEX_2   0x4
--#define TEX_ALL 0x7
--
--/* Rasterizing fallbacks */
--/* See correponding strings in r200_swtcl.c */
--#define RADEON_FALLBACK_TEXTURE		0x0001
--#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
--#define RADEON_FALLBACK_STENCIL		0x0004
--#define RADEON_FALLBACK_RENDER_MODE	0x0008
--#define RADEON_FALLBACK_BLEND_EQ	0x0010
--#define RADEON_FALLBACK_BLEND_FUNC	0x0020
--#define RADEON_FALLBACK_DISABLE 	0x0040
--#define RADEON_FALLBACK_BORDER_MODE	0x0080
--
--/* The blit width for texture uploads
-- */
--#define BLIT_WIDTH_BYTES 1024
- 
--/* Use the templated vertex format:
-- */
--#define COLOR_IS_RGBA
--#define TAG(x) radeon##x
--#include "tnl_dd/t_dd_vertex.h"
--#undef TAG
--
--typedef void (*radeon_tri_func) (radeonContextPtr,
--				 radeonVertex *,
--				 radeonVertex *, radeonVertex *);
--
--typedef void (*radeon_line_func) (radeonContextPtr,
--				  radeonVertex *, radeonVertex *);
-+#include "radeon_common.h"
- 
--typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
--
--struct radeon_colorbuffer_state {
--	GLuint clear;
--	int roundEnable;
--};
- 
--struct radeon_depthbuffer_state {
--	GLuint clear;
--	GLfloat scale;
--};
-+struct r100_context;
-+typedef struct r100_context r100ContextRec;
-+typedef struct r100_context *r100ContextPtr;
- 
--struct radeon_scissor_state {
--	drm_clip_rect_t rect;
--	GLboolean enabled;
-+#include "radeon_lock.h"
- 
--	GLuint numClipRects;	/* Cliprects active */
--	GLuint numAllocedClipRects;	/* Cliprects available */
--	drm_clip_rect_t *pClipRects;
--};
- 
--struct radeon_stencilbuffer_state {
--	GLboolean hwBuffer;
--	GLuint clear;		/* rb3d_stencilrefmask value */
--};
- 
--struct radeon_stipple_state {
--	GLuint mask[32];
--};
-+#define R100_TEX_ALL 0x7
- 
- /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
- #define RADEON_ST_BIT(unit) \
-@@ -141,42 +73,6 @@ struct radeon_stipple_state {
- #define RADEON_Q_BIT(unit) \
- (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
- 
--typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
--
--/* Texture object in locally shared texture space.
-- */
--struct radeon_tex_obj {
--	driTextureObject base;
--
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	GLuint dirty_state;	/* Flags (1 per texunit) for
--				   whether or not this texobj
--				   has dirty hardware state
--				   (pp_*) that needs to be
--				   brought into the
--				   texunit. */
--
--	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
--
--	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
--
--	GLuint pp_txfilter;	/* hardware register values */
--	GLuint pp_txformat;
--	GLuint pp_txoffset;	/* Image location in texmem.
--				   All cube faces follow. */
--	GLuint pp_txsize;	/* npot only */
--	GLuint pp_txpitch;	/* npot only */
--	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--
--	GLboolean border_fallback;
--
--	GLuint tile_bits;	/* hw texture tile bits used on this texture */
--};
--
- struct radeon_texture_env_state {
- 	radeonTexObjPtr texobj;
- 	GLenum format;
-@@ -187,17 +83,6 @@ struct radeon_texture_state {
- 	struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
- };
- 
--struct radeon_state_atom {
--	struct radeon_state_atom *next, *prev;
--	const char *name;	/* for debug */
--	int cmd_size;		/* size in bytes */
--	GLuint is_tcl;
--	int *cmd;		/* one or more cmd's */
--	int *lastcmd;		/* one or more cmd's */
--	GLboolean dirty;	/* dirty-mark in emit_state_list */
--	 GLboolean(*check) (GLcontext *);	/* is this state active? */
--};
--
- /* Trying to keep these relatively short as the variables are becoming
-  * extravagently long.  Drop the driver name prefix off the front of
-  * everything - I think we know which driver we're in by now, and keep the
-@@ -410,10 +295,7 @@ struct radeon_state_atom {
- #define SHN_SHININESS      1
- #define SHN_STATE_SIZE     2
- 
--struct radeon_hw_state {
--	/* Head of the linked list of state atoms. */
--	struct radeon_state_atom atomlist;
--
-+struct r100_hw_state {
- 	/* Hardware state, stored as cmdbuf commands:  
- 	 *   -- Need to doublebuffer for
- 	 *           - eliding noop statechange loops? (except line stipple count)
-@@ -438,89 +320,19 @@ struct radeon_hw_state {
- 	struct radeon_state_atom glt;
- 	struct radeon_state_atom txr[3];	/* for NPOT */
- 
--	int max_state_size;	/* Number of bytes necessary for a full state emit. */
--	GLboolean is_dirty, all_dirty;
- };
- 
--struct radeon_state {
--	/* Derived state for internal purposes:
--	 */
--	struct radeon_colorbuffer_state color;
--	struct radeon_depthbuffer_state depth;
--	struct radeon_scissor_state scissor;
--	struct radeon_stencilbuffer_state stencil;
-+
-+struct r100_state {
- 	struct radeon_stipple_state stipple;
- 	struct radeon_texture_state texture;
- };
- 
--/* Need refcounting on dma buffers:
-- */
--struct radeon_dma_buffer {
--	int refcount;		/* the number of retained regions in buf */
--	drmBufPtr buf;
--};
--
--#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset +			\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct radeon_dma_region {
--	struct radeon_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--	int aos_start;
--	int aos_stride;
--	int aos_size;
--};
--
--struct radeon_dma {
--	/* Active dma region.  Allocations for vertices and retained
--	 * regions come from here.  Also used for emitting random vertices,
--	 * these may be flushed by calling flush_current();
--	 */
--	struct radeon_dma_region current;
--
--	void (*flush) (radeonContextPtr);
--
--	char *buf0_address;	/* start of buf[0], for index calcs */
--	GLuint nr_released_bufs;	/* flush after so many buffers released */
--};
--
--struct radeon_dri_mirror {
--	__DRIcontextPrivate *context;	/* DRI context */
--	__DRIscreenPrivate *screen;	/* DRI screen */
--
--   /**
--    * DRI drawable bound to this context for drawing.
--    */
--	__DRIdrawablePrivate *drawable;
--
--   /**
--    * DRI drawable bound to this context for reading.
--    */
--	__DRIdrawablePrivate *readable;
--
--	drm_context_t hwContext;
--	drm_hw_lock_t *hwLock;
--	int fd;
--	int drmMinor;
--};
--
- #define RADEON_CMD_BUF_SZ  (8*1024)
--
--struct radeon_store {
--	GLuint statenr;
--	GLuint primnr;
--	char cmd_buf[RADEON_CMD_BUF_SZ];
--	int cmd_used;
--	int elts_start;
--};
--
-+#define R200_ELT_BUF_SZ  (8*1024)
- /* radeon_tcl.c
-  */
--struct radeon_tcl_info {
-+struct r100_tcl_info {
- 	GLuint vertex_format;
- 	GLuint hw_primitive;
- 
-@@ -529,30 +341,18 @@ struct radeon_tcl_info {
- 	 */
- 	GLvector4f ObjClean;
- 
--	struct radeon_dma_region *aos_components[8];
--	GLuint nr_aos_components;
--
- 	GLuint *Elts;
- 
--	struct radeon_dma_region indexed_verts;
--	struct radeon_dma_region obj;
--	struct radeon_dma_region rgba;
--	struct radeon_dma_region spec;
--	struct radeon_dma_region fog;
--	struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
--	struct radeon_dma_region norm;
-+        int elt_cmd_offset;
-+	int elt_cmd_start;
-+        int elt_used;
- };
- 
- /* radeon_swtcl.c
-  */
--struct radeon_swtcl_info {
--	GLuint RenderIndex;
--	GLuint vertex_size;
-+struct r100_swtcl_info {
- 	GLuint vertex_format;
- 
--	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
--	GLuint vertex_attr_count;
--
- 	GLubyte *verts;
- 
- 	/* Fallback rasterization functions
-@@ -561,10 +361,6 @@ struct radeon_swtcl_info {
- 	radeon_line_func draw_line;
- 	radeon_tri_func draw_tri;
- 
--	GLuint hw_primitive;
--	GLenum render_primitive;
--	GLuint numverts;
--
-    /**
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-     */
-@@ -576,22 +372,9 @@ struct radeon_swtcl_info {
- 	GLuint specoffset;
- 
- 	GLboolean needproj;
--
--	struct radeon_dma_region indexed_verts;
- };
- 
--struct radeon_ioctl {
--	GLuint vertex_offset;
--	GLuint vertex_size;
--};
--
--#define RADEON_MAX_PRIMS 64
- 
--struct radeon_prim {
--	GLuint start;
--	GLuint end;
--	GLuint prim;
--};
- 
- /* A maximum total of 20 elements per vertex:  3 floats for position, 3
-  * floats for normal, 4 floats for color, 4 bytes for secondary color,
-@@ -602,59 +385,18 @@ struct radeon_prim {
-  */
- #define RADEON_MAX_VERTEX_SIZE 20
- 
--struct radeon_context {
--	GLcontext *glCtx;	/* Mesa context */
-+struct r100_context {
-+        struct radeon_context radeon;
- 
- 	/* Driver and hardware state management
- 	 */
--	struct radeon_hw_state hw;
--	struct radeon_state state;
--
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
--	int texture_depth;
--	float initialMaxAnisotropy;
--
--	/* Rasterization and vertex state:
--	 */
--	GLuint TclFallback;
--	GLuint Fallback;
--	GLuint NewGLState;
--	 DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
-+	struct r100_hw_state hw;
-+	struct r100_state state;
- 
- 	/* Vertex buffers
- 	 */
- 	struct radeon_ioctl ioctl;
--	struct radeon_dma dma;
- 	struct radeon_store store;
--	/* A full state emit as of the first state emit in the main store, in case
--	 * the context is lost.
--	 */
--	struct radeon_store backup_store;
--
--	/* Page flipping
--	 */
--	GLuint doPageFlip;
--
--	/* Busy waiting
--	 */
--	GLuint do_usleeps;
--	GLuint do_irqs;
--	GLuint irqsEmitted;
--	drm_radeon_irq_wait_t iw;
--
--	/* Drawable, cliprect and scissor information
--	 */
--	GLuint numClipRects;	/* Cliprects for the draw buffer */
--	drm_clip_rect_t *pClipRects;
--	unsigned int lastStamp;
--	GLboolean lost_context;
--	GLboolean save_on_next_emit;
--	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
--	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
- 
- 	/* TCL stuff
- 	 */
-@@ -667,29 +409,13 @@ struct radeon_context {
- 	GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
- 	GLuint last_ReallyEnabled;
- 
--	/* VBI
--	 */
--	int64_t swap_ust;
--	int64_t swap_missed_ust;
--
--	GLuint swap_count;
--	GLuint swap_missed_count;
--
- 	/* radeon_tcl.c
- 	 */
--	struct radeon_tcl_info tcl;
-+	struct r100_tcl_info tcl;
- 
- 	/* radeon_swtcl.c
- 	 */
--	struct radeon_swtcl_info swtcl;
--
--	/* Mirrors of some DRI state
--	 */
--	struct radeon_dri_mirror dri;
--
--	/* Configuration cache
--	 */
--	driOptionCache optionCache;
-+	struct r100_swtcl_info swtcl;
- 
- 	GLboolean using_hyperz;
- 	GLboolean texmicrotile;
-@@ -703,61 +429,19 @@ struct radeon_context {
- 	GLuint c_textureSwaps;
- 	GLuint c_textureBytes;
- 	GLuint c_vertexBuffers;
-+
- };
- 
--#define RADEON_CONTEXT(ctx)		((radeonContextPtr)(ctx->DriverCtx))
--
--static INLINE GLuint radeonPackColor(GLuint cpp,
--                                     GLubyte r, GLubyte g,
--                                     GLubyte b, GLubyte a)
--{
--	switch (cpp) {
--	case 2:
--		return PACK_COLOR_565(r, g, b);
--	case 4:
--		return PACK_COLOR_8888(a, r, g, b);
--	default:
--		return 0;
--	}
--}
-+
-+#define R100_CONTEXT(ctx)		((r100ContextPtr)(ctx->DriverCtx))
-+
- 
- #define RADEON_OLD_PACKETS 1
- 
--extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
--extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
--				     __DRIcontextPrivate * driContextPriv,
--				     void *sharedContextPrivate);
--extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
--extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--				int x, int y, int w, int h);
--extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
--				   __DRIdrawablePrivate * driDrawPriv,
--				   __DRIdrawablePrivate * driReadPriv);
--extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
--
--/* ================================================================
-- * Debugging:
-- */
--#define DO_DEBUG		1
--
--#if DO_DEBUG
--extern int RADEON_DEBUG;
--#else
--#define RADEON_DEBUG		0
--#endif
--
--#define DEBUG_TEXTURE	0x0001
--#define DEBUG_STATE	0x0002
--#define DEBUG_IOCTL	0x0004
--#define DEBUG_PRIMS	0x0008
--#define DEBUG_VERTS	0x0010
--#define DEBUG_FALLBACKS	0x0020
--#define DEBUG_VFMT	0x0040
--#define DEBUG_CODEGEN	0x0080
--#define DEBUG_VERBOSE	0x0100
--#define DEBUG_DRI       0x0200
--#define DEBUG_DMA       0x0400
--#define DEBUG_SANITY    0x0800
--#define DEBUG_SYNC      0x1000
-+extern GLboolean r100CreateContext( const __GLcontextModes *glVisual,
-+				    __DRIcontextPrivate *driContextPriv,
-+				    void *sharedContextPrivate);
-+  
-+
- 
- #endif				/* __RADEON_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
-new file mode 100644
-index 0000000..984725a
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
-@@ -0,0 +1,207 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_CS_H
-+#define RADEON_CS_H
-+
-+#include <stdint.h>
-+#include <string.h>
-+#include "drm.h"
-+#include "radeon_drm.h"
-+
-+struct radeon_cs_reloc {
-+    struct radeon_bo    *bo;
-+    uint32_t            read_domain;
-+    uint32_t            write_domain;
-+    uint32_t            flags;
-+};
-+
-+
-+#define RADEON_CS_SPACE_OK 0
-+#define RADEON_CS_SPACE_OP_TO_BIG 1
-+#define RADEON_CS_SPACE_FLUSH 2
-+
-+struct radeon_cs_space_check {
-+    struct radeon_bo *bo;
-+    uint32_t read_domains;
-+    uint32_t write_domain;
-+    uint32_t new_accounted;
-+};
-+
-+struct radeon_cs_manager;
-+
-+struct radeon_cs {
-+    struct radeon_cs_manager    *csm;
-+    void                        *relocs;
-+    uint32_t                    *packets;
-+    unsigned                    crelocs;
-+    unsigned                    relocs_total_size;
-+    unsigned                    cdw;
-+    unsigned                    ndw;
-+    int                         section;
-+    unsigned                    section_ndw;
-+    unsigned                    section_cdw;
-+    const char                  *section_file;
-+    const char                  *section_func;
-+    int                         section_line;
-+
-+};
-+
-+/* cs functions */
-+struct radeon_cs_funcs {
-+    struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
-+                                   uint32_t ndw);
-+    int (*cs_write_reloc)(struct radeon_cs *cs,
-+                          struct radeon_bo *bo,
-+                          uint32_t read_domain,
-+                          uint32_t write_domain,
-+                          uint32_t flags);
-+    int (*cs_begin)(struct radeon_cs *cs,
-+                    uint32_t ndw,
-+                    const char *file,
-+                    const char *func,
-+                    int line);
-+    int (*cs_end)(struct radeon_cs *cs,
-+                  const char *file,
-+                  const char *func,
-+                  int line);
-+    int (*cs_emit)(struct radeon_cs *cs);
-+    int (*cs_destroy)(struct radeon_cs *cs);
-+    int (*cs_erase)(struct radeon_cs *cs);
-+    int (*cs_need_flush)(struct radeon_cs *cs);
-+    void (*cs_print)(struct radeon_cs *cs, FILE *file);
-+    int (*cs_space_check)(struct radeon_cs *cs, struct radeon_cs_space_check *bos,
-+			  int num_bo);
-+};
-+
-+struct radeon_cs_manager {
-+    struct radeon_cs_funcs  *funcs;
-+    int                     fd;
-+    uint32_t vram_limit, gart_limit;
-+    uint32_t vram_write_used, gart_write_used;
-+    uint32_t read_used;
-+};
-+
-+static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
-+                                                 uint32_t ndw)
-+{
-+    return csm->funcs->cs_create(csm, ndw);
-+}
-+
-+static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
-+                                        struct radeon_bo *bo,
-+                                        uint32_t read_domain,
-+                                        uint32_t write_domain,
-+                                        uint32_t flags)
-+{
-+    return cs->csm->funcs->cs_write_reloc(cs,
-+                                          bo,
-+                                          read_domain,
-+                                          write_domain,
-+                                          flags);
-+}
-+
-+static inline int radeon_cs_begin(struct radeon_cs *cs,
-+                                  uint32_t ndw,
-+                                  const char *file,
-+                                  const char *func,
-+                                  int line)
-+{
-+    return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
-+}
-+
-+static inline int radeon_cs_end(struct radeon_cs *cs,
-+                                const char *file,
-+                                const char *func,
-+                                int line)
-+{
-+    return cs->csm->funcs->cs_end(cs, file, func, line);
-+}
-+
-+static inline int radeon_cs_emit(struct radeon_cs *cs)
-+{
-+    return cs->csm->funcs->cs_emit(cs);
-+}
-+
-+static inline int radeon_cs_destroy(struct radeon_cs *cs)
-+{
-+    return cs->csm->funcs->cs_destroy(cs);
-+}
-+
-+static inline int radeon_cs_erase(struct radeon_cs *cs)
-+{
-+    return cs->csm->funcs->cs_erase(cs);
-+}
-+
-+static inline int radeon_cs_need_flush(struct radeon_cs *cs)
-+{
-+    return cs->csm->funcs->cs_need_flush(cs);
-+}
-+
-+static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
-+{
-+    cs->csm->funcs->cs_print(cs, file);
-+}
-+
-+static inline int radeon_cs_space_check(struct radeon_cs *cs,
-+					    struct radeon_cs_space_check *bos,
-+					    int num_bo)
-+{
-+    return cs->csm->funcs->cs_space_check(cs, bos, num_bo);
-+}
-+
-+static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
-+{
-+    
-+    if (domain == RADEON_GEM_DOMAIN_VRAM)
-+	cs->csm->vram_limit = limit;
-+    else
-+	cs->csm->gart_limit = limit;
-+}
-+
-+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
-+{
-+    cs->packets[cs->cdw++] = dword;
-+    if (cs->section) {
-+        cs->section_cdw++;
-+    }
-+}
-+
-+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
-+{
-+
-+    memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
-+    cs->cdw+=2;
-+    if (cs->section) {
-+        cs->section_cdw+=2;
-+    }
-+}
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
-new file mode 100644
-index 0000000..b47b095
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
-@@ -0,0 +1,504 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#include <errno.h>
-+
-+#include "radeon_bocs_wrapper.h"
-+
-+struct cs_manager_legacy {
-+    struct radeon_cs_manager    base;
-+    struct radeon_context       *ctx;
-+    /* hack for scratch stuff */
-+    uint32_t                    pending_age;
-+    uint32_t                    pending_count;
-+
-+
-+};
-+
-+struct cs_reloc_legacy {
-+    struct radeon_cs_reloc  base;
-+    uint32_t                cindices;
-+    uint32_t                *indices;
-+};
-+
-+
-+static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
-+                                   uint32_t ndw)
-+{
-+    struct radeon_cs *cs;
-+
-+    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
-+    if (cs == NULL) {
-+        return NULL;
-+    }
-+    cs->csm = csm;
-+    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
-+    cs->packets = (uint32_t*)malloc(4*cs->ndw);
-+    if (cs->packets == NULL) {
-+        free(cs);
-+        return NULL;
-+    }
-+    cs->relocs_total_size = 0;
-+    return cs;
-+}
-+
-+static int cs_write_reloc(struct radeon_cs *cs,
-+                          struct radeon_bo *bo,
-+                          uint32_t read_domain,
-+                          uint32_t write_domain,
-+                          uint32_t flags)
-+{
-+    struct cs_reloc_legacy *relocs;
-+    int i;
-+
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+    /* check domains */
-+    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
-+        /* in one CS a bo can only be in read or write domain but not
-+         * in read & write domain at the same sime
-+         */
-+        return -EINVAL;
-+    }
-+    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
-+        return -EINVAL;
-+    }
-+    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
-+        return -EINVAL;
-+    }
-+    /* check if bo is already referenced */
-+    for(i = 0; i < cs->crelocs; i++) {
-+        uint32_t *indices;
-+
-+        if (relocs[i].base.bo->handle == bo->handle) {
-+            /* Check domains must be in read or write. As we check already
-+             * checked that in argument one of the read or write domain was
-+             * set we only need to check that if previous reloc as the read
-+             * domain set then the read_domain should also be set for this
-+             * new relocation.
-+             */
-+            if (relocs[i].base.read_domain && !read_domain) {
-+                return -EINVAL;
-+            }
-+            if (relocs[i].base.write_domain && !write_domain) {
-+                return -EINVAL;
-+            }
-+            relocs[i].base.read_domain |= read_domain;
-+            relocs[i].base.write_domain |= write_domain;
-+            /* save indice */
-+            relocs[i].cindices++;
-+            indices = (uint32_t*)realloc(relocs[i].indices,
-+                                         relocs[i].cindices * 4);
-+            if (indices == NULL) {
-+                relocs[i].cindices -= 1;
-+                return -ENOMEM;
-+            }
-+            relocs[i].indices = indices;
-+            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
-+            return 0;
-+        }
-+    }
-+    /* add bo to reloc */
-+    relocs = (struct cs_reloc_legacy*)
-+             realloc(cs->relocs,
-+                     sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
-+    if (relocs == NULL) {
-+        return -ENOMEM;
-+    }
-+    cs->relocs = relocs;
-+    relocs[cs->crelocs].base.bo = bo;
-+    relocs[cs->crelocs].base.read_domain = read_domain;
-+    relocs[cs->crelocs].base.write_domain = write_domain;
-+    relocs[cs->crelocs].base.flags = flags;
-+    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
-+    if (relocs[cs->crelocs].indices == NULL) {
-+        return -ENOMEM;
-+    }
-+    relocs[cs->crelocs].indices[0] = cs->cdw - 1;
-+    relocs[cs->crelocs].cindices = 1;
-+    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
-+    cs->crelocs++;
-+    radeon_bo_ref(bo);
-+    return 0;
-+}
-+
-+static int cs_begin(struct radeon_cs *cs,
-+                    uint32_t ndw,
-+                    const char *file,
-+                    const char *func,
-+                    int line)
-+{
-+    if (cs->section) {
-+        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
-+                cs->section_file, cs->section_func, cs->section_line);
-+        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    cs->section = 1;
-+    cs->section_ndw = ndw;
-+    cs->section_cdw = 0;
-+    cs->section_file = file;
-+    cs->section_func = func;
-+    cs->section_line = line;
-+
-+
-+    if (cs->cdw + ndw > cs->ndw) {
-+        uint32_t tmp, *ptr;
-+	int num = (ndw > 0x3FF) ? ndw : 0x3FF;
-+
-+        tmp = (cs->cdw + 1 + num) & (~num);
-+        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
-+        if (ptr == NULL) {
-+            return -ENOMEM;
-+        }
-+        cs->packets = ptr;
-+        cs->ndw = tmp;
-+    }
-+
-+    return 0;
-+}
-+
-+static int cs_end(struct radeon_cs *cs,
-+                  const char *file,
-+                  const char *func,
-+                  int line)
-+
-+{
-+    if (!cs->section) {
-+        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    cs->section = 0;
-+    if (cs->section_ndw != cs->section_cdw) {
-+        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
-+                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
-+        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    return 0;
-+}
-+
-+static int cs_process_relocs(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    struct cs_reloc_legacy *relocs;
-+    int i, j, r;
-+
-+    csm = (struct cs_manager_legacy*)cs->csm;
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+ restart:
-+    for (i = 0; i < cs->crelocs; i++) {
-+        for (j = 0; j < relocs[i].cindices; j++) {
-+            uint32_t soffset, eoffset;
-+
-+            r = radeon_bo_legacy_validate(relocs[i].base.bo,
-+                                           &soffset, &eoffset);
-+	    if (r == -EAGAIN)
-+	      goto restart;
-+            if (r) {
-+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
-+                        relocs[i].base.bo, soffset, eoffset);
-+                return r;
-+            }
-+            cs->packets[relocs[i].indices[j]] += soffset;
-+            if (cs->packets[relocs[i].indices[j]] >= eoffset) {
-+	      /*                radeon_bo_debug(relocs[i].base.bo, 12); */
-+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
-+                        relocs[i].base.bo, soffset, eoffset);
-+                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
-+                        relocs[i].base.bo,
-+                        cs->packets[relocs[i].indices[j]],
-+                        eoffset);
-+                exit(0);
-+                return -EINVAL;
-+            }
-+        }
-+    }
-+    return 0;
-+}
-+
-+static int cs_set_age(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    struct cs_reloc_legacy *relocs;
-+    int i;
-+
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+    for (i = 0; i < cs->crelocs; i++) {
-+        radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
-+        radeon_bo_unref(relocs[i].base.bo);
-+    }
-+    return 0;
-+}
-+
-+static void dump_cmdbuf(struct radeon_cs *cs)
-+{
-+  int i;
-+  for (i = 0; i < cs->cdw; i++){
-+    fprintf(stderr,"%x: %08x\n", i, cs->packets[i]);
-+  }
-+
-+}
-+static int cs_emit(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    drm_radeon_cmd_buffer_t cmd;
-+    drm_r300_cmd_header_t age;
-+    uint64_t ull;
-+    int r;
-+
-+    csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
-+
-+    /* append buffer age */
-+    if (IS_R300_CLASS(csm->ctx->radeonScreen)) {
-+      age.scratch.cmd_type = R300_CMD_SCRATCH;
-+      /* Scratch register 2 corresponds to what radeonGetAge polls */
-+      csm->pending_age = 0;
-+      csm->pending_count = 1;
-+      ull = (uint64_t) (intptr_t) &csm->pending_age;
-+      age.scratch.reg = 2;
-+      age.scratch.n_bufs = 1;
-+      age.scratch.flags = 0;
-+      radeon_cs_write_dword(cs, age.u);
-+      radeon_cs_write_qword(cs, ull);
-+      radeon_cs_write_dword(cs, 0);
-+    }
-+
-+    r = cs_process_relocs(cs);
-+    if (r) {
-+        return 0;
-+    }
-+
-+    cmd.buf = (char *)cs->packets;
-+    cmd.bufsz = cs->cdw * 4;
-+    if (csm->ctx->state.scissor.enabled) {
-+        cmd.nbox = csm->ctx->state.scissor.numClipRects;
-+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
-+    } else {
-+        cmd.nbox = csm->ctx->numClipRects;
-+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
-+    }
-+
-+    //dump_cmdbuf(cs);
-+
-+    r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
-+    if (r) {
-+        return r;
-+    }
-+    if (!IS_R300_CLASS(csm->ctx->radeonScreen)) {
-+	drm_radeon_irq_emit_t emit_cmd;
-+	emit_cmd.irq_seq = &csm->pending_age;
-+	r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd));
-+	if (r) {
-+		return r;
-+	}
-+    }
-+    cs_set_age(cs);
-+
-+    cs->csm->read_used = 0;
-+    cs->csm->vram_write_used = 0;
-+    cs->csm->gart_write_used = 0;
-+    return 0;
-+}
-+
-+static void inline cs_free_reloc(void *relocs_p, int crelocs)
-+{
-+    struct cs_reloc_legacy *relocs = relocs_p;
-+    int i;
-+    if (!relocs_p)
-+      return;
-+    for (i = 0; i < crelocs; i++)
-+      free(relocs[i].indices);
-+}
-+
-+static int cs_destroy(struct radeon_cs *cs)
-+{
-+    cs_free_reloc(cs->relocs, cs->crelocs);
-+    free(cs->relocs);
-+    free(cs->packets);
-+    free(cs);
-+    return 0;
-+}
-+
-+static int cs_erase(struct radeon_cs *cs)
-+{
-+    cs_free_reloc(cs->relocs, cs->crelocs);
-+    free(cs->relocs);
-+    cs->relocs_total_size = 0;
-+    cs->relocs = NULL;
-+    cs->crelocs = 0;
-+    cs->cdw = 0;
-+    cs->section = 0;
-+    return 0;
-+}
-+
-+static int cs_need_flush(struct radeon_cs *cs)
-+{
-+    /* this function used to flush when the BO usage got to
-+     * a certain size, now the higher levels handle this better */
-+    return 0;
-+}
-+
-+static void cs_print(struct radeon_cs *cs, FILE *file)
-+{
-+}
-+
-+static int cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo)
-+{
-+    struct radeon_cs_manager *csm = cs->csm;
-+    int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0;
-+    uint32_t read_domains, write_domain;
-+    int i;
-+    struct radeon_bo *bo;
-+
-+    /* check the totals for this operation */
-+
-+    if (num_bo == 0)
-+        return 0;
-+
-+    /* prepare */
-+    for (i = 0; i < num_bo; i++) {
-+      bo = bos[i].bo;
-+
-+      bos[i].new_accounted = 0;
-+      read_domains = bos[i].read_domains;
-+      write_domain = bos[i].write_domain;
-+		
-+      /* pinned bos don't count */
-+      if (radeon_legacy_bo_is_static(bo))
-+	  continue;
-+ 
-+      /* already accounted this bo */
-+      if (write_domain && (write_domain == bo->space_accounted))
-+	  continue;
-+
-+      if (read_domains && ((read_domains << 16) == bo->space_accounted))
-+	  continue;
-+      
-+      if (bo->space_accounted == 0) {
-+	  if (write_domain == RADEON_GEM_DOMAIN_VRAM)
-+	      this_op_vram_write += bo->size;
-+	  else if (write_domain == RADEON_GEM_DOMAIN_GTT)
-+	      this_op_gart_write += bo->size;
-+	  else
-+	      this_op_read += bo->size;
-+	  bos[i].new_accounted = (read_domains << 16) | write_domain;
-+      } else {
-+	  uint16_t old_read, old_write;
-+	  
-+	  old_read = bo->space_accounted >> 16;
-+	  old_write = bo->space_accounted & 0xffff;
-+
-+	  if (write_domain && (old_read & write_domain)) {
-+	      bos[i].new_accounted = write_domain;
-+	      /* moving from read to a write domain */
-+	      if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
-+		  this_op_read -= bo->size;
-+		  this_op_vram_write += bo->size;
-+	      } else if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
-+		  this_op_read -= bo->size;
-+		  this_op_gart_write += bo->size;
-+	      }
-+	  } else if (read_domains & old_write) {
-+	      bos[i].new_accounted = bo->space_accounted & 0xffff;
-+	  } else {
-+	      /* rewrite the domains */
-+	      if (write_domain != old_write)
-+		  fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
-+	      if (read_domains != old_read)
-+		  fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
-+	      return RADEON_CS_SPACE_FLUSH;
-+	  }
-+      }
-+	}
-+	
-+	if (this_op_read < 0)
-+		this_op_read = 0;
-+
-+	/* check sizes - operation first */
-+	if ((this_op_read + this_op_gart_write > csm->gart_limit) ||
-+	    (this_op_vram_write > csm->vram_limit)) {
-+	    return RADEON_CS_SPACE_OP_TO_BIG;
-+	}
-+
-+	if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) ||
-+	    ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) {
-+		return RADEON_CS_SPACE_FLUSH;
-+	}
-+
-+	csm->gart_write_used += this_op_gart_write;
-+	csm->vram_write_used += this_op_vram_write;
-+	csm->read_used += this_op_read;
-+	/* commit */
-+	for (i = 0; i < num_bo; i++) {
-+		bo = bos[i].bo;
-+		bo->space_accounted = bos[i].new_accounted;
-+	}
-+
-+	return RADEON_CS_SPACE_OK;
-+}
-+
-+static struct radeon_cs_funcs  radeon_cs_legacy_funcs = {
-+    cs_create,
-+    cs_write_reloc,
-+    cs_begin,
-+    cs_end,
-+    cs_emit,
-+    cs_destroy,
-+    cs_erase,
-+    cs_need_flush,
-+    cs_print,
-+    cs_check_space
-+};
-+
-+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
-+{
-+    struct cs_manager_legacy *csm;
-+
-+    csm = (struct cs_manager_legacy*)
-+          calloc(1, sizeof(struct cs_manager_legacy));
-+    if (csm == NULL) {
-+        return NULL;
-+    }
-+    csm->base.funcs = &radeon_cs_legacy_funcs;
-+    csm->base.fd = ctx->dri.fd;
-+    csm->ctx = ctx;
-+    csm->pending_age = 1;
-+    return (struct radeon_cs_manager*)csm;
-+}
-+
-+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
-+{
-+    free(csm);
-+}
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
-new file mode 100644
-index 0000000..e177b4b
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
-@@ -0,0 +1,40 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_CS_LEGACY_H
-+#define RADEON_CS_LEGACY_H
-+
-+#include "radeon_common.h"
-+
-+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
-+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
-new file mode 100644
-index 0000000..5ffee86
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
-@@ -0,0 +1,332 @@
-+/**************************************************************************
-+
-+Copyright (C) 2004 Nicolai Haehnle.
-+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+
-+The Weather Channel (TM) funded Tungsten Graphics to develop the
-+initial release of the Radeon 8500 driver under the XFree86 license.
-+This notice must be preserved.
-+
-+All Rights Reserved.
-+
-+Permission is hereby granted, free of charge, to any person obtaining a
-+copy of this software and associated documentation files (the "Software"),
-+to deal in the Software without restriction, including without limitation
-+on the rights to use, copy, modify, merge, publish, distribute, sub
-+license, and/or sell copies of the Software, and to permit persons to whom
-+the Software is furnished to do so, subject to the following conditions:
-+
-+The above copyright notice and this permission notice (including the next
-+paragraph) shall be included in all copies or substantial portions of the
-+Software.
-+
-+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-+USE OR OTHER DEALINGS IN THE SOFTWARE.
-+
-+**************************************************************************/
-+
-+#include "radeon_common.h"
-+
-+#if defined(USE_X86_ASM)
-+#define COPY_DWORDS( dst, src, nr )					\
-+do {									\
-+	int __tmp;							\
-+	__asm__ __volatile__( "rep ; movsl"				\
-+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
-+			      : "0" (nr),				\
-+			        "D" ((long)dst),			\
-+			        "S" ((long)src) );			\
-+} while (0)
-+#else
-+#define COPY_DWORDS( dst, src, nr )		\
-+do {						\
-+   int j;					\
-+   for ( j = 0 ; j < nr ; j++ )			\
-+      dst[j] = ((int *)src)[j];			\
-+   dst += nr;					\
-+} while (0)
-+#endif
-+
-+static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
-+{
-+	int i;
-+
-+	if (RADEON_DEBUG & DEBUG_VERTS)
-+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-+			__FUNCTION__, count, stride, (void *)out, (void *)data);
-+
-+	if (stride == 4)
-+		COPY_DWORDS(out, data, count);
-+	else
-+		for (i = 0; i < count; i++) {
-+			out[0] = *(int *)data;
-+			out++;
-+			data += stride;
-+		}
-+}
-+
-+void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
-+{
-+	int i;
-+
-+	if (RADEON_DEBUG & DEBUG_VERTS)
-+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-+			__FUNCTION__, count, stride, (void *)out, (void *)data);
-+
-+	if (stride == 8)
-+		COPY_DWORDS(out, data, count * 2);
-+	else
-+		for (i = 0; i < count; i++) {
-+			out[0] = *(int *)data;
-+			out[1] = *(int *)(data + 4);
-+			out += 2;
-+			data += stride;
-+		}
-+}
-+
-+void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
-+{
-+	int i;
-+
-+	if (RADEON_DEBUG & DEBUG_VERTS)
-+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-+			__FUNCTION__, count, stride, (void *)out, (void *)data);
-+
-+	if (stride == 12) {
-+		COPY_DWORDS(out, data, count * 3);
-+    }
-+	else
-+		for (i = 0; i < count; i++) {
-+			out[0] = *(int *)data;
-+			out[1] = *(int *)(data + 4);
-+			out[2] = *(int *)(data + 8);
-+			out += 3;
-+			data += stride;
-+		}
-+}
-+
-+static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
-+{
-+	int i;
-+
-+	if (RADEON_DEBUG & DEBUG_VERTS)
-+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-+			__FUNCTION__, count, stride, (void *)out, (void *)data);
-+
-+	if (stride == 16)
-+		COPY_DWORDS(out, data, count * 4);
-+	else
-+		for (i = 0; i < count; i++) {
-+			out[0] = *(int *)data;
-+			out[1] = *(int *)(data + 4);
-+			out[2] = *(int *)(data + 8);
-+			out[3] = *(int *)(data + 12);
-+			out += 4;
-+			data += stride;
-+		}
-+}
-+
-+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
-+			 GLvoid * data, int size, int stride, int count)
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	uint32_t *out;
-+
-+	if (stride == 0) {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+		count = 1;
-+		aos->stride = 0;
-+	} else {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
-+		aos->stride = size;
-+	}
-+
-+	aos->components = size;
-+	aos->count = count;
-+
-+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-+	switch (size) {
-+	case 1: radeonEmitVec4(out, data, stride, count); break;
-+	case 2: radeonEmitVec8(out, data, stride, count); break;
-+	case 3: radeonEmitVec12(out, data, stride, count); break;
-+	case 4: radeonEmitVec16(out, data, stride, count); break;
-+	default:
-+		assert(0);
-+		break;
-+	}
-+}
-+
-+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
-+{
-+
-+	size = MAX2(size, MAX_DMA_BUF_SZ * 16);
-+
-+	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
-+		fprintf(stderr, "%s\n", __FUNCTION__);
-+
-+	if (rmesa->dma.flush) {
-+		rmesa->dma.flush(rmesa->glCtx);
-+	}
-+
-+	if (rmesa->dma.nr_released_bufs > 4) {
-+		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
-+		rmesa->dma.nr_released_bufs = 0;
-+	}
-+
-+	if (rmesa->dma.current) {
-+		radeon_bo_unmap(rmesa->dma.current);
-+		radeon_bo_unref(rmesa->dma.current);
-+		rmesa->dma.current = 0;
-+	}
-+
-+again_alloc:	
-+	rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
-+					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
-+					    0);
-+
-+	if (!rmesa->dma.current) {
-+		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
-+		rmesa->dma.nr_released_bufs = 0;
-+		goto again_alloc;
-+	}
-+
-+	rmesa->dma.current_used = 0;
-+	rmesa->dma.current_vertexptr = 0;
-+	
-+	radeon_validate_bo(rmesa, rmesa->dma.current, RADEON_GEM_DOMAIN_GTT, 0);
-+
-+	if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE)
-+	  fprintf(stderr,"failure to revalidate BOs - badness\n");
-+	  
-+	radeon_bo_map(rmesa->dma.current, 1);
-+}
-+
-+/* Allocates a region from rmesa->dma.current.  If there isn't enough
-+ * space in current, grab a new buffer (and discard what was left of current)
-+ */
-+void radeonAllocDmaRegion(radeonContextPtr rmesa,
-+			  struct radeon_bo **pbo, int *poffset,
-+			  int bytes, int alignment)
-+{
-+	if (RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-+
-+	if (rmesa->dma.flush)
-+		rmesa->dma.flush(rmesa->glCtx);
-+
-+	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
-+
-+	alignment--;
-+	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
-+
-+	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
-+		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
-+
-+	*poffset = rmesa->dma.current_used;
-+	*pbo = rmesa->dma.current;
-+	radeon_bo_ref(*pbo);
-+
-+	/* Always align to at least 16 bytes */
-+	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
-+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
-+
-+	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
-+}
-+
-+void radeonReleaseDmaRegion(radeonContextPtr rmesa)
-+{
-+	if (RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
-+	if (rmesa->dma.current) {
-+		rmesa->dma.nr_released_bufs++;
-+		radeon_bo_unmap(rmesa->dma.current);
-+	        radeon_bo_unref(rmesa->dma.current);
-+	}
-+	rmesa->dma.current = NULL;
-+}
-+
-+
-+/* Flush vertices in the current dma region.
-+ */
-+void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	struct radeon_dma *dma = &rmesa->dma;
-+		
-+
-+	if (RADEON_DEBUG & DEBUG_IOCTL)
-+		fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
-+	dma->flush = NULL;
-+
-+	if (dma->current) {
-+	    GLuint current_offset = dma->current_used;
-+
-+	    assert (dma->current_used +
-+		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-+		    dma->current_vertexptr);
-+
-+	    if (dma->current_used != dma->current_vertexptr) {
-+		    dma->current_used = dma->current_vertexptr;
-+
-+		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
-+	    }
-+	    rmesa->swtcl.numverts = 0;
-+	}
-+}
-+/* Alloc space in the current dma region.
-+ */
-+void *
-+rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
-+{
-+	GLuint bytes = vsize * nverts;
-+	void *head;
-+restart:
-+	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
-+                radeonRefillCurrentDmaRegion(rmesa, bytes);
-+	}
-+
-+        if (!rmesa->dma.flush) {
-+		/* make sure we have enough space to use this in cmdbuf */
-+   		rcommonEnsureCmdBufSpace(rmesa,
-+			      rmesa->hw.max_state_size + (12*sizeof(int)),
-+			      __FUNCTION__);
-+		/* if cmdbuf flushed DMA restart */
-+		if (!rmesa->dma.current)
-+			goto restart;
-+                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
-+        }
-+
-+	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
-+        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
-+        ASSERT( rmesa->dma.current_used +
-+                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-+                rmesa->dma.current_vertexptr );
-+
-+	head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
-+	rmesa->dma.current_vertexptr += bytes;
-+	rmesa->swtcl.numverts += nverts;
-+	return head;
-+}
-+
-+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
-+{
-+   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
-+   int i;
-+
-+   if (radeon->tcl.elt_dma_bo) {
-+	   radeon_bo_unref(radeon->tcl.elt_dma_bo);
-+	   radeon->tcl.elt_dma_bo = NULL;
-+   }
-+   for (i = 0; i < radeon->tcl.aos_count; i++) {
-+      if (radeon->tcl.aos[i].bo) {
-+         radeon_bo_unref(radeon->tcl.aos[i].bo);
-+         radeon->tcl.aos[i].bo = NULL;
-+      }
-+   }
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
-new file mode 100644
-index 0000000..06e388f
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
-@@ -0,0 +1,52 @@
-+/**************************************************************************
-+
-+Copyright (C) 2004 Nicolai Haehnle.
-+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+
-+The Weather Channel (TM) funded Tungsten Graphics to develop the
-+initial release of the Radeon 8500 driver under the XFree86 license.
-+This notice must be preserved.
-+
-+All Rights Reserved.
-+
-+Permission is hereby granted, free of charge, to any person obtaining a
-+copy of this software and associated documentation files (the "Software"),
-+to deal in the Software without restriction, including without limitation
-+on the rights to use, copy, modify, merge, publish, distribute, sub
-+license, and/or sell copies of the Software, and to permit persons to whom
-+the Software is furnished to do so, subject to the following conditions:
-+
-+The above copyright notice and this permission notice (including the next
-+paragraph) shall be included in all copies or substantial portions of the
-+Software.
-+
-+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-+USE OR OTHER DEALINGS IN THE SOFTWARE.
-+
-+**************************************************************************/
-+
-+#ifndef RADEON_DMA_H
-+#define RADEON_DMA_H
-+
-+void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count);
-+void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count);
-+
-+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
-+			 GLvoid * data, int size, int stride, int count);
-+
-+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
-+void radeonAllocDmaRegion(radeonContextPtr rmesa,
-+			  struct radeon_bo **pbo, int *poffset,
-+			  int bytes, int alignment);
-+void radeonReleaseDmaRegion(radeonContextPtr rmesa);
-+
-+void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
-+
-+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
-+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
-new file mode 100644
-index 0000000..f62ca7f
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
-@@ -0,0 +1,588 @@
-+/**************************************************************************
-+ * 
-+ * Copyright 2008 Red Hat Inc.
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ * 
-+ **************************************************************************/
-+
-+
-+#include "main/imports.h"
-+#include "main/macros.h"
-+#include "main/mtypes.h"
-+#include "main/fbobject.h"
-+#include "main/framebuffer.h"
-+#include "main/renderbuffer.h"
-+#include "main/context.h"
-+#include "main/texformat.h"
-+#include "main/texrender.h"
-+
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
-+
-+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
-+#define DBG(...) do {                                           \
-+        if (RADEON_DEBUG & FILE_DEBUG_FLAG)                      \
-+                _mesa_printf(__VA_ARGS__);                      \
-+} while(0)
-+
-+static struct gl_framebuffer *
-+radeon_new_framebuffer(GLcontext *ctx, GLuint name)
-+{
-+  return _mesa_new_framebuffer(ctx, name);
-+}
-+
-+static void
-+radeon_delete_renderbuffer(struct gl_renderbuffer *rb)
-+{
-+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
-+
-+  ASSERT(rrb);
-+
-+  if (rrb && rrb->bo) {
-+    radeon_bo_unref(rrb->bo);
-+  }
-+  _mesa_free(rrb);
-+}
-+
-+static void *
-+radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb,
-+		   GLint x, GLint y)
-+{
-+  return NULL;
-+}
-+
-+/**
-+ * Called via glRenderbufferStorageEXT() to set the format and allocate
-+ * storage for a user-created renderbuffer.
-+ */
-+static GLboolean
-+radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
-+                                 GLenum internalFormat,
-+                                 GLuint width, GLuint height)
-+{
-+  struct radeon_context *radeon = RADEON_CONTEXT(ctx);
-+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
-+  GLboolean software_buffer = GL_FALSE;
-+  int cpp;
-+
-+   ASSERT(rb->Name != 0);
-+  switch (internalFormat) {
-+   case GL_R3_G3_B2:
-+   case GL_RGB4:
-+   case GL_RGB5:
-+      rb->_ActualFormat = GL_RGB5;
-+      rb->DataType = GL_UNSIGNED_BYTE;
-+      rb->RedBits = 5;
-+      rb->GreenBits = 6;
-+      rb->BlueBits = 5;
-+      cpp = 2;
-+      break;
-+   case GL_RGB:
-+   case GL_RGB8:
-+   case GL_RGB10:
-+   case GL_RGB12:
-+   case GL_RGB16:
-+      rb->_ActualFormat = GL_RGB8;
-+      rb->DataType = GL_UNSIGNED_BYTE;
-+      rb->RedBits = 8;
-+      rb->GreenBits = 8;
-+      rb->BlueBits = 8;
-+      rb->AlphaBits = 0;
-+      cpp = 4;
-+      break;
-+   case GL_RGBA:
-+   case GL_RGBA2:
-+   case GL_RGBA4:
-+   case GL_RGB5_A1:
-+   case GL_RGBA8:
-+   case GL_RGB10_A2:
-+   case GL_RGBA12:
-+   case GL_RGBA16:
-+      rb->_ActualFormat = GL_RGBA8;
-+      rb->DataType = GL_UNSIGNED_BYTE;
-+      rb->RedBits = 8;
-+      rb->GreenBits = 8;
-+      rb->BlueBits = 8;
-+      rb->AlphaBits = 8;
-+      cpp = 4;
-+      break;
-+   case GL_STENCIL_INDEX:
-+   case GL_STENCIL_INDEX1_EXT:
-+   case GL_STENCIL_INDEX4_EXT:
-+   case GL_STENCIL_INDEX8_EXT:
-+   case GL_STENCIL_INDEX16_EXT:
-+      /* alloc a depth+stencil buffer */
-+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
-+      rb->StencilBits = 8;
-+      cpp = 4;
-+      break;
-+   case GL_DEPTH_COMPONENT16:
-+      rb->_ActualFormat = GL_DEPTH_COMPONENT16;
-+      rb->DataType = GL_UNSIGNED_SHORT;
-+      rb->DepthBits = 16;
-+      cpp = 2;
-+      break;
-+   case GL_DEPTH_COMPONENT:
-+   case GL_DEPTH_COMPONENT24:
-+   case GL_DEPTH_COMPONENT32:
-+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
-+      rb->DepthBits = 24;
-+      cpp = 4;
-+      break;
-+   case GL_DEPTH_STENCIL_EXT:
-+   case GL_DEPTH24_STENCIL8_EXT:
-+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
-+      rb->DepthBits = 24;
-+      rb->StencilBits = 8;
-+      cpp = 4;
-+      break;
-+   default:
-+      _mesa_problem(ctx,
-+                    "Unexpected format in intel_alloc_renderbuffer_storage");
-+      return GL_FALSE;
-+   }
-+
-+  radeonFlush(ctx);
-+
-+  if (rrb->bo)
-+    radeon_bo_unref(rrb->bo);
-+  
-+    
-+   if (software_buffer) {
-+      return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat,
-+                                             width, height);
-+   }
-+   else {
-+     uint32_t size = width * height * cpp;
-+     uint32_t pitch = ((cpp * width + 63) & ~63) / cpp;
-+
-+     fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width,
-+	  height, pitch);
-+
-+     rrb->pitch = pitch * cpp;
-+     rrb->cpp = cpp;
-+     rrb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+			      0,
-+			      size,
-+			      0,
-+			      RADEON_GEM_DOMAIN_VRAM,
-+			      0);
-+     rb->Width = width;
-+     rb->Height = height;
-+       return GL_TRUE;
-+   }    
-+   
-+}
-+
-+
-+/**
-+ * Called for each hardware renderbuffer when a _window_ is resized.
-+ * Just update fields.
-+ * Not used for user-created renderbuffers!
-+ */
-+static GLboolean
-+radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
-+                           GLenum internalFormat, GLuint width, GLuint height)
-+{
-+   ASSERT(rb->Name == 0);
-+   rb->Width = width;
-+   rb->Height = height;
-+   rb->_ActualFormat = internalFormat;
-+
-+   return GL_TRUE;
-+}
-+
-+
-+static void
-+radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
-+		     GLuint width, GLuint height)
-+{
-+     struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb;
-+   int i;
-+
-+   _mesa_resize_framebuffer(ctx, fb, width, height);
-+
-+   fb->Initialized = GL_TRUE; /* XXX remove someday */
-+
-+   if (fb->Name != 0) {
-+      return;
-+   }
-+
-+   /* Make sure all window system renderbuffers are up to date */
-+   for (i = 0; i < 2; i++) {
-+      struct gl_renderbuffer *rb = &radeon_fb->color_rb[i]->base;
-+
-+      /* only resize if size is changing */
-+      if (rb && (rb->Width != width || rb->Height != height)) {
-+	 rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height);
-+      }
-+   }
-+}
-+
-+
-+/** Dummy function for gl_renderbuffer::AllocStorage() */
-+static GLboolean
-+radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
-+			 GLenum internalFormat, GLuint width, GLuint height)
-+{
-+   _mesa_problem(ctx, "radeon_op_alloc_storage should never be called.");
-+   return GL_FALSE;
-+}
-+
-+struct radeon_renderbuffer *
-+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
-+{
-+    struct radeon_renderbuffer *rrb;
-+
-+    rrb = CALLOC_STRUCT(radeon_renderbuffer);
-+    if (!rrb)
-+	return NULL;
-+
-+    _mesa_init_renderbuffer(&rrb->base, 0);
-+    rrb->base.ClassID = RADEON_RB_CLASS;
-+
-+    /* XXX format junk */
-+    switch (format) {
-+	case GL_RGB5:
-+	    rrb->base._ActualFormat = GL_RGB5;
-+	    rrb->base._BaseFormat = GL_RGBA;
-+	    rrb->base.RedBits = 5;
-+	    rrb->base.GreenBits = 6;
-+	    rrb->base.BlueBits = 5;
-+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_RGB8:
-+	    rrb->base._ActualFormat = GL_RGB8;
-+	    rrb->base._BaseFormat = GL_RGB;
-+	    rrb->base.RedBits = 8;
-+	    rrb->base.GreenBits = 8;
-+	    rrb->base.BlueBits = 8;
-+	    rrb->base.AlphaBits = 8;
-+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_RGBA8:
-+	    rrb->base._ActualFormat = GL_RGBA8;
-+	    rrb->base._BaseFormat = GL_RGBA;
-+	    rrb->base.RedBits = 8;
-+	    rrb->base.GreenBits = 8;
-+	    rrb->base.BlueBits = 8;
-+	    rrb->base.AlphaBits = 8;
-+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_STENCIL_INDEX8_EXT:
-+	    rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
-+	    rrb->base._BaseFormat = GL_STENCIL_INDEX;
-+	    rrb->base.StencilBits = 8;
-+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_DEPTH_COMPONENT16:
-+	    rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
-+	    rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    rrb->base.DepthBits = 16;
-+	    rrb->base.DataType = GL_UNSIGNED_SHORT;
-+	    break;
-+	case GL_DEPTH_COMPONENT24:
-+	    rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    rrb->base.DepthBits = 24;
-+	    rrb->base.DataType = GL_UNSIGNED_INT;
-+	    break;
-+	case GL_DEPTH24_STENCIL8_EXT:
-+	    rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
-+	    rrb->base.DepthBits = 24;
-+	    rrb->base.StencilBits = 8;
-+	    rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-+	    break;
-+	default:
-+	    fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
-+	    _mesa_delete_renderbuffer(&rrb->base);
-+	    return NULL;
-+    }
-+
-+    rrb->dPriv = driDrawPriv;
-+    rrb->base.InternalFormat = format;
-+
-+    rrb->base.Delete = radeon_delete_renderbuffer;
-+    rrb->base.AllocStorage = radeon_alloc_window_storage;
-+    rrb->base.GetPointer = radeon_get_pointer;
-+
-+    rrb->bo = NULL;
-+    return rrb;
-+}
-+
-+static struct gl_renderbuffer *
-+radeon_new_renderbuffer(GLcontext * ctx, GLuint name)
-+{
-+  struct radeon_renderbuffer *rrb;
-+
-+  rrb = CALLOC_STRUCT(radeon_renderbuffer);
-+  if (!rrb)
-+    return NULL;
-+
-+  _mesa_init_renderbuffer(&rrb->base, name);
-+  rrb->base.ClassID = RADEON_RB_CLASS;
-+
-+  rrb->base.Delete = radeon_delete_renderbuffer;
-+  rrb->base.AllocStorage = radeon_alloc_renderbuffer_storage;
-+  rrb->base.GetPointer = radeon_get_pointer;
-+
-+  return &rrb->base;
-+}
-+
-+static void
-+radeon_bind_framebuffer(GLcontext * ctx, GLenum target,
-+                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
-+{
-+   if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
-+      radeon_draw_buffer(ctx, fb);
-+   }
-+   else {
-+      /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
-+   }
-+}
-+
-+static void
-+radeon_framebuffer_renderbuffer(GLcontext * ctx,
-+                               struct gl_framebuffer *fb,
-+                               GLenum attachment, struct gl_renderbuffer *rb)
-+{
-+
-+   radeonFlush(ctx);
-+
-+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
-+   radeon_draw_buffer(ctx, fb);
-+}
-+
-+
-+static GLboolean
-+radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, 
-+		     struct gl_texture_image *texImage)
-+{
-+	int retry = 0;
-+restart:
-+	if (texImage->TexFormat == &_mesa_texformat_argb8888) {
-+		rrb->cpp = 4;
-+		rrb->base._ActualFormat = GL_RGBA8;
-+		rrb->base._BaseFormat = GL_RGBA;
-+		rrb->base.DataType = GL_UNSIGNED_BYTE;
-+		DBG("Render to RGBA8 texture OK\n");
-+	}
-+	else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
-+		rrb->cpp = 2;
-+		rrb->base._ActualFormat = GL_RGB5;
-+		rrb->base._BaseFormat = GL_RGB;
-+		rrb->base.DataType = GL_UNSIGNED_SHORT;
-+		DBG("Render to RGB5 texture OK\n");
-+	}
-+	else if (texImage->TexFormat == &_mesa_texformat_z16) {
-+		rrb->cpp = 2;
-+		rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
-+		rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
-+		rrb->base.DataType = GL_UNSIGNED_SHORT;
-+		DBG("Render to DEPTH16 texture OK\n");
-+	}
-+	else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
-+		rrb->cpp = 4;
-+		rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+		rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
-+		rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-+		DBG("Render to DEPTH_STENCIL texture OK\n");
-+	}
-+	else {
-+		/* try redoing the FBO */
-+		if (retry == 1) {
-+			DBG("Render to texture BAD FORMAT %d\n",
-+			    texImage->TexFormat->MesaFormat);
-+			return GL_FALSE;
-+		}
-+		texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
-+								texImage->TexFormat->DataType,
-+								1);
-+
-+		retry++;
-+		goto restart;
-+	}
-+	
-+	rrb->pitch = texImage->Width * rrb->cpp;
-+	rrb->base.InternalFormat = rrb->base._ActualFormat;
-+	rrb->base.Width = texImage->Width;
-+	rrb->base.Height = texImage->Height;
-+	rrb->base.RedBits = texImage->TexFormat->RedBits;
-+	rrb->base.GreenBits = texImage->TexFormat->GreenBits;
-+	rrb->base.BlueBits = texImage->TexFormat->BlueBits;
-+	rrb->base.AlphaBits = texImage->TexFormat->AlphaBits;
-+	rrb->base.DepthBits = texImage->TexFormat->DepthBits;
-+	
-+	rrb->base.Delete = radeon_delete_renderbuffer;
-+	rrb->base.AllocStorage = radeon_nop_alloc_storage;
-+	
-+	return GL_TRUE;
-+}
-+
-+
-+static struct radeon_renderbuffer *
-+radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
-+{
-+  const GLuint name = ~0;   /* not significant, but distinct for debugging */
-+  struct radeon_renderbuffer *rrb;
-+
-+   /* make an radeon_renderbuffer to wrap the texture image */
-+   rrb = CALLOC_STRUCT(radeon_renderbuffer);
-+   if (!rrb) {
-+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
-+      return NULL;
-+   }
-+
-+   _mesa_init_renderbuffer(&rrb->base, name);
-+   rrb->base.ClassID = RADEON_RB_CLASS;
-+
-+   if (!radeon_update_wrapper(ctx, rrb, texImage)) {
-+      _mesa_free(rrb);
-+      return NULL;
-+   }
-+
-+   return rrb;
-+  
-+}
-+static void
-+radeon_render_texture(GLcontext * ctx,
-+                     struct gl_framebuffer *fb,
-+                     struct gl_renderbuffer_attachment *att)
-+{
-+   struct gl_texture_image *newImage
-+      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
-+   struct radeon_renderbuffer *rrb = radeon_renderbuffer(att->Renderbuffer);
-+   radeon_texture_image *radeon_image;
-+   GLuint imageOffset;
-+
-+   (void) fb;
-+
-+   ASSERT(newImage);
-+
-+   if (newImage->Border != 0) {
-+      /* Fallback on drawing to a texture with a border, which won't have a
-+       * miptree.
-+       */
-+      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
-+      _mesa_render_texture(ctx, fb, att);
-+      return;
-+   }
-+   else if (!rrb) {
-+      rrb = radeon_wrap_texture(ctx, newImage);
-+      if (rrb) {
-+         /* bind the wrapper to the attachment point */
-+         _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base);
-+      }
-+      else {
-+         /* fallback to software rendering */
-+         _mesa_render_texture(ctx, fb, att);
-+         return;
-+      }
-+   }
-+
-+   if (!radeon_update_wrapper(ctx, rrb, newImage)) {
-+       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
-+       _mesa_render_texture(ctx, fb, att);
-+       return;
-+   }
-+
-+   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
-+       _glthread_GetID(),
-+       att->Texture->Name, newImage->Width, newImage->Height,
-+       rrb->base.RefCount);
-+
-+   /* point the renderbufer's region to the texture image region */
-+   radeon_image = (radeon_texture_image *)newImage;
-+   if (rrb->bo != radeon_image->mt->bo) {
-+      if (rrb->bo)
-+  	radeon_bo_unref(rrb->bo);
-+      rrb->bo = radeon_image->mt->bo;
-+      radeon_bo_ref(rrb->bo);
-+   }
-+
-+   /* compute offset of the particular 2D image within the texture region */
-+   imageOffset = radeon_miptree_image_offset(radeon_image->mt,
-+                                            att->CubeMapFace,
-+                                            att->TextureLevel);
-+
-+   if (att->Texture->Target == GL_TEXTURE_3D) {
-+      GLuint offsets[6];
-+      radeon_miptree_depth_offsets(radeon_image->mt, att->TextureLevel,
-+				   offsets);
-+      imageOffset += offsets[att->Zoffset];
-+   }
-+
-+   /* store that offset in the region */
-+   rrb->draw_offset = imageOffset;
-+
-+   /* update drawing region, etc */
-+   radeon_draw_buffer(ctx, fb);
-+}
-+
-+static void
-+radeon_finish_render_texture(GLcontext * ctx,
-+                            struct gl_renderbuffer_attachment *att)
-+{
-+
-+}
-+static void
-+radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
-+{
-+}
-+
-+static void
-+radeon_blit_framebuffer(GLcontext *ctx,
-+                       GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
-+                       GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
-+                       GLbitfield mask, GLenum filter)
-+{
-+}
-+
-+void radeon_fbo_init(struct radeon_context *radeon)
-+{
-+  radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
-+  radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
-+  radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
-+  radeon->glCtx->Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer;
-+  radeon->glCtx->Driver.RenderTexture = radeon_render_texture;
-+  radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
-+  radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
-+  radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
-+  radeon->glCtx->Driver.BlitFramebuffer = radeon_blit_framebuffer;
-+}
-+
-+  
-+void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
-+				struct radeon_bo *bo)
-+{
-+  struct radeon_bo *old;
-+  old = rb->bo;
-+  rb->bo = bo;
-+  radeon_bo_ref(bo);
-+  if (old)
-+    radeon_bo_unref(old);
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-index 09acf6b..b5fde6d 100644
---- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-@@ -37,12 +37,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include <sched.h>
- #include <errno.h> 
- 
-+#include "main/attrib.h"
-+#include "main/enable.h"
-+#include "main/blend.h"
-+#include "main/bufferobj.h"
-+#include "main/buffers.h"
-+#include "main/depth.h"
-+#include "main/shaders.h"
-+#include "main/texstate.h"
-+#include "main/varray.h"
-+#include "glapi/dispatch.h"
-+#include "swrast/swrast.h"
-+#include "main/stencil.h"
-+#include "main/matrix.h"
-+
- #include "main/glheader.h"
- #include "main/imports.h"
- #include "main/simple_list.h"
- #include "swrast/swrast.h"
- 
- #include "radeon_context.h"
-+#include "radeon_common.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_tcl.h"
-@@ -58,75 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define RADEON_IDLE_RETRY           16
- 
- 
--static void radeonWaitForIdle( radeonContextPtr rmesa );
--static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
--				    const char * caller );
--
--static void print_state_atom( struct radeon_state_atom *state )
--{
--   int i;
--
--   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
--
--   if (RADEON_DEBUG & DEBUG_VERBOSE) 
--      for (i = 0 ; i < state->cmd_size ; i++) 
--	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
--
--}
--
--static void radeonSaveHwState( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--   char * dest = rmesa->backup_store.cmd_buf;
--
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--   
--   rmesa->backup_store.cmd_used = 0;
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( atom->check( rmesa->glCtx ) ) {
--	 int size = atom->cmd_size * 4;
--	 memcpy( dest, atom->cmd, size);
--	 dest += size;
--	 rmesa->backup_store.cmd_used += size;
--	 if (RADEON_DEBUG & DEBUG_STATE)
--	    print_state_atom( atom );
--      }
--   }
--
--   assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Returning to radeonEmitState\n");
--}
--
--/* At this point we were in FlushCmdBufLocked but we had lost our context, so
-- * we need to unwire our current cmdbuf, hook the one with the saved state in
-- * it, flush it, and then put the current one back.  This is so commands at the
-- * start of a cmdbuf can rely on the state being kept from the previous one.
-- */
--static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
--{
--   GLuint nr_released_bufs;
--   struct radeon_store saved_store;
--
--   if (rmesa->backup_store.cmd_used == 0)
--      return;
--
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Emitting backup state on lost context\n");
--
--   rmesa->lost_context = GL_FALSE;
--
--   nr_released_bufs = rmesa->dma.nr_released_bufs;
--   saved_store = rmesa->store;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->store = rmesa->backup_store;
--   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
--   rmesa->dma.nr_released_bufs = nr_released_bufs;
--   rmesa->store = saved_store;
--}
--
- /* =============================================================
-  * Kernel command buffer handling
-  */
-@@ -134,965 +80,360 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
- /* The state atoms will be emitted in the order they appear in the atom list,
-  * so this step is important.
-  */
--void radeonSetUpAtomList( radeonContextPtr rmesa )
-+void radeonSetUpAtomList( r100ContextPtr rmesa )
- {
--   int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   make_empty_list(&rmesa->hw.atomlist);
--   rmesa->hw.atomlist.name = "atom-list";
--
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
-+   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
-+
-+   make_empty_list(&rmesa->radeon.hw.atomlist);
-+   rmesa->radeon.hw.atomlist.name = "atom-list";
-+
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
-    for (i = 0; i < mtu; ++i) {
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
-    }
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
-    for (i = 0; i < 3 + mtu; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
-    for (i = 0; i < 8; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
-    for (i = 0; i < 6; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
--}
--
--void radeonEmitState( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--   char *dest;
--
--   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (rmesa->save_on_next_emit) {
--      radeonSaveHwState(rmesa);
--      rmesa->save_on_next_emit = GL_FALSE;
--   }
--
--   /* this code used to return here but now it emits zbs */
--
--   /* To avoid going across the entire set of states multiple times, just check
--    * for enough space for the case of emitting all state, and inline the
--    * radeonAllocCmdBuf code here without all the checks.
--    */
--   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
--   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--
--   /* We always always emit zbs, this is due to a bug found by keithw in
--      the hardware and rediscovered after Erics changes by me.
--      if you ever touch this code make sure you emit zbs otherwise
--      you get tcl lockups on at least M7/7500 class of chips - airlied */
--   rmesa->hw.zbs.dirty=1;
--
--   if (RADEON_DEBUG & DEBUG_STATE) {
--      foreach(atom, &rmesa->hw.atomlist) {
--	 if (atom->dirty || rmesa->hw.all_dirty) {
--	    if (atom->check(rmesa->glCtx))
--	       print_state_atom(atom);
--	    else
--	       fprintf(stderr, "skip state %s\n", atom->name);
--	 }
--      }
--   }
--
--   foreach(atom, &rmesa->hw.atomlist) {
--      if (rmesa->hw.all_dirty)
--	 atom->dirty = GL_TRUE;
--      if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
--	   atom->is_tcl)
--	 atom->dirty = GL_FALSE;
--      if (atom->dirty) {
--	 if (atom->check(rmesa->glCtx)) {
--	    int size = atom->cmd_size * 4;
--	    memcpy(dest, atom->cmd, size);
--	    dest += size;
--	    rmesa->store.cmd_used += size;
--	    atom->dirty = GL_FALSE;
--	 }
--      }
--   }
--
--   assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
-- 
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
- }
- 
- /* Fire a section of the retained (indexed_verts) buffer as a regular
-  * primtive.  
-  */
--extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
-+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
- 				GLuint vertex_format,
- 				GLuint primitive,
- 				GLuint vertex_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
--
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-    
--   radeonEmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
- 
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
--	      rmesa->store.cmd_used/4);
--   
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
--						       __FUNCTION__ );
- #if RADEON_OLD_PACKETS
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
--   cmd[2].i = rmesa->ioctl.vertex_offset;
--   cmd[3].i = vertex_nr;
--   cmd[4].i = vertex_format;
--   cmd[5].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
--	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
--
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
--#else
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
--   cmd[2].i = vertex_format;
--   cmd[3].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
--	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
--
--
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, cmd[2].i, cmd[3].i);
-+   BEGIN_BATCH(8);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   } else {
-+     OUT_BATCH(rmesa->ioctl.vertex_offset);
-+   }
-+    
-+   OUT_BATCH(vertex_nr);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			   rmesa->ioctl.bo,
-+			   RADEON_GEM_DOMAIN_GTT,
-+			   0, 0);
-+   }
-+   
-+   END_BATCH();
-+   
-+#else   
-+   BEGIN_BATCH(4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive |
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-+   END_BATCH();
- #endif
- }
- 
--
--void radeonFlushElts( radeonContextPtr rmesa )
-+void radeonFlushElts( GLcontext *ctx )
- {
--   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
--   int dwords;
--#if RADEON_OLD_PACKETS
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
--#else
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
--#endif
--
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   int nr;
-+   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
-+   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
-+   
-    if (RADEON_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
--   assert( rmesa->dma.flush == radeonFlushElts );
--   rmesa->dma.flush = NULL;
-+   assert( rmesa->radeon.dma.flush == radeonFlushElts );
-+   rmesa->radeon.dma.flush = NULL;
- 
--   /* Cope with odd number of elts:
--    */
--   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
--   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
-+   nr = rmesa->tcl.elt_used;
- 
- #if RADEON_OLD_PACKETS
--   cmd[1] |= (dwords - 3) << 16;
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     dwords -= 2;
-+   }
-+#endif
-+
-+#if RADEON_OLD_PACKETS
-+   cmd[1] |= (dwords + 3) << 16;
-    cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
- #else
--   cmd[1] |= (dwords - 3) << 16;
-+   cmd[1] |= (dwords + 2) << 16;
-    cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
- #endif
- 
-+   rmesa->radeon.cmdbuf.cs->cdw += dwords;
-+   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
-+
-+#if RADEON_OLD_PACKETS
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+      radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			    rmesa->ioctl.bo,
-+			    RADEON_GEM_DOMAIN_GTT,
-+			    0, 0);
-+   }
-+#endif
-+
-+   END_BATCH();
-+
-    if (RADEON_DEBUG & DEBUG_SYNC) {
-       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--      radeonFinish( rmesa->glCtx );
-+      radeonFinish( rmesa->radeon.glCtx );
-    }
--}
- 
-+}
- 
--GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
-+GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
- 				    GLuint vertex_format,
- 				    GLuint primitive,
- 				    GLuint min_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
-    GLushort *retval;
-+   int align_min_nr;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
-+      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
- 
-    assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-    
--   radeonEmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
-    
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
--						       ELTS_BUFSZ(min_nr),
--						       __FUNCTION__ );
-+   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
-+
-+   /* round up min_nr to align the state */
-+   align_min_nr = (min_nr + 1) & ~1;
-+
- #if RADEON_OLD_PACKETS
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
--   cmd[2].i = rmesa->ioctl.vertex_offset;
--   cmd[3].i = 0xffff;
--   cmd[4].i = vertex_format;
--   cmd[5].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
--
--   retval = (GLushort *)(cmd+6);
--#else   
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
--   cmd[2].i = vertex_format;
--   cmd[3].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
--
--   retval = (GLushort *)(cmd+4);
-+   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   } else {
-+     OUT_BATCH(rmesa->ioctl.vertex_offset);
-+   }
-+   OUT_BATCH(0xffff);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive | 
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-+
-+#else
-+   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive | 
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
- #endif
- 
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, vertex_format, primitive);
- 
--   assert(!rmesa->dma.flush);
--   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--   rmesa->dma.flush = radeonFlushElts;
-+   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
-+   rmesa->tcl.elt_used = min_nr;
-+
-+   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
-+   
-+   if (RADEON_DEBUG & DEBUG_PRIMS)
-+      fprintf(stderr, "%s: header prim %x \n",
-+	      __FUNCTION__, primitive);
- 
--   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
-+   assert(!rmesa->radeon.dma.flush);
-+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+   rmesa->radeon.dma.flush = radeonFlushElts;
- 
-    return retval;
- }
- 
--
--
--void radeonEmitVertexAOS( radeonContextPtr rmesa,
-+void radeonEmitVertexAOS( r100ContextPtr rmesa,
- 			  GLuint vertex_size,
-+			  struct radeon_bo *bo,
- 			  GLuint offset )
- {
- #if RADEON_OLD_PACKETS
--   rmesa->ioctl.vertex_size = vertex_size;
-    rmesa->ioctl.vertex_offset = offset;
-+   rmesa->ioctl.bo = bo;
- #else
--   drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
-       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
- 	      __FUNCTION__, vertex_size, offset);
- 
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
--						  __FUNCTION__ );
-+   BEGIN_BATCH(7);
-+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
-+   OUT_BATCH(1);
-+   OUT_BATCH(vertex_size | (vertex_size << 8));
-+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   END_BATCH();
- 
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
--   cmd[2].i = 1;
--   cmd[3].i = vertex_size | (vertex_size << 8);
--   cmd[4].i = offset;
- #endif
- }
- 		       
- 
--void radeonEmitAOS( radeonContextPtr rmesa,
--		    struct radeon_dma_region **component,
-+void radeonEmitAOS( r100ContextPtr rmesa,
- 		    GLuint nr,
- 		    GLuint offset )
- {
- #if RADEON_OLD_PACKETS
-    assert( nr == 1 );
--   assert( component[0]->aos_size == component[0]->aos_stride );
--   rmesa->ioctl.vertex_size = component[0]->aos_size;
-+   rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
-    rmesa->ioctl.vertex_offset = 
--      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
-+     (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
- #else
--   drm_radeon_cmd_header_t *cmd;
--   int sz = AOS_BUFSZ(nr);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   uint32_t voffset;
-+   //   int sz = AOS_BUFSZ(nr);
-+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
-    int i;
--   int *tmp;
- 
-    if (RADEON_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
--						  __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
--   cmd[2].i = nr;
--   tmp = &cmd[0].i;
--   cmd += 3;
--
--   for (i = 0 ; i < nr ; i++) {
--      if (i & 1) {
--	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
--		      (component[i]->aos_size << 16));
--	 cmd[2].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
--	 cmd += 3;
--      }
--      else {
--	 cmd[0].i = ((component[i]->aos_stride << 8) | 
--		     (component[i]->aos_size << 0));
--	 cmd[1].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
-+   BEGIN_BATCH(sz+2+(nr * 2));
-+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+   OUT_BATCH(nr);
-+
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
-+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+			
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[i].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[i+1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--   }
--
--   if (RADEON_DEBUG & DEBUG_VERTS) {
--      fprintf(stderr, "%s:\n", __FUNCTION__);
--      for (i = 0 ; i < sz ; i++)
--	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
--   }
--#endif
--}
--
--/* using already shifted color_fmt! */
--void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
--		   GLuint color_fmt,
--		   GLuint src_pitch,
--		   GLuint src_offset,
--		   GLuint dst_pitch,
--		   GLuint dst_offset,
--		   GLint srcx, GLint srcy,
--		   GLint dstx, GLint dsty,
--		   GLuint w, GLuint h )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--	      __FUNCTION__, 
--	      src_pitch, src_offset, srcx, srcy,
--	      dst_pitch, dst_offset, dstx, dsty,
--	      w, h);
--
--   assert( (src_pitch & 63) == 0 );
--   assert( (dst_pitch & 63) == 0 );
--   assert( (src_offset & 1023) == 0 ); 
--   assert( (dst_offset & 1023) == 0 ); 
--   assert( w < (1<<16) );
--   assert( h < (1<<16) );
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
--						  __FUNCTION__ );
--
--
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
--   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_BRUSH_NONE |
--	       color_fmt |
--	       RADEON_GMC_SRC_DATATYPE_COLOR |
--	       RADEON_ROP3_S |
--	       RADEON_DP_SRC_SOURCE_MEMORY |
--	       RADEON_GMC_CLR_CMP_CNTL_DIS |
--	       RADEON_GMC_WR_MSK_DIS );
--
--   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
--   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
--   cmd[5].i = (srcx << 16) | srcy;
--   cmd[6].i = (dstx << 16) | dsty; /* dst */
--   cmd[7].i = (w << 16) | h;
--}
--
--
--void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
--					   __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
--   cmd[0].wait.flags = flags;
--}
--
--
--static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
--				    const char * caller )
--{
--   int ret, i;
--   drm_radeon_cmd_buffer_t cmd;
--
--   if (rmesa->lost_context)
--      radeonBackUpAndEmitLostStateLocked(rmesa);
--
--   if (RADEON_DEBUG & DEBUG_IOCTL) {
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--
--      if (RADEON_DEBUG & DEBUG_VERBOSE) 
--	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
--	    fprintf(stderr, "%d: %x\n", i/4, 
--		    *(int *)(&rmesa->store.cmd_buf[i]));
--   }
--
--   if (RADEON_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
--	      rmesa->dma.nr_released_bufs);
--
--
--   if (RADEON_DEBUG & DEBUG_SANITY) {
--      if (rmesa->state.scissor.enabled) 
--	 ret = radeonSanityCmdBuffer( rmesa, 
--				      rmesa->state.scissor.numClipRects,
--				      rmesa->state.scissor.pClipRects);
--      else
--	 ret = radeonSanityCmdBuffer( rmesa, 
--				      rmesa->numClipRects,
--				      rmesa->pClipRects);
--      if (ret) {
--	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
--	 goto out;
-+      
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->radeon.tcl.aos[nr - 1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--   }
--
--
--   cmd.bufsz = rmesa->store.cmd_used;
--   cmd.buf = rmesa->store.cmd_buf;
--
--   if (rmesa->state.scissor.enabled) {
--      cmd.nbox = rmesa->state.scissor.numClipRects;
--      cmd.boxes = rmesa->state.scissor.pClipRects;
-    } else {
--      cmd.nbox = rmesa->numClipRects;
--      cmd.boxes = rmesa->pClipRects;
--   }
--
--   ret = drmCommandWrite( rmesa->dri.fd,
--			  DRM_RADEON_CMDBUF,
--			  &cmd, sizeof(cmd) );
--
--   if (ret)
--      fprintf(stderr, "drmCommandWrite: %d\n", ret);
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
--      radeonWaitForIdleLocked( rmesa );
--   }
--
-- out:
--   rmesa->store.primnr = 0;
--   rmesa->store.statenr = 0;
--   rmesa->store.cmd_used = 0;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->save_on_next_emit = 1;
--
--   return ret;
--}
--
--
--/* Note: does not emit any commands to avoid recursion on
-- * radeonAllocCmdBuf.
-- */
--void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
--{
--   int ret;
--
--	      
--   LOCK_HARDWARE( rmesa );
--
--   ret = radeonFlushCmdBufLocked( rmesa, caller );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if (ret) {
--      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
--      exit(ret);
--   }
--}
--
--/* =============================================================
-- * Hardware vertex buffer handling
-- */
--
--
--void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
--{
--   struct radeon_dma_buffer *dmabuf;
--   int fd = rmesa->dri.fd;
--   int index = 0;
--   int size = 0;
--   drmDMAReq dma;
--   int ret;
--
--   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--      fprintf(stderr, "%s\n", __FUNCTION__);  
--
--   if (rmesa->dma.flush) {
--      rmesa->dma.flush( rmesa );
--   }
--
--   if (rmesa->dma.current.buf)
--      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--
--   if (rmesa->dma.nr_released_bufs > 4)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--
--   dma.context = rmesa->dri.hwContext;
--   dma.send_count = 0;
--   dma.send_list = NULL;
--   dma.send_sizes = NULL;
--   dma.flags = 0;
--   dma.request_count = 1;
--   dma.request_size = RADEON_BUFFER_SIZE;
--   dma.request_list = &index;
--   dma.request_sizes = &size;
--   dma.granted_count = 0;
--
--   LOCK_HARDWARE(rmesa);	/* no need to validate */
--
--   ret = drmDMA( fd, &dma );
--      
--   if (ret != 0) {
--      /* Free some up this way?
--       */
--      if (rmesa->dma.nr_released_bufs) {
--	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
-+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
-+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
-+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
-+	 
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 OUT_BATCH(voffset);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 OUT_BATCH(voffset);
-       }
-       
--      if (RADEON_DEBUG & DEBUG_DMA)
--	 fprintf(stderr, "Waiting for buffers\n");
--
--      radeonWaitForIdleLocked( rmesa );
--      ret = drmDMA( fd, &dma );
--
--      if ( ret != 0 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
--	 exit( -1 );
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 OUT_BATCH(voffset);
-       }
--   }
--
--   UNLOCK_HARDWARE(rmesa);
--
--   if (RADEON_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "Allocated buffer %d\n", index);
--
--   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
--   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
--   dmabuf->refcount = 1;
--
--   rmesa->dma.current.buf = dmabuf;
--   rmesa->dma.current.address = dmabuf->buf->address;
--   rmesa->dma.current.end = dmabuf->buf->total;
--   rmesa->dma.current.start = 0;
--   rmesa->dma.current.ptr = 0;
--
--   rmesa->c_vertexBuffers++;
--}
--
--void radeonReleaseDmaRegion( radeonContextPtr rmesa,
--			     struct radeon_dma_region *region,
--			     const char *caller )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--   
--   if (!region->buf)
--      return;
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (--region->buf->refcount == 0) {
--      drm_radeon_cmd_header_t *cmd;
--
--      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
--		 region->buf->buf->idx);  
--      
--      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
--						     __FUNCTION__ );
--      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
--      cmd->dma.buf_idx = region->buf->buf->idx;
--      FREE(region->buf);
--      rmesa->dma.nr_released_bufs++;
--   }
--
--   region->buf = NULL;
--   region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void radeonAllocDmaRegion( radeonContextPtr rmesa, 
--			   struct radeon_dma_region *region,
--			   int bytes,
--			   int alignment )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (region->buf)
--      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
--
--   alignment--;
--   rmesa->dma.current.start = rmesa->dma.current.ptr = 
--      (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      radeonRefillCurrentDmaRegion( rmesa );
--
--   region->start = rmesa->dma.current.start;
--   region->ptr = rmesa->dma.current.start;
--   region->end = rmesa->dma.current.start + bytes;
--   region->address = rmesa->dma.current.address;
--   region->buf = rmesa->dma.current.buf;
--   region->buf->refcount++;
--
--   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
--   rmesa->dma.current.start = 
--      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
--}
--
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) 
--{
--   drm_radeon_getparam_t gp;
--   int ret;
--   uint32_t frame;
--
--   gp.param = RADEON_PARAM_LAST_FRAME;
--   gp.value = (int *)&frame;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
--			      &gp, sizeof(gp) );
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--
--   return frame;
--}
--
--static void radeonEmitIrqLocked( radeonContextPtr rmesa )
--{
--   drm_radeon_irq_emit_t ie;
--   int ret;
--
--   ie.irq_seq = &rmesa->iw.irq_seq;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
--			      &ie, sizeof(ie) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void radeonWaitIrq( radeonContextPtr rmesa )
--{
--   int ret;
--
--   do {
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
--			     &rmesa->iw, sizeof(rmesa->iw) );
--   } while (ret && (errno == EINTR || errno == EBUSY));
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--
--   if (rmesa->do_irqs) {
--      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
--	 if (!rmesa->irqsEmitted) {
--	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
--	       ;
--	 }
--	 else {
--	    UNLOCK_HARDWARE( rmesa ); 
--	    radeonWaitIrq( rmesa );	
--	    LOCK_HARDWARE( rmesa ); 
--	 }
--	 rmesa->irqsEmitted = 10;
--      }
--
--      if (rmesa->irqsEmitted) {
--	 radeonEmitIrqLocked( rmesa );
--	 rmesa->irqsEmitted--;
--      }
--   } 
--   else {
--      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
--	 UNLOCK_HARDWARE( rmesa ); 
--	 if (rmesa->do_usleeps) 
--	    DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa ); 
--      }
--   }
--}
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
--		       const drm_clip_rect_t	  *rect)
--{
--   radeonContextPtr rmesa;
--   GLint nbox, i, ret;
--   GLboolean   missed_target;
--   int64_t ust;
--   __DRIscreenPrivate *psp;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
--   }
--
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   /* Throttle the frame rate -- only allow one pending swap buffers
--    * request at a time.
--    */
--   radeonWaitForFrameCompletion( rmesa );
--   if (!rect)
--   {
--       UNLOCK_HARDWARE( rmesa );
--       driWaitForVBlank( dPriv, & missed_target );
--       LOCK_HARDWARE( rmesa );
--   }
--
--   nbox = dPriv->numClipRects; /* must be in locked region */
--
--   for ( i = 0 ; i < nbox ; ) {
--      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      GLint n = 0;
--
--      for ( ; i < nr ; i++ ) {
--
--	  *b = box[i];
--
--	  if (rect)
--	  {
--	      if (rect->x1 > b->x1)
--		  b->x1 = rect->x1;
--	      if (rect->y1 > b->y1)
--		  b->y1 = rect->y1;
--	      if (rect->x2 < b->x2)
--		  b->x2 = rect->x2;
--	      if (rect->y2 < b->y2)
--		  b->y2 = rect->y2;
--
--	      if (b->x1 >= b->x2 || b->y1 >= b->y2)
--		  continue;
--	  }
--
--	  b++;
--	  n++;
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[i+0].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[i+1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
--      rmesa->sarea->nbox = n;
--
--      if (!n)
--	 continue;
--
--      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
--
--      if ( ret ) {
--	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
--	 UNLOCK_HARDWARE( rmesa );
--	 exit( 1 );
-+      if (nr & 1) {
-+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->radeon.tcl.aos[nr-1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
-    }
-+   END_BATCH();
- 
--   UNLOCK_HARDWARE( rmesa );
--   if (!rect)
--   {
--       psp = dPriv->driScreenPriv;
--       rmesa->swap_count++;
--       (*psp->systemTime->getUST)( & ust );
--       if ( missed_target ) {
--	   rmesa->swap_missed_count++;
--	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
--       }
--
--       rmesa->swap_ust = ust;
--       rmesa->hw.all_dirty = GL_TRUE;
--   }
--}
--
--void radeonPageFlip( __DRIdrawablePrivate *dPriv )
--{
--   radeonContextPtr rmesa;
--   GLint ret;
--   GLboolean   missed_target;
--   __DRIscreenPrivate *psp;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--   psp = dPriv->driScreenPriv;
--
--   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
--      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--	      rmesa->sarea->pfCurrentPage);
--   }
--
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   /* Need to do this for the perf box placement:
--    */
--   if (dPriv->numClipRects)
--   {
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      b[0] = box[0];
--      rmesa->sarea->nbox = 1;
--   }
--
--   /* Throttle the frame rate -- only allow a few pending swap buffers
--    * request at a time.
--    */
--   radeonWaitForFrameCompletion( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   driWaitForVBlank( dPriv, & missed_target );
--   if ( missed_target ) {
--      rmesa->swap_missed_count++;
--      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
--   }
--   LOCK_HARDWARE( rmesa );
--
--   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
--      exit( 1 );
--   }
--
--   rmesa->swap_count++;
--   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
--
--   /* Get ready for drawing next frame.  Update the renderbuffers'
--    * flippedOffset/Pitch fields so we draw into the right place.
--    */
--   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                        rmesa->sarea->pfCurrentPage);
--
--   radeonUpdateDrawBuffer(rmesa->glCtx);
-+#endif
- }
- 
--
- /* ================================================================
-  * Buffer clear
-  */
- #define RADEON_MAX_CLEARS	256
- 
--static void radeonClear( GLcontext *ctx, GLbitfield mask )
-+static void radeonUserClear(GLcontext *ctx, GLuint mask)
-+{
-+   radeon_clear_tris(ctx, mask);
-+}
-+
-+static void radeonKernelClear(GLcontext *ctx, GLuint flags)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
-+     r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
-    uint32_t clear;
--   GLuint flags = 0;
--   GLuint color_mask = 0;
-    GLint ret, i;
-    GLint cx, cy, cw, ch;
- 
--   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "radeonClear\n");
--   }
--
--   {
--      LOCK_HARDWARE( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      if ( dPriv->numClipRects == 0 ) 
--	 return;
--   }
--   
--   radeonFlush( ctx ); 
--
--   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
--      flags |= RADEON_FRONT;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_FRONT_LEFT;
--   }
--
--   if ( mask & BUFFER_BIT_BACK_LEFT ) {
--      flags |= RADEON_BACK;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_BACK_LEFT;
--   }
--
--   if ( mask & BUFFER_BIT_DEPTH ) {
--      flags |= RADEON_DEPTH;
--      mask &= ~BUFFER_BIT_DEPTH;
--   }
--
--   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
--      flags |= RADEON_STENCIL;
--      mask &= ~BUFFER_BIT_STENCIL;
--   }
--
--   if ( mask ) {
--      if (RADEON_DEBUG & DEBUG_FALLBACKS)
--	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
--      _swrast_Clear( ctx, mask );
--   }
--
--   if ( !flags ) 
--      return;
--
--   if (rmesa->using_hyperz) {
--      flags |= RADEON_USE_COMP_ZBUF;
--/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
--         flags |= RADEON_USE_HIERZ; */
--      if (!(rmesa->state.stencil.hwBuffer) ||
--	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
--	    ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
--	  flags |= RADEON_CLEAR_FASTZ;
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* compute region after locking: */
-    cx = ctx->DrawBuffer->_Xmin;
-@@ -1112,7 +453,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 
-       gp.param = RADEON_PARAM_LAST_CLEAR;
-       gp.value = (int *)&clear;
--      ret = drmCommandWriteRead( rmesa->dri.fd,
-+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
- 				 DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
- 
-       if ( ret ) {
-@@ -1124,20 +465,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 	 break;
-       }
- 
--      if ( rmesa->do_usleeps ) {
--	 UNLOCK_HARDWARE( rmesa );
-+      if ( rmesa->radeon.do_usleeps ) {
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
-+	 LOCK_HARDWARE( &rmesa->radeon );
-       }
-    }
- 
-    /* Send current state to the hardware */
--   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    for ( i = 0 ; i < dPriv->numClipRects ; ) {
-       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-       drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
-+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
-       drm_radeon_clear_t clear;
-       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
-       GLint n = 0;
-@@ -1172,105 +513,107 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 	 }
-       }
- 
--      rmesa->sarea->nbox = n;
-+      rmesa->radeon.sarea->nbox = n;
- 
-       clear.flags       = flags;
--      clear.clear_color = rmesa->state.color.clear;
--      clear.clear_depth = rmesa->state.depth.clear;
-+      clear.clear_color = rmesa->radeon.state.color.clear;
-+      clear.clear_depth = rmesa->radeon.state.depth.clear;
-       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      clear.depth_mask  = rmesa->state.stencil.clear;
-+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
-       clear.depth_boxes = depth_boxes;
- 
-       n--;
--      b = rmesa->sarea->boxes;
-+      b = rmesa->radeon.sarea->boxes;
-       for ( ; n >= 0 ; n-- ) {
- 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
- 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
- 	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
- 	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
- 	 depth_boxes[n].f[CLEAR_DEPTH] = 
--	    (float)rmesa->state.depth.clear;
-+	    (float)rmesa->radeon.state.depth.clear;
-       }
- 
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
- 			     &clear, sizeof(drm_radeon_clear_t));
- 
-       if ( ret ) {
--	 UNLOCK_HARDWARE( rmesa );
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
- 	 exit( 1 );
-       }
-    }
--
--   UNLOCK_HARDWARE( rmesa );
--   rmesa->hw.all_dirty = GL_TRUE;
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
- 
--
--void radeonWaitForIdleLocked( radeonContextPtr rmesa )
-+static void radeonClear( GLcontext *ctx, GLbitfield mask )
- {
--    int fd = rmesa->dri.fd;
--    int to = 0;
--    int ret, i = 0;
--
--    rmesa->c_drawWaits++;
--
--    do {
--        do {
--            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
--        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
--    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
--
--    if ( ret < 0 ) {
--	UNLOCK_HARDWARE( rmesa );
--	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
--	exit( -1 );
--    }
--}
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLuint flags = 0;
-+   GLuint color_mask = 0;
-+   GLuint orig_mask = mask;
-+
-+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-+      fprintf( stderr, "radeonClear\n");
-+   }
- 
-+   {
-+      LOCK_HARDWARE( &rmesa->radeon );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-+      if ( dPriv->numClipRects == 0 ) 
-+	 return;
-+   }
-+   
-+   radeon_firevertices(&rmesa->radeon); 
- 
--static void radeonWaitForIdle( radeonContextPtr rmesa )
--{
--   LOCK_HARDWARE(rmesa);
--   radeonWaitForIdleLocked( rmesa );
--   UNLOCK_HARDWARE(rmesa);
--}
-+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
-+      flags |= RADEON_FRONT;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_FRONT_LEFT;
-+   }
- 
-+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
-+      flags |= RADEON_BACK;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_BACK_LEFT;
-+   }
- 
--void radeonFlush( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   if ( mask & BUFFER_BIT_DEPTH ) {
-+      flags |= RADEON_DEPTH;
-+      mask &= ~BUFFER_BIT_DEPTH;
-+   }
- 
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
-+   if ( (mask & BUFFER_BIT_STENCIL) ) {
-+      flags |= RADEON_STENCIL;
-+      mask &= ~BUFFER_BIT_STENCIL;
-+   }
- 
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
-+   if ( mask ) {
-+      if (RADEON_DEBUG & DEBUG_FALLBACKS)
-+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
-+      _swrast_Clear( ctx, mask );
-+   }
- 
--   radeonEmitState( rmesa );
--   
--   if (rmesa->store.cmd_used)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--}
-+   if ( !flags ) 
-+      return;
- 
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void radeonFinish( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   radeonFlush( ctx );
--
--   if (rmesa->do_irqs) {
--      LOCK_HARDWARE( rmesa );
--      radeonEmitIrqLocked( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      radeonWaitIrq( rmesa );
-+   if (rmesa->using_hyperz) {
-+      flags |= RADEON_USE_COMP_ZBUF;
-+/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) 
-+         flags |= RADEON_USE_HIERZ; */
-+      if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-+	    ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
-+	  flags |= RADEON_CLEAR_FASTZ;
-+      }
-    }
--   else
--      radeonWaitForIdle( rmesa );
--}
- 
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     radeonUserClear(ctx, orig_mask);
-+   else {
-+      radeonKernelClear(ctx, flags);
-+      rmesa->radeon.hw.all_dirty = GL_TRUE;
-+   }
-+}
- 
- void radeonInitIoctlFuncs( GLcontext *ctx )
- {
-diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-index 4e3a44d..18805d4 100644
---- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/simple_list.h"
- #include "radeon_lock.h"
-+#include "radeon_bocs_wrapper.h"
- 
--
--extern void radeonEmitState( radeonContextPtr rmesa );
--extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
-+extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
- 				 GLuint vertex_size,
-+				 struct radeon_bo *bo,
- 				 GLuint offset );
- 
--extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
-+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
- 				GLuint vertex_format,
- 				GLuint primitive,
- 				GLuint vertex_nr );
- 
--extern void radeonFlushElts( radeonContextPtr rmesa );
-+extern void radeonFlushElts( GLcontext *ctx );
-+			    
- 
--extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
-+extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
- 					   GLuint vertex_format,
- 					   GLuint primitive,
- 					   GLuint min_nr );
- 
--extern void radeonEmitAOS( radeonContextPtr rmesa,
--			   struct radeon_dma_region **regions,
-+
-+extern void radeonEmitAOS( r100ContextPtr rmesa,
- 			   GLuint n,
- 			   GLuint offset );
- 
--extern void radeonEmitBlit( radeonContextPtr rmesa,
-+extern void radeonEmitBlit( r100ContextPtr rmesa,
- 			    GLuint color_fmt,
- 			    GLuint src_pitch,
- 			    GLuint src_offset,
-@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
- 			    GLint dstx, GLint dsty,
- 			    GLuint w, GLuint h );
- 
--extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
--
--extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
--extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
-+extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
- 
--extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
--				  struct radeon_dma_region *region,
--				  int bytes, 
--				  int alignment );
-+extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
- 
--extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
--				    struct radeon_dma_region *region,
--				    const char *caller );
--
--extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
--			      const drm_clip_rect_t	 *rect);
--extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
- extern void radeonFlush( GLcontext *ctx );
- extern void radeonFinish( GLcontext *ctx );
--extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
--extern void radeonWaitForVBlank( radeonContextPtr rmesa );
- extern void radeonInitIoctlFuncs( GLcontext *ctx );
--extern void radeonGetAllParams( radeonContextPtr rmesa );
--extern void radeonSetUpAtomList( radeonContextPtr rmesa );
-+extern void radeonGetAllParams( r100ContextPtr rmesa );
-+extern void radeonSetUpAtomList( r100ContextPtr rmesa );
- 
- /* ================================================================
-  * Helper macros:
-@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
-  */
- #define RADEON_NEWPRIM( rmesa )			\
- do {						\
--   if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );	\
-+   if ( rmesa->radeon.dma.flush )			\
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
- } while (0)
- 
- /* Can accomodate several state changes and primitive changes without
-  * actually firing the buffer.
-  */
-+
- #define RADEON_STATECHANGE( rmesa, ATOM )			\
- do {								\
-    RADEON_NEWPRIM( rmesa );					\
-    rmesa->hw.ATOM.dirty = GL_TRUE;				\
--   rmesa->hw.is_dirty = GL_TRUE;				\
-+   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
- } while (0)
- 
--#define RADEON_DB_STATE( ATOM )			        \
-+#define RADEON_DB_STATE( ATOM )				\
-    memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
- 	   rmesa->hw.ATOM.cmd_size * 4)
- 
--static INLINE int RADEON_DB_STATECHANGE( 
--   radeonContextPtr rmesa,
--   struct radeon_state_atom *atom )
-+static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
-+					struct radeon_state_atom *atom )
- {
-    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
--      int *tmp;
-+      GLuint *tmp;
-       RADEON_NEWPRIM( rmesa );
-       atom->dirty = GL_TRUE;
--      rmesa->hw.is_dirty = GL_TRUE;
-+      rmesa->radeon.hw.is_dirty = GL_TRUE;
-       tmp = atom->cmd; 
-       atom->cmd = atom->lastcmd;
-       atom->lastcmd = tmp;
-@@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE(
-       return 0;
- }
- 
--
--/* Fire the buffered vertices no matter what.
-- */
--#define RADEON_FIREVERTICES( rmesa )			\
--do {							\
--   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
--      radeonFlush( rmesa->glCtx );			\
--   }							\
--} while (0)
--
- /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
-  * are available, you will also be adding an rmesa->state.max_state_size because
-  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
-@@ -167,36 +143,37 @@ do {							\
- #define VBUF_BUFSZ	(4 * sizeof(int))
- #endif
- 
--/* Ensure that a minimum amount of space is available in the command buffer.
-- * This is used to ensure atomicity of state updates with the rendering requests
-- * that rely on them.
-- *
-- * An alternative would be to implement a "soft lock" such that when the buffer
-- * wraps at an inopportune time, we grab the lock, flush the current buffer,
-- * and hang on to the lock until the critical section is finished and we flush
-- * the buffer again and unlock.
-- */
--static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
--					      int bytes )
--{
--   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--   assert( bytes <= RADEON_CMD_BUF_SZ );
--}
- 
--/* Alloc space in the command buffer
-- */
--static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
--					 int bytes, const char *where )
-+static inline uint32_t cmdpacket3(int cmd_type)
- {
--   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-+  drm_radeon_cmd_header_t cmd;
-+
-+  cmd.i = 0;
-+  cmd.header.cmd_type = cmd_type;
-+
-+  return (uint32_t)cmd.i;
- 
--   {
--      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--      rmesa->store.cmd_used += bytes;
--      return head;
--   }
- }
- 
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+
- #endif /* __RADEON_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
-index 64bb3ca..fe19218 100644
---- a/src/mesa/drivers/dri/radeon/radeon_lock.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
-@@ -41,30 +41,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/glheader.h"
- #include "main/mtypes.h"
--#include "radeon_context.h"
-+#include "main/colormac.h"
-+#include "dri_util.h"
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
- #include "radeon_lock.h"
--#include "radeon_tex.h"
--#include "radeon_state.h"
--#include "radeon_ioctl.h"
--
- #include "drirenderbuffer.h"
- 
--#if DEBUG_LOCKING
--char *prevLockFile = NULL;
--int prevLockLine = 0;
--#endif
--
--/* Turn on/off page flipping according to the flags in the sarea:
-- */
--static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
--{
--	rmesa->doPageFlip = rmesa->sarea->pfState;
--	if (rmesa->glCtx->WinSysDrawBuffer) {
--		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--				     rmesa->sarea->pfCurrentPage);
--	}
--}
--
- /* Update the hardware state.  This is called if another context has
-  * grabbed the hardware lock, which includes the X server.  This
-  * function also updates the driver's window state after the X server
-@@ -78,7 +61,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
- 	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
- 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
--	drm_radeon_sarea_t *sarea = rmesa->sarea;
-+
-+	assert(drawable != NULL);
- 
- 	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
- 
-@@ -96,29 +80,42 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	}
- 
- 	if (rmesa->lastStamp != drawable->lastStamp) {
--		radeonUpdatePageFlipping(rmesa);
--		radeonSetCliprects(rmesa);
--		radeonUpdateViewportOffset(rmesa->glCtx);
--		driUpdateFramebufferSize(rmesa->glCtx, drawable);
-+		radeon_window_moved(rmesa);
-+		rmesa->lastStamp = drawable->lastStamp;
- 	}
- 
--	RADEON_STATECHANGE(rmesa, ctx);
--	if (rmesa->sarea->tiling_enabled) {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
--		    RADEON_COLOR_TILE_ENABLE;
--	} else {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
--		    ~RADEON_COLOR_TILE_ENABLE;
--	}
-+	rmesa->vtbl.get_lock(rmesa);
-+
-+	rmesa->lost_context = GL_TRUE;
-+}
- 
--	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--		sarea->ctx_owner = rmesa->dri.hwContext;
-+void radeon_lock_hardware(radeonContextPtr radeon)
-+{
-+	char ret = 0;
-+	struct radeon_framebuffer *rfb = NULL;
-+	struct radeon_renderbuffer *rrb = NULL;
- 
--		for (i = 0; i < rmesa->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
--		}
-+	if (radeon->dri.drawable) {
-+		rfb = radeon->dri.drawable->driverPrivate;
-+
-+		if (rfb)
-+			rrb = radeon_get_renderbuffer(&rfb->base,
-+						      rfb->base._ColorDrawBufferIndexes[0]);
- 	}
- 
--	rmesa->lost_context = GL_TRUE;
-+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
-+		DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext,
-+			 (DRM_LOCK_HELD | radeon->dri.hwContext), ret );
-+		if (ret)
-+			radeonGetLock(radeon, 0);
-+	}
-+}
-+
-+void radeon_unlock_hardware(radeonContextPtr radeon)
-+{
-+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
-+		DRM_UNLOCK( radeon->dri.fd,
-+			    radeon->dri.hwLock,
-+			    radeon->dri.hwContext );
-+	}
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
-index 86e96aa..2817709 100644
---- a/src/mesa/drivers/dri/radeon/radeon_lock.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
-@@ -39,74 +39,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  *   Kevin E. Martin <martin@valinux.com>
-  */
- 
--#ifndef __RADEON_LOCK_H__
--#define __RADEON_LOCK_H__
-+#ifndef COMMON_LOCK_H
-+#define COMMON_LOCK_H
- 
--extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
--
--/* Turn DEBUG_LOCKING on to find locking conflicts.
-- */
--#define DEBUG_LOCKING	0
--
--#if DEBUG_LOCKING
--extern char *prevLockFile;
--extern int prevLockLine;
--
--#define DEBUG_LOCK()							\
--   do {									\
--      prevLockFile = (__FILE__);					\
--      prevLockLine = (__LINE__);					\
--   } while (0)
--
--#define DEBUG_RESET()							\
--   do {									\
--      prevLockFile = 0;							\
--      prevLockLine = 0;							\
--   } while (0)
--
--#define DEBUG_CHECK_LOCK()						\
--   do {									\
--      if ( prevLockFile ) {						\
--	 fprintf( stderr,						\
--		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
--		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
--	 exit( 1 );							\
--      }									\
--   } while (0)
--
--#else
-+#include "main/colormac.h"
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
- 
--#define DEBUG_LOCK()
--#define DEBUG_RESET()
--#define DEBUG_CHECK_LOCK()
-+extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
- 
--#endif
--
--/*
-- * !!! We may want to separate locks from locks with validation.  This
-- * could be used to improve performance for those things commands that
-- * do not do any drawing !!!
-- */
-+void radeon_lock_hardware(radeonContextPtr rmesa);
-+void radeon_unlock_hardware(radeonContextPtr rmesa);
- 
- /* Lock the hardware and validate our state.
-  */
--#define LOCK_HARDWARE( rmesa )					\
--   do {								\
--      char __ret = 0;						\
--      DEBUG_CHECK_LOCK();					\
--      DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
--	       (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
--      if ( __ret )						\
--	 radeonGetLock( (rmesa), 0 );				\
--      DEBUG_LOCK();						\
--   } while (0)
--
--#define UNLOCK_HARDWARE( rmesa )					\
--   do {									\
--      DRM_UNLOCK( (rmesa)->dri.fd,					\
--		  (rmesa)->dri.hwLock,					\
--		  (rmesa)->dri.hwContext );				\
--      DEBUG_RESET();							\
--   } while (0)
-+#define LOCK_HARDWARE( rmesa )	radeon_lock_hardware(rmesa)
-+#define UNLOCK_HARDWARE( rmesa )  radeon_unlock_hardware(rmesa)
- 
--#endif				/* __RADEON_LOCK_H__ */
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.h b/src/mesa/drivers/dri/radeon/radeon_maos.h
-index b8935e8..b88eb19 100644
---- a/src/mesa/drivers/dri/radeon/radeon_maos.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_maos.h
-@@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_context.h"
- 
- extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
--extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
- 
- #endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-index 31eea13..7c6ea05 100644
---- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-@@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_maos.h"
- #include "radeon_tcl.h"
- 
--#if 0
--/* Usage:
-- *   - from radeon_tcl_render
-- *   - call radeonEmitArrays to ensure uptodate arrays in dma
-- *   - emit primitives (new type?) which reference the data
-- *       -- need to use elts for lineloop, quads, quadstrip/flat
-- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
-- *
-- */
--static void emit_ubyte_rgba3( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
-+static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
-+			GLvoid *data, int stride, int count)
- {
-    int i;
--   radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p\n",
--	      __FUNCTION__, count, stride, (void *)out);
--
--   for (i = 0; i < count; i++) {
--      out->red   = *data;
--      out->green = *(data+1);
--      out->blue  = *(data+2);
--      out->alpha = 0xFF;
--      out++;
--      data += stride;
--   }
--}
--
--static void emit_ubyte_rgba4( GLcontext *ctx,
--			      struct radeon_dma_region *rvb,
--			      char *data,
--			      int stride,
--			      int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
-+   uint32_t *out;
-+   int size = 1;
-+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
- 	      __FUNCTION__, count, stride);
- 
--   if (stride == 4)
--       COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 *out++ = LE32_TO_CPU(*(int *)data);
--	 data += stride;
--      }
--}
--
--
--static void emit_ubyte_rgba( GLcontext *ctx,
--			     struct radeon_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
--
--   assert (!rvb->buf);
--
-    if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
-+      radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
-       count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
-+      aos->stride = 0;
-    }
-    else {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+      aos->stride = size;
-    }
- 
--   /* Emit the data
--    */
--   switch (size) {
--   case 3:
--      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--}
--#endif
--
--#if defined(USE_X86_ASM)
--#define COPY_DWORDS( dst, src, nr )					\
--do {									\
--	int __tmp;							\
--	__asm__ __volatile__( "rep ; movsl"				\
--			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
--			      : "0" (nr),				\
--			        "D" ((long)dst),			\
--			        "S" ((long)src) );			\
--} while (0)
--#else
--#define COPY_DWORDS( dst, src, nr )		\
--do {						\
--   int j;					\
--   for ( j = 0 ; j < nr ; j++ )			\
--      dst[j] = ((int *)src)[j];			\
--   dst += nr;					\
--} while (0)
--#endif
--
--static void emit_vecfog( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
--{
--   int i;
--   GLfloat *out;
-+   aos->components = size;
-+   aos->count = count;
- 
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
- 
-    /* Emit the data
-     */
--   out = (GLfloat *)(rvb->address + rvb->start);
-+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-    for (i = 0; i < count; i++) {
-       out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
-       out++;
-@@ -209,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx,
-    }
- }
- 
--static void emit_vec4( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4)
--      COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out++;
--	 data += stride;
--      }
--}
--
--
--static void emit_vec8( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 8)
--      COPY_DWORDS( out, data, count*2 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out += 2;
--	 data += stride;
--      }
--}
--
--static void emit_vec12( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--	      __FUNCTION__, count, stride, (void *)out, (void *)data);
--
--   if (stride == 12)
--      COPY_DWORDS( out, data, count*3 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out += 3;
--	 data += stride;
--      }
--}
--
--static void emit_vec16( GLcontext *ctx,
--			struct radeon_dma_region *rvb,
--			char *data,
--			int stride,
--			int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 16)
--      COPY_DWORDS( out, data, count*4 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out[3] = *(int *)(data+12);
--	 out += 4;
--	 data += stride;
--      }
--}
--
--
--static void emit_vector( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int size,
--			 int stride,
--			 int count )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d size %d stride %d\n",
--	      __FUNCTION__, count, size, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = size;
--   }
--   else {
--      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = size;
--      rvb->aos_size = size;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 1:
--      emit_vec4( ctx, rvb, data, stride, count );
--      break;
--   case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
--      break;
--   case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_vec16( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--
--}
--
--
--
--static void emit_s0_vec( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
- {
-    int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
- 	      __FUNCTION__, count, stride);
-@@ -384,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx,
-    }
- }
- 
--static void emit_stq_vec( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
- {
-    int i;
--   int *out = (int *)(rvb->address + rvb->start);
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
-@@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
- 
- 
- 
--static void emit_tex_vector( GLcontext *ctx,
--			     struct radeon_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
-+static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
-+			    GLvoid *data, int size, int stride, int count)
- {
-    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-    int emitsize;
-+   uint32_t *out;
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
- 
--   assert (!rvb->buf);
--
-    switch (size) {
-    case 4: emitsize = 3; break;
-    case 3: emitsize = 3; break;
-@@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
- 
- 
-    if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
-       count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = emitsize;
-+      aos->stride = 0;
-    }
-    else {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = emitsize;
--      rvb->aos_size = emitsize;
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
-+      aos->stride = emitsize;
-    }
- 
-+   aos->components = emitsize;
-+   aos->count = count;
- 
-    /* Emit the data
-     */
-+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-    switch (size) {
-    case 1:
--      emit_s0_vec( ctx, rvb, data, stride, count ); 
-+      emit_s0_vec( out, data, stride, count );
-       break;
-    case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
-+      radeonEmitVec8( out, data, stride, count );
-       break;
-    case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
-+      radeonEmitVec12( out, data, stride, count );
-       break;
-    case 4:
--      emit_stq_vec( ctx, rvb, data, stride, count );
-+      emit_stq_vec( out, data, stride, count );
-       break;
-    default:
-       assert(0);
-@@ -476,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx,
-  */
- void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
--   struct radeon_dma_region **component = rmesa->tcl.aos_components;
-    GLuint nr = 0;
-    GLuint vfmt = 0;
-    GLuint count = VB->Count;
-@@ -491,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
-    if (1) {
-       if (!rmesa->tcl.obj.buf) 
--	 emit_vector( ctx, 
--		      &rmesa->tcl.obj, 
--		      (char *)VB->ObjPtr->data,
--		      VB->ObjPtr->size,
--		      VB->ObjPtr->stride,
--		      count);
-+	rcommon_emit_vector( ctx, 
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->ObjPtr->data,
-+			     VB->ObjPtr->size,
-+			     VB->ObjPtr->stride,
-+			     count);
- 
-       switch( VB->ObjPtr->size ) {
-       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
-@@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       default:
-          break;
-       }
--      component[nr++] = &rmesa->tcl.obj;
-+      nr++;
-    }
-    
- 
-    if (inputs & VERT_BIT_NORMAL) {
-       if (!rmesa->tcl.norm.buf)
--	 emit_vector( ctx, 
--		      &(rmesa->tcl.norm), 
--		      (char *)VB->NormalPtr->data,
--		      3,
--		      VB->NormalPtr->stride,
--		      count);
-+	 rcommon_emit_vector( ctx, 
-+			      &(rmesa->tcl.aos[nr]),
-+			      (char *)VB->NormalPtr->data,
-+			      3,
-+			      VB->NormalPtr->stride,
-+			      count);
- 
-       vfmt |= RADEON_CP_VC_FRMT_N0;
--      component[nr++] = &rmesa->tcl.norm;
-+      nr++;
-    }
- 
-    if (inputs & VERT_BIT_COLOR0) {
-@@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       }
- 
-       if (!rmesa->tcl.rgba.buf)
--	 emit_vector( ctx,
--		      &(rmesa->tcl.rgba),
--		      (char *)VB->ColorPtr[0]->data,
--		      emitsize,
--		      VB->ColorPtr[0]->stride,
--		      count);
--
--
--      component[nr++] = &rmesa->tcl.rgba;
-+	rcommon_emit_vector( ctx,
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->ColorPtr[0]->data,
-+			     emitsize,
-+			     VB->ColorPtr[0]->stride,
-+			     count);
-+
-+      nr++;
-    }
- 
- 
-    if (inputs & VERT_BIT_COLOR1) {
-       if (!rmesa->tcl.spec.buf) {
- 
--	 emit_vector( ctx,
--		      &rmesa->tcl.spec,
--		      (char *)VB->SecondaryColorPtr[0]->data,
--		      3,
--		      VB->SecondaryColorPtr[0]->stride,
--		      count);
-+	rcommon_emit_vector( ctx,
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->SecondaryColorPtr[0]->data,
-+			     3,
-+			     VB->SecondaryColorPtr[0]->stride,
-+			     count);
-       }
- 
-       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
--      component[nr++] = &rmesa->tcl.spec;
-+      nr++;
-    }
- 
- /* FIXME: not sure if this is correct. May need to stitch this together with
-@@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-    if (inputs & VERT_BIT_FOG) {
-       if (!rmesa->tcl.fog.buf)
- 	 emit_vecfog( ctx,
--		      &(rmesa->tcl.fog),
-+		      &(rmesa->tcl.aos[nr]),
- 		      (char *)VB->FogCoordPtr->data,
- 		      VB->FogCoordPtr->stride,
- 		      count);
- 
-       vfmt |= RADEON_CP_VC_FRMT_FPFOG;
--      component[nr++] = &rmesa->tcl.fog;
-+      nr++;
-    }
- 
- 
-@@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       if (inputs & VERT_BIT_TEX(unit)) {
- 	 if (!rmesa->tcl.tex[unit].buf)
- 	    emit_tex_vector( ctx,
--			     &(rmesa->tcl.tex[unit]),
-+			     &(rmesa->tcl.aos[nr]),
- 			     (char *)VB->TexCoordPtr[unit]->data,
- 			     VB->TexCoordPtr[unit]->size,
- 			     VB->TexCoordPtr[unit]->stride,
- 			     count );
-+	 nr++;
- 
- 	 vfmt |= RADEON_ST_BIT(unit);
-          /* assume we need the 3rd coord if texgen is active for r/q OR at least
-@@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
- 	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
- 	 }
--	 component[nr++] = &rmesa->tcl.tex[unit];
-       }
-    }
- 
-@@ -622,34 +324,3 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-    rmesa->tcl.vertex_format = vfmt;
- }
- 
--
--void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
--   GLuint unit;
--
--#if 0
--   if (RADEON_DEBUG & DEBUG_VERTS) 
--      _tnl_print_vert_flags( __FUNCTION__, newinputs );
--#endif
--
--   if (newinputs & VERT_BIT_POS) 
--     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_NORMAL) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_COLOR0) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_COLOR1) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
--      
--   if (newinputs & VERT_BIT_FOG)
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
--
--   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
--      if (newinputs & VERT_BIT_TEX(unit))
--         radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
--   }
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-index 126d072..78ec119 100644
---- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
- 
- void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
-    GLuint req = 0;
-    GLuint unit;
-@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 	 break;
- 
-    if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
--       rmesa->tcl.indexed_verts.buf)
-+       rmesa->radeon.tcl.aos[0].bo)
-       return;
- 
--   if (rmesa->tcl.indexed_verts.buf)
-+   if (rmesa->radeon.tcl.aos[0].bo)
-       radeonReleaseArrays( ctx, ~0 );
- 
--   radeonAllocDmaRegion( rmesa,
--			 &rmesa->tcl.indexed_verts, 
-+   radeonAllocDmaRegion( &rmesa->radeon,
-+			 &rmesa->radeon.tcl.aos[0].bo,
-+			 &rmesa->radeon.tcl.aos[0].offset,
- 			 VB->Count * setup_tab[i].vertex_size * 4, 
- 			 4);
- 
-@@ -421,29 +422,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
- 
-    setup_tab[i].emit( ctx, 0, VB->Count, 
--		      rmesa->tcl.indexed_verts.address + 
--		      rmesa->tcl.indexed_verts.start );
-+		      rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset);
- 
-+   //   rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size;
-+   rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size;
-    rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
--   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
--   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
--   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
--
--   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
--   rmesa->tcl.nr_aos_components = 1;
-+   rmesa->radeon.tcl.aos_count = 1;
- }
- 
- 
--
--void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
--
--#if 0
--   if (RADEON_DEBUG & DEBUG_VERTS) 
--      _tnl_print_vert_flags( __FUNCTION__, newinputs );
--#endif
--
--   if (newinputs) 
--     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
-new file mode 100644
-index 0000000..51538e3
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
-@@ -0,0 +1,387 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#include "radeon_mipmap_tree.h"
-+
-+#include <errno.h>
-+#include <unistd.h>
-+
-+#include "main/simple_list.h"
-+#include "main/texcompress.h"
-+#include "main/texformat.h"
-+
-+static GLuint radeon_compressed_texture_size(GLcontext *ctx,
-+		GLsizei width, GLsizei height, GLsizei depth,
-+		GLuint mesaFormat)
-+{
-+	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
-+
-+	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
-+	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
-+		if (width + 3 < 8)	/* width one block */
-+			size = size * 4;
-+		else if (width + 3 < 16)
-+			size = size * 2;
-+	} else {
-+		/* DXT3/5, 16 bytes per block */
-+	  //		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
-+		if (width + 3 < 8)
-+			size = size * 2;
-+	}
-+
-+	return size;
-+}
-+
-+
-+static int radeon_compressed_num_bytes(GLuint mesaFormat)
-+{
-+   int bytes = 0;
-+   switch(mesaFormat) {
-+     
-+   case MESA_FORMAT_RGB_FXT1:
-+   case MESA_FORMAT_RGBA_FXT1:
-+   case MESA_FORMAT_RGB_DXT1:
-+   case MESA_FORMAT_RGBA_DXT1:
-+     bytes = 2;
-+     break;
-+     
-+   case MESA_FORMAT_RGBA_DXT3:
-+   case MESA_FORMAT_RGBA_DXT5:
-+     bytes = 4;
-+   default:
-+     break;
-+   }
-+   
-+   return bytes;
-+}
-+
-+/**
-+ * Compute sizes and fill in offset and blit information for the given
-+ * image (determined by \p face and \p level).
-+ *
-+ * \param curOffset points to the offset at which the image is to be stored
-+ * and is updated by this function according to the size of the image.
-+ */
-+static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt,
-+	GLuint face, GLuint level, GLuint* curOffset)
-+{
-+	radeon_mipmap_level *lvl = &mt->levels[level];
-+	uint32_t row_align = rmesa->texture_row_align - 1;
-+
-+	/* Find image size in bytes */
-+	if (mt->compressed) {
-+		/* TODO: Is this correct? Need test cases for compressed textures! */
-+		lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
-+		lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
-+							   lvl->width, lvl->height, lvl->depth, mt->compressed);
-+	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
-+		lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
-+		lvl->size = lvl->rowstride * lvl->height;
-+	} else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
-+		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-+		 * though the actual offset may be different (if texture is less than
-+		 * 32 bytes width) to the untiled case */
-+		lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
-+		lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
-+	} else {
-+		lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
-+		lvl->size = lvl->rowstride * lvl->height * lvl->depth;
-+	}
-+	assert(lvl->size > 0);
-+
-+	/* All images are aligned to a 32-byte offset */
-+	*curOffset = (*curOffset + 0x1f) & ~0x1f;
-+	lvl->faces[face].offset = *curOffset;
-+	*curOffset += lvl->size;
-+
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+	  fprintf(stderr,
-+		  "level %d, face %d: rs:%d %dx%d at %d\n",
-+		  level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset);
-+}
-+
-+static GLuint minify(GLuint size, GLuint levels)
-+{
-+	size = size >> levels;
-+	if (size < 1)
-+		size = 1;
-+	return size;
-+}
-+
-+static void calculate_miptree_layout(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
-+{
-+	GLuint curOffset;
-+	GLuint numLevels;
-+	GLuint i;
-+
-+	numLevels = mt->lastLevel - mt->firstLevel + 1;
-+	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-+
-+	curOffset = 0;
-+	for(i = 0; i < numLevels; i++) {
-+		GLuint face;
-+
-+		mt->levels[i].width = minify(mt->width0, i);
-+		mt->levels[i].height = minify(mt->height0, i);
-+		mt->levels[i].depth = minify(mt->depth0, i);
-+
-+		for(face = 0; face < mt->faces; face++)
-+			compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
-+	}
-+
-+	/* Note the required size in memory */
-+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-+}
-+
-+
-+/**
-+ * Create a new mipmap tree, calculate its layout and allocate memory.
-+ */
-+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed)
-+{
-+	radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
-+
-+	mt->radeon = rmesa;
-+	mt->refcount = 1;
-+	mt->t = t;
-+	mt->target = target;
-+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-+	mt->firstLevel = firstLevel;
-+	mt->lastLevel = lastLevel;
-+	mt->width0 = width0;
-+	mt->height0 = height0;
-+	mt->depth0 = depth0;
-+	mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp;
-+	mt->tilebits = tilebits;
-+	mt->compressed = compressed;
-+
-+	calculate_miptree_layout(rmesa, mt);
-+
-+	mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
-+                            0, mt->totalsize, 1024,
-+                            RADEON_GEM_DOMAIN_VRAM,
-+                            0);
-+
-+	return mt;
-+}
-+
-+void radeon_miptree_reference(radeon_mipmap_tree *mt)
-+{
-+	mt->refcount++;
-+	assert(mt->refcount > 0);
-+}
-+
-+void radeon_miptree_unreference(radeon_mipmap_tree *mt)
-+{
-+	if (!mt)
-+		return;
-+
-+	assert(mt->refcount > 0);
-+	mt->refcount--;
-+	if (!mt->refcount) {
-+		radeon_bo_unref(mt->bo);
-+		free(mt);
-+	}
-+}
-+
-+
-+/**
-+ * Calculate first and last mip levels for the given texture object,
-+ * where the dimensions are taken from the given texture image at
-+ * the given level.
-+ *
-+ * Note: level is the OpenGL level number, which is not necessarily the same
-+ * as the first level that is actually present.
-+ *
-+ * The base level image of the given texture face must be non-null,
-+ * or this will fail.
-+ */
-+static void calculate_first_last_level(struct gl_texture_object *tObj,
-+				       GLuint *pfirstLevel, GLuint *plastLevel,
-+				       GLuint face, GLuint level)
-+{
-+	const struct gl_texture_image * const baseImage =
-+		tObj->Image[face][level];
-+
-+	assert(baseImage);
-+	
-+	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
-+	* and having firstLevel and lastLevel as signed prevents the need for
-+	* extra sign checks.
-+	*/
-+	int   firstLevel;
-+	int   lastLevel;
-+
-+	/* Yes, this looks overly complicated, but it's all needed.
-+	*/
-+	switch (tObj->Target) {
-+	case GL_TEXTURE_1D:
-+	case GL_TEXTURE_2D:
-+	case GL_TEXTURE_3D:
-+	case GL_TEXTURE_CUBE_MAP:
-+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
-+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
-+			*/
-+			firstLevel = lastLevel = tObj->BaseLevel;
-+		} else {
-+			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
-+			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
-+			firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2);
-+			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
-+			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
-+			lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2);
-+			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
-+			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-+		}
-+		break;
-+	case GL_TEXTURE_RECTANGLE_NV:
-+	case GL_TEXTURE_4D_SGIS:
-+		firstLevel = lastLevel = 0;
-+		break;
-+	default:
-+		return;
-+	}
-+
-+	/* save these values */
-+	*pfirstLevel = firstLevel;
-+	*plastLevel = lastLevel;
-+}
-+
-+
-+/**
-+ * Checks whether the given miptree can hold the given texture image at the
-+ * given face and level.
-+ */
-+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level)
-+{
-+	radeon_mipmap_level *lvl;
-+
-+	if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
-+		return GL_FALSE;
-+
-+	if (texImage->IsCompressed != mt->compressed)
-+		return GL_FALSE;
-+
-+	if (!texImage->IsCompressed &&
-+	    !mt->compressed &&
-+	    texImage->TexFormat->TexelBytes != mt->bpp)
-+		return GL_FALSE;
-+
-+	lvl = &mt->levels[level - mt->firstLevel];
-+	if (lvl->width != texImage->Width ||
-+	    lvl->height != texImage->Height ||
-+	    lvl->depth != texImage->Depth)
-+		return GL_FALSE;
-+
-+	return GL_TRUE;
-+}
-+
-+
-+/**
-+ * Checks whether the given miptree has the right format to store the given texture object.
-+ */
-+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
-+{
-+	struct gl_texture_image *firstImage;
-+	GLuint compressed;
-+	GLuint numfaces = 1;
-+	GLuint firstLevel, lastLevel;
-+
-+	calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel);
-+	if (texObj->Target == GL_TEXTURE_CUBE_MAP)
-+		numfaces = 6;
-+
-+	firstImage = texObj->Image[0][firstLevel];
-+	compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
-+
-+	return (mt->firstLevel == firstLevel &&
-+	        mt->lastLevel == lastLevel &&
-+	        mt->width0 == firstImage->Width &&
-+	        mt->height0 == firstImage->Height &&
-+	        mt->depth0 == firstImage->Depth &&
-+	        mt->bpp == firstImage->TexFormat->TexelBytes &&
-+	        mt->compressed == compressed);
-+}
-+
-+
-+/**
-+ * Try to allocate a mipmap tree for the given texture that will fit the
-+ * given image in the given position.
-+ */
-+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level)
-+{
-+	GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0;
-+	GLuint numfaces = 1;
-+	GLuint firstLevel, lastLevel;
-+
-+	assert(!t->mt);
-+
-+	calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level);
-+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
-+		numfaces = 6;
-+
-+	if (level != firstLevel || face >= numfaces)
-+		return;
-+
-+	t->mt = radeon_miptree_create(rmesa, t, t->base.Target,
-+		firstLevel, lastLevel,
-+		texImage->Width, texImage->Height, texImage->Depth,
-+		texImage->TexFormat->TexelBytes, t->tile_bits, compressed);
-+}
-+
-+/* Although we use the image_offset[] array to store relative offsets
-+ * to cube faces, Mesa doesn't know anything about this and expects
-+ * each cube face to be treated as a separate image.
-+ *
-+ * These functions present that view to mesa:
-+ */
-+void
-+radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets)
-+{
-+     if (mt->target != GL_TEXTURE_3D || mt->faces == 1)
-+        offsets[0] = 0;
-+     else {
-+	int i;
-+	for (i = 0; i < 6; i++)
-+		offsets[i] = mt->levels[level].faces[i].offset;
-+     }
-+}
-+
-+GLuint
-+radeon_miptree_image_offset(radeon_mipmap_tree *mt,
-+			    GLuint face, GLuint level)
-+{
-+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
-+      return (mt->levels[level].faces[face].offset);
-+   else
-+      return mt->levels[level].faces[0].offset;
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
-new file mode 100644
-index 0000000..697010b
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
-@@ -0,0 +1,98 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#ifndef __RADEON_MIPMAP_TREE_H_
-+#define __RADEON_MIPMAP_TREE_H_
-+
-+#include "radeon_common.h"
-+
-+typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
-+typedef struct _radeon_mipmap_level radeon_mipmap_level;
-+typedef struct _radeon_mipmap_image radeon_mipmap_image;
-+
-+struct _radeon_mipmap_image {
-+	GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */
-+};
-+
-+struct _radeon_mipmap_level {
-+	GLuint width;
-+	GLuint height;
-+	GLuint depth;
-+	GLuint size; /** Size of each image, in bytes */
-+	GLuint rowstride; /** in bytes */
-+	radeon_mipmap_image faces[6];
-+};
-+
-+
-+/**
-+ * A mipmap tree contains texture images in the layout that the hardware
-+ * expects.
-+ *
-+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
-+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
-+ * changed.
-+ */
-+struct _radeon_mipmap_tree {
-+	radeonContextPtr radeon;
-+	radeonTexObj *t;
-+	struct radeon_bo *bo;
-+	GLuint refcount;
-+
-+	GLuint totalsize; /** total size of the miptree, in bytes */
-+
-+	GLenum target; /** GL_TEXTURE_xxx */
-+	GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
-+	GLuint firstLevel; /** First mip level stored in this mipmap tree */
-+	GLuint lastLevel; /** Last mip level stored in this mipmap tree */
-+
-+	GLuint width0; /** Width of firstLevel image */
-+	GLuint height0; /** Height of firstLevel image */
-+	GLuint depth0; /** Depth of firstLevel image */
-+
-+	GLuint bpp; /** Bytes per texel */
-+	GLuint tilebits; /** RADEON_TXO_xxx_TILE */
-+	GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
-+
-+	radeon_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS];
-+};
-+
-+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed);
-+void radeon_miptree_reference(radeon_mipmap_tree *mt);
-+void radeon_miptree_unreference(radeon_mipmap_tree *mt);
-+
-+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level);
-+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj);
-+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
-+			      struct gl_texture_image *texImage, GLuint face, GLuint level);
-+GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
-+				   GLuint face, GLuint level);
-+void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets);
-+#endif /* __RADEON_MIPMAP_TREE_H_ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
-index 6613757..bbed838 100644
---- a/src/mesa/drivers/dri/radeon/radeon_sanity.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
-@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
- }
- 
- 
--int radeonSanityCmdBuffer( radeonContextPtr rmesa,
-+int radeonSanityCmdBuffer( r100ContextPtr rmesa,
- 			   int nbox,
- 			   drm_clip_rect_t *boxes )
- {
-diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
-index 1ec06bc..f30eb1c 100644
---- a/src/mesa/drivers/dri/radeon/radeon_sanity.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
-@@ -1,7 +1,7 @@
- #ifndef RADEON_SANITY_H
- #define RADEON_SANITY_H
- 
--extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
-+extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
- 				  int nbox,
- 				  drm_clip_rect_t *boxes );
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
-index 8828533..56dbe74 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
-@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * \author  Gareth Hughes <gareth@valinux.com>
-  */
- 
-+#include <errno.h>
- #include "main/glheader.h"
- #include "main/imports.h"
- #include "main/mtypes.h"
-@@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_chipset.h"
- #include "radeon_macros.h"
- #include "radeon_screen.h"
-+#include "radeon_common.h"
-+#include "radeon_span.h"
- #if !RADEON_COMMON
- #include "radeon_context.h"
--#include "radeon_span.h"
- #include "radeon_tex.h"
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- #include "r200_context.h"
- #include "r200_ioctl.h"
--#include "r200_span.h"
- #include "r200_tex.h"
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- #include "r300_context.h"
- #include "r300_fragprog.h"
- #include "r300_tex.h"
--#include "radeon_span.h"
- #endif
- 
- #include "utils.h"
- #include "vblank.h"
- #include "drirenderbuffer.h"
- 
-+#include "radeon_bocs_wrapper.h"
-+
- #include "GL/internal/dri_interface.h"
- 
- /* Radeon configuration
-  */
- #include "xmlpool.h"
- 
-+#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
-+DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
-+        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
-+        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
-+DRI_CONF_OPT_END
-+
- #if !RADEON_COMMON	/* R100 */
- PUBLIC const char __driConfigOptions[] =
- DRI_CONF_BEGIN
-@@ -80,6 +88,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-         DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
-         DRI_CONF_HYPERZ(false)
-+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
-     DRI_CONF_SECTION_END
-     DRI_CONF_SECTION_QUALITY
-         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -95,7 +104,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_NO_RAST(false)
-     DRI_CONF_SECTION_END
- DRI_CONF_END;
--static const GLuint __driNConfigOptions = 14;
-+static const GLuint __driNConfigOptions = 15;
- 
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- 
-@@ -107,6 +116,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-         DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
-         DRI_CONF_HYPERZ(false)
-+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
-     DRI_CONF_SECTION_END
-     DRI_CONF_SECTION_QUALITY
-         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -126,7 +136,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_NV_VERTEX_PROGRAM(false)
-     DRI_CONF_SECTION_END
- DRI_CONF_END;
--static const GLuint __driNConfigOptions = 16;
-+static const GLuint __driNConfigOptions = 17;
- 
- extern const struct dri_extension blend_extensions[];
- extern const struct dri_extension ARB_vp_extension[];
-@@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
-         DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
- DRI_CONF_OPT_END
- 
--#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
--DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
--        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
--        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
--DRI_CONF_OPT_END
-+
- 
- #define DRI_CONF_DISABLE_S3TC(def) \
- DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
-@@ -209,7 +215,6 @@ static const GLuint __driNConfigOptions = 17;
- extern const struct dri_extension gl_20_extension[];
- 
- #ifndef RADEON_DEBUG
--int RADEON_DEBUG = 0;
- 
- static const struct dri_debug_control debug_control[] = {
- 	{"fall", DEBUG_FALLBACKS},
-@@ -236,19 +241,36 @@ static const struct dri_debug_control debug_control[] = {
- #endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */
- 
- extern const struct dri_extension card_extensions[];
-+extern const struct dri_extension mm_extensions[];
- 
- static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
- 
- static int
--radeonGetParam(int fd, int param, void *value)
-+radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value)
- {
-   int ret;
--  drm_radeon_getparam_t gp;
--
--  gp.param = param;
--  gp.value = value;
-+  drm_radeon_getparam_t gp = { 0 };
-+  struct drm_radeon_info info = { 0 };
-+
-+  if (sPriv->drm_version.major >= 2) {
-+      info.value = (uint64_t)value;
-+      switch (param) {
-+      case RADEON_PARAM_DEVICE_ID:
-+          info.request = RADEON_INFO_DEVICE_ID;
-+          break;
-+      case RADEON_PARAM_NUM_GB_PIPES:
-+          info.request = RADEON_INFO_NUM_GB_PIPES;
-+          break;
-+      default:
-+          return -EINVAL;
-+      }
-+      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
-+  } else {
-+      gp.param = param;
-+      gp.value = value;
- 
--  ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
-+      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
-+  }
-   return ret;
- }
- 
-@@ -335,6 +357,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = {
-     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-     radeonSetTexOffset,
- };
-+
-+static const __DRItexBufferExtension radeonTexBufferExtension = {
-+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-+   radeonSetTexBuffer,
-+   radeonSetTexBuffer2,
-+};
- #endif
- 
- #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-@@ -349,6 +377,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = {
-     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-    r200SetTexOffset,
- };
-+
-+static const __DRItexBufferExtension r200TexBufferExtension = {
-+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-+   r200SetTexBuffer,
-+   r200SetTexBuffer2,
-+};
- #endif
- 
- #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-@@ -356,137 +390,18 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
-     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-    r300SetTexOffset,
- };
--#endif
- 
--/* Create the device specific screen private data struct.
-- */
--static radeonScreenPtr
--radeonCreateScreen( __DRIscreenPrivate *sPriv )
--{
--   radeonScreenPtr screen;
--   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
--   unsigned char *RADEONMMIO;
--   int i;
--   int ret;
--   uint32_t temp = 0;
--
--   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
--      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
--      return GL_FALSE;
--   }
--
--   /* Allocate the private area */
--   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
--   if ( !screen ) {
--      __driUtilMessage("%s: Could not allocate memory for screen structure",
--		       __FUNCTION__);
--      return NULL;
--   }
--
--#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
--	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+static const __DRItexBufferExtension r300TexBufferExtension = {
-+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-+   r300SetTexBuffer,
-+   r300SetTexBuffer2,
-+};
- #endif
- 
--   /* parse information in __driConfigOptions */
--   driParseOptionInfo (&screen->optionCache,
--		       __driConfigOptions, __driNConfigOptions);
--
--   /* This is first since which regions we map depends on whether or
--    * not we are using a PCI card.
--    */
--   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
--   {
--      int ret;
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
--			    &screen->gart_buffer_offset);
--
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
--	 return NULL;
--      }
--
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
--			    &screen->gart_base);
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
--	 return NULL;
--      }
--
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
--			    &screen->irq);
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
--	 return NULL;
--      }
--      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
--      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
--      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
--      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
--      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
--      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
--      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
--   }
--
--   screen->mmio.handle = dri_priv->registerHandle;
--   screen->mmio.size   = dri_priv->registerSize;
--   if ( drmMap( sPriv->fd,
--		screen->mmio.handle,
--		screen->mmio.size,
--		&screen->mmio.map ) ) {
--      FREE( screen );
--      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   RADEONMMIO = screen->mmio.map;
--
--   screen->status.handle = dri_priv->statusHandle;
--   screen->status.size   = dri_priv->statusSize;
--   if ( drmMap( sPriv->fd,
--		screen->status.handle,
--		screen->status.size,
--		&screen->status.map ) ) {
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
--      return NULL;
--   }
--   screen->scratch = (__volatile__ uint32_t *)
--      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
--
--   screen->buffers = drmMapBufs( sPriv->fd );
--   if ( !screen->buffers ) {
--      drmUnmap( screen->status.map, screen->status.size );
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
--      screen->gartTextures.handle = dri_priv->gartTexHandle;
--      screen->gartTextures.size   = dri_priv->gartTexMapSize;
--      if ( drmMap( sPriv->fd,
--		   screen->gartTextures.handle,
--		   screen->gartTextures.size,
--		   (drmAddressPtr)&screen->gartTextures.map ) ) {
--	 drmUnmapBufs( screen->buffers );
--	 drmUnmap( screen->status.map, screen->status.size );
--	 drmUnmap( screen->mmio.map, screen->mmio.size );
--	 FREE( screen );
--	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
--	 return NULL;
--      }
--
--      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
--   }
--
-+static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
-+{
-    screen->chip_flags = 0;
--   /* XXX: add more chipsets */
--   switch ( dri_priv->deviceID ) {
-+   switch ( device_id ) {
-    case PCI_CHIP_RADEON_LY:
-    case PCI_CHIP_RADEON_LZ:
-    case PCI_CHIP_RADEON_QY:
-@@ -561,11 +476,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-       screen->chip_family = CHIP_FAMILY_RS300;
-       break;
- 
--      /* 9500 with 1 pipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
-    case PCI_CHIP_R300_AD:
--      screen->chip_family = CHIP_FAMILY_RV350;
--      screen->chip_flags = RADEON_CHIPSET_TCL;
--      break;
-    case PCI_CHIP_R300_AE:
-    case PCI_CHIP_R300_AF:
-    case PCI_CHIP_R300_AG:
-@@ -824,9 +735,145 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
-    default:
-       fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
--	      dri_priv->deviceID);
-+	      device_id);
-+      return -1;
-+   }
-+
-+   return 0;
-+}
-+
-+
-+/* Create the device specific screen private data struct.
-+ */
-+static radeonScreenPtr
-+radeonCreateScreen( __DRIscreenPrivate *sPriv )
-+{
-+   radeonScreenPtr screen;
-+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
-+   unsigned char *RADEONMMIO = NULL;
-+   int i;
-+   int ret;
-+   uint32_t temp = 0;
-+
-+   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
-+      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
-+      return GL_FALSE;
-+   }
-+
-+   /* Allocate the private area */
-+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-+   if ( !screen ) {
-+      __driUtilMessage("%s: Could not allocate memory for screen structure",
-+		       __FUNCTION__);
-       return NULL;
-    }
-+
-+#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+#endif
-+
-+   /* parse information in __driConfigOptions */
-+   driParseOptionInfo (&screen->optionCache,
-+		       __driConfigOptions, __driNConfigOptions);
-+
-+   /* This is first since which regions we map depends on whether or
-+    * not we are using a PCI card.
-+    */
-+   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
-+   {
-+      int ret;
-+
-+      ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET,
-+			    &screen->gart_buffer_offset);
-+
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
-+	 return NULL;
-+      }
-+
-+      ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BASE,
-+			    &screen->gart_base);
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
-+	 return NULL;
-+      }
-+
-+      ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR,
-+			    &screen->irq);
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
-+	 return NULL;
-+      }
-+      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
-+      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
-+      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
-+      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
-+      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
-+      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
-+      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
-+   }
-+
-+   screen->mmio.handle = dri_priv->registerHandle;
-+   screen->mmio.size   = dri_priv->registerSize;
-+   if ( drmMap( sPriv->fd,
-+		screen->mmio.handle,
-+		screen->mmio.size,
-+		&screen->mmio.map ) ) {
-+     FREE( screen );
-+     __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-+     return NULL;
-+   }
-+
-+   RADEONMMIO = screen->mmio.map;
-+
-+   screen->status.handle = dri_priv->statusHandle;
-+   screen->status.size   = dri_priv->statusSize;
-+   if ( drmMap( sPriv->fd,
-+		screen->status.handle,
-+		screen->status.size,
-+		&screen->status.map ) ) {
-+     drmUnmap( screen->mmio.map, screen->mmio.size );
-+     FREE( screen );
-+     __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-+     return NULL;
-+   }
-+   screen->scratch = (__volatile__ uint32_t *)
-+     ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-+
-+   screen->buffers = drmMapBufs( sPriv->fd );
-+   if ( !screen->buffers ) {
-+     drmUnmap( screen->status.map, screen->status.size );
-+     drmUnmap( screen->mmio.map, screen->mmio.size );
-+     FREE( screen );
-+     __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
-+     return NULL;
-+   }
-+
-+   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-+     screen->gartTextures.handle = dri_priv->gartTexHandle;
-+     screen->gartTextures.size   = dri_priv->gartTexMapSize;
-+     if ( drmMap( sPriv->fd,
-+		  screen->gartTextures.handle,
-+		  screen->gartTextures.size,
-+		  (drmAddressPtr)&screen->gartTextures.map ) ) {
-+       drmUnmapBufs( screen->buffers );
-+       drmUnmap( screen->status.map, screen->status.size );
-+       drmUnmap( screen->mmio.map, screen->mmio.size );
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-+       return NULL;
-+    }
-+
-+     screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-+   }
-+
-+   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
-+   if (ret == -1)
-+     return NULL;
-+
-    if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
-        sPriv->ddx_version.minor < 2) {
-       fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
-@@ -851,8 +898,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    screen->cpp = dri_priv->bpp / 8;
-    screen->AGPMode = dri_priv->AGPMode;
- 
--   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
--                         &temp);
-+   ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp);
-    if (ret) {
-        if (screen->chip_family < CHIP_FAMILY_RS600)
- 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
-@@ -866,8 +912,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    }
- 
-    if (screen->chip_family >= CHIP_FAMILY_R300) {
--       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
--			     &temp);
-+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
-        if (ret) {
- 	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
- 	   switch (screen->chip_family) {
-@@ -893,6 +938,17 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-        } else {
- 	   screen->num_gb_pipes = temp;
-        }
-+
-+       /* pipe overrides */
-+       switch (dri_priv->deviceID) {
-+       case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
-+       case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
-+       case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
-+	   screen->num_gb_pipes = 1;
-+	   break;
-+       default:
-+	   break;
-+       }
-    }
- 
-    if ( sPriv->drm_version.minor >= 10 ) {
-@@ -962,7 +1018,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
- #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-    if (IS_R200_CLASS(screen))
--       screen->extensions[i++] = &r200AllocateExtension.base;
-+      screen->extensions[i++] = &r200AllocateExtension.base;
- 
-    screen->extensions[i++] = &r200texOffsetExtension.base;
- #endif
-@@ -976,6 +1032,154 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
-    screen->driScreen = sPriv;
-    screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
-+   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
-+					       screen->sarea_priv_offset);
-+
-+   screen->bom = radeon_bo_manager_legacy_ctor(screen);
-+   if (screen->bom == NULL) {
-+     free(screen);
-+     return NULL;
-+   }
-+
-+   return screen;
-+}
-+
-+static radeonScreenPtr
-+radeonCreateScreen2(__DRIscreenPrivate *sPriv)
-+{
-+   radeonScreenPtr screen;
-+   int i;
-+   int ret;
-+   uint32_t device_id = 0;
-+   uint32_t temp = 0;
-+
-+   /* Allocate the private area */
-+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-+   if ( !screen ) {
-+      __driUtilMessage("%s: Could not allocate memory for screen structure",
-+		       __FUNCTION__);
-+      fprintf(stderr, "leaving here\n");
-+      return NULL;
-+   }
-+
-+#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+#endif
-+
-+   /* parse information in __driConfigOptions */
-+   driParseOptionInfo (&screen->optionCache,
-+		       __driConfigOptions, __driNConfigOptions);
-+
-+   screen->kernel_mm = 1;
-+   screen->chip_flags = 0;
-+
-+   /* if we have kms we can support all of these */
-+   screen->drmSupportsCubeMapsR200 = 1;
-+   screen->drmSupportsBlendColor = 1;
-+   screen->drmSupportsTriPerf = 1;
-+   screen->drmSupportsFragShader = 1;
-+   screen->drmSupportsPointSprites = 1;
-+   screen->drmSupportsCubeMapsR100 = 1;
-+   screen->drmSupportsVertexProgram = 1;
-+
-+   ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR, &screen->irq);
-+
-+   ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id);
-+   if (ret) {
-+     FREE( screen );
-+     fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
-+     return NULL;
-+   }
-+
-+   ret = radeon_set_screen_flags(screen, device_id);
-+   if (ret == -1)
-+     return NULL;
-+
-+   if (screen->chip_family >= CHIP_FAMILY_R300) {
-+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
-+       if (ret) {
-+	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
-+	   switch (screen->chip_family) {
-+	   case CHIP_FAMILY_R300:
-+	   case CHIP_FAMILY_R350:
-+	       screen->num_gb_pipes = 2;
-+	       break;
-+	   case CHIP_FAMILY_R420:
-+	   case CHIP_FAMILY_R520:
-+	   case CHIP_FAMILY_R580:
-+	   case CHIP_FAMILY_RV560:
-+	   case CHIP_FAMILY_RV570:
-+	       screen->num_gb_pipes = 4;
-+	       break;
-+	   case CHIP_FAMILY_RV350:
-+	   case CHIP_FAMILY_RV515:
-+	   case CHIP_FAMILY_RV530:
-+	   case CHIP_FAMILY_RV410:
-+	   default:
-+	       screen->num_gb_pipes = 1;
-+	       break;
-+	   }
-+       } else {
-+	   screen->num_gb_pipes = temp;
-+       }
-+
-+       /* pipe overrides */
-+       switch (device_id) {
-+       case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
-+       case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
-+       case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
-+	   screen->num_gb_pipes = 1;
-+	   break;
-+       default:
-+	   break;
-+       }
-+
-+   }
-+
-+   if (screen->chip_family <= CHIP_FAMILY_RS200)
-+      screen->chip_flags |= RADEON_CLASS_R100;
-+   else if (screen->chip_family <= CHIP_FAMILY_RV280)
-+      screen->chip_flags |= RADEON_CLASS_R200;
-+   else
-+      screen->chip_flags |= RADEON_CLASS_R300;
-+
-+   if (getenv("R300_NO_TCL"))
-+     screen->chip_flags &= ~RADEON_CHIPSET_TCL;
-+
-+   i = 0;
-+   screen->extensions[i++] = &driCopySubBufferExtension.base;
-+   screen->extensions[i++] = &driFrameTrackingExtension.base;
-+   screen->extensions[i++] = &driReadDrawableExtension;
-+
-+   if ( screen->irq != 0 ) {
-+       screen->extensions[i++] = &driSwapControlExtension.base;
-+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
-+   }
-+
-+#if !RADEON_COMMON
-+   screen->extensions[i++] = &radeonTexBufferExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   if (IS_R200_CLASS(screen))
-+       screen->extensions[i++] = &r200AllocateExtension.base;
-+
-+   screen->extensions[i++] = &r200TexBufferExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+   screen->extensions[i++] = &r300TexBufferExtension.base;
-+#endif
-+
-+   screen->extensions[i++] = NULL;
-+   sPriv->extensions = screen->extensions;
-+
-+   screen->driScreen = sPriv;
-+   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
-+   if (screen->bom == NULL) {
-+       free(screen);
-+       return NULL;
-+   }
-    return screen;
- }
- 
-@@ -984,23 +1188,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- static void
- radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- {
--   radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
-+    radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
- 
--   if (!screen)
--      return;
-+    if (!screen)
-+        return;
- 
--   if ( screen->gartTextures.map ) {
--      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
--   }
--   drmUnmapBufs( screen->buffers );
--   drmUnmap( screen->status.map, screen->status.size );
--   drmUnmap( screen->mmio.map, screen->mmio.size );
-+    if (screen->kernel_mm) {
-+#ifdef RADEON_BO_TRACK
-+        radeon_tracker_print(&screen->bom->tracker, stderr);
-+#endif
-+        radeon_bo_manager_gem_dtor(screen->bom);
-+    } else {
-+        radeon_bo_manager_legacy_dtor(screen->bom);
-+
-+        if ( screen->gartTextures.map ) {
-+            drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-+        }
-+        drmUnmapBufs( screen->buffers );
-+        drmUnmap( screen->status.map, screen->status.size );
-+        drmUnmap( screen->mmio.map, screen->mmio.size );
-+    }
- 
--   /* free all option information */
--   driDestroyOptionInfo (&screen->optionCache);
-+    /* free all option information */
-+    driDestroyOptionInfo (&screen->optionCache);
- 
--   FREE( screen );
--   sPriv->private = NULL;
-+    FREE( screen );
-+    sPriv->private = NULL;
- }
- 
- 
-@@ -1009,16 +1222,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- static GLboolean
- radeonInitDriver( __DRIscreenPrivate *sPriv )
- {
--   sPriv->private = (void *) radeonCreateScreen( sPriv );
--   if ( !sPriv->private ) {
--      radeonDestroyScreen( sPriv );
--      return GL_FALSE;
--   }
-+    if (sPriv->dri2.enabled) {
-+        sPriv->private = (void *) radeonCreateScreen2( sPriv );
-+    } else {
-+        sPriv->private = (void *) radeonCreateScreen( sPriv );
-+    }
-+    if ( !sPriv->private ) {
-+        radeonDestroyScreen( sPriv );
-+        return GL_FALSE;
-+    }
- 
--   return GL_TRUE;
-+    return GL_TRUE;
- }
- 
- 
-+
- /**
-  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-  *
-@@ -1031,101 +1249,111 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-                     const __GLcontextModes *mesaVis,
-                     GLboolean isPixmap )
- {
--   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
-+    radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
-+
-+    const GLboolean swDepth = GL_FALSE;
-+    const GLboolean swAlpha = GL_FALSE;
-+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
-+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
-+	mesaVis->depthBits != 24;
-+    GLenum rgbFormat;
-+    struct radeon_framebuffer *rfb;
- 
--   if (isPixmap) {
-+    if (isPixmap)
-       return GL_FALSE; /* not implemented */
--   }
--   else {
--      const GLboolean swDepth = GL_FALSE;
--      const GLboolean swAlpha = GL_FALSE;
--      const GLboolean swAccum = mesaVis->accumRedBits > 0;
--      const GLboolean swStencil = mesaVis->stencilBits > 0 &&
--         mesaVis->depthBits != 24;
--      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
--
--      /* front color renderbuffer */
--      {
--         driRenderbuffer *frontRb
--            = driNewRenderbuffer(GL_RGBA,
--                                 driScrnPriv->pFB + screen->frontOffset,
--                                 screen->cpp,
--                                 screen->frontOffset, screen->frontPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(frontRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
--      }
- 
--      /* back color renderbuffer */
--      if (mesaVis->doubleBufferMode) {
--         driRenderbuffer *backRb
--            = driNewRenderbuffer(GL_RGBA,
--                                 driScrnPriv->pFB + screen->backOffset,
--                                 screen->cpp,
--                                 screen->backOffset, screen->backPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(backRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
--      }
-+    rfb = CALLOC_STRUCT(radeon_framebuffer);
-+    if (!rfb)
-+      return GL_FALSE;
- 
--      /* depth renderbuffer */
--      if (mesaVis->depthBits == 16) {
--         driRenderbuffer *depthRb
--            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(depthRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
--      }
--      else if (mesaVis->depthBits == 24) {
--         driRenderbuffer *depthRb
--            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(depthRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
--      }
-+    _mesa_initialize_framebuffer(&rfb->base, mesaVis);
-+
-+    if (mesaVis->redBits == 5)
-+        rgbFormat = GL_RGB5;
-+    else if (mesaVis->alphaBits == 0)
-+        rgbFormat = GL_RGB8;
-+    else
-+        rgbFormat = GL_RGBA8;
-+
-+    /* front color renderbuffer */
-+    rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+    _mesa_add_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT, &rfb->color_rb[0]->base);
-+    rfb->color_rb[0]->has_surface = 1;
-+
-+    /* back color renderbuffer */
-+    if (mesaVis->doubleBufferMode) {
-+      rfb->color_rb[1] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(&rfb->base, BUFFER_BACK_LEFT, &rfb->color_rb[1]->base);
-+	rfb->color_rb[1]->has_surface = 1;
-+    }
- 
--      /* stencil renderbuffer */
--      if (mesaVis->stencilBits > 0 && !swStencil) {
--         driRenderbuffer *stencilRb
--            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(stencilRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
--	 stencilRb->depthHasSurface = screen->depthHasSurface;
-+    if (mesaVis->depthBits == 24) {
-+      if (mesaVis->stencilBits == 8) {
-+	struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv);
-+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base);
-+	_mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base);
-+	depthStencilRb->has_surface = screen->depthHasSurface;
-+      } else {
-+	/* depth renderbuffer */
-+	struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv);
-+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
-+	depth->has_surface = screen->depthHasSurface;
-       }
-+    } else if (mesaVis->depthBits == 16) {
-+      /* just 16-bit depth buffer, no hw stencil */
-+	struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv);
-+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
-+	depth->has_surface = screen->depthHasSurface;
-+    }
- 
--      _mesa_add_soft_renderbuffers(fb,
--                                   GL_FALSE, /* color */
--                                   swDepth,
--                                   swStencil,
--                                   swAccum,
--                                   swAlpha,
--                                   GL_FALSE /* aux */);
--      driDrawPriv->driverPrivate = (void *) fb;
-+    _mesa_add_soft_renderbuffers(&rfb->base,
-+	    GL_FALSE, /* color */
-+	    swDepth,
-+	    swStencil,
-+	    swAccum,
-+	    swAlpha,
-+	    GL_FALSE /* aux */);
-+    driDrawPriv->driverPrivate = (void *) rfb;
- 
--      return (driDrawPriv->driverPrivate != NULL);
--   }
-+    return (driDrawPriv->driverPrivate != NULL);
- }
- 
- 
--static void
-+static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb)
-+{
-+	struct radeon_renderbuffer *rb;
-+
-+	rb = rfb->color_rb[0];
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+	rb = rfb->color_rb[1];
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+	rb = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+}
-+
-+void
- radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
- {
--   _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
-+    struct radeon_framebuffer *rfb;
-+    if (!driDrawPriv)
-+	return;
-+
-+    rfb = (void*)driDrawPriv->driverPrivate;
-+    if (!rfb)
-+	return;
-+    radeon_cleanup_renderbuffers(rfb);
-+    _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
- }
- 
--#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- /**
-  * Choose the appropriate CreateContext function based on the chipset.
-  * Eventually, all drivers will go through this process.
-@@ -1136,25 +1364,21 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
- {
- 	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- 	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
--
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- 	if (IS_R300_CLASS(screen))
- 		return r300CreateContext(glVisual, driContextPriv, sharedContextPriv);
--        return GL_FALSE;
--}
--
--/**
-- * Choose the appropriate DestroyContext function based on the chipset.
-- */
--static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv)
--{
--	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
--
--	if (IS_R300_CLASS(radeon->radeonScreen))
--		return r300DestroyContext(driContextPriv);
--}
-+#endif
- 
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+	if (IS_R200_CLASS(screen))
-+		return r200CreateContext(glVisual, driContextPriv, sharedContextPriv);
-+#endif
- 
-+#if !RADEON_COMMON
-+	return r100CreateContext(glVisual, driContextPriv, sharedContextPriv);
- #endif
-+	return GL_FALSE;
-+}
- 
- 
- /**
-@@ -1216,13 +1440,103 @@ radeonInitScreen(__DRIscreenPrivate *psp)
-    if (!radeonInitDriver(psp))
-        return NULL;
- 
-+   /* for now fill in all modes */
-    return radeonFillInModes( psp,
- 			     dri_priv->bpp,
- 			     (dri_priv->bpp == 16) ? 16 : 24,
--			     (dri_priv->bpp == 16) ? 0  : 8,
--			     (dri_priv->backOffset != dri_priv->depthOffset) );
-+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
- }
-+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
-+
-+/**
-+ * This is the driver specific part of the createNewScreen entry point.
-+ * Called when using DRI2.
-+ *
-+ * \return the __GLcontextModes supported by this driver
-+ */
-+static const
-+__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
-+{
-+   GLenum fb_format[3];
-+   GLenum fb_type[3];
-+   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-+    * support pageflipping at all.
-+    */
-+   static const GLenum back_buffer_modes[] = {
-+     GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
-+   };
-+   uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
-+   int color;
-+   __DRIconfig **configs = NULL;
-+
-+   /* Calling driInitExtensions here, with a NULL context pointer,
-+    * does not actually enable the extensions.  It just makes sure
-+    * that all the dispatch offsets for all the extensions that
-+    * *might* be enables are known.  This is needed because the
-+    * dispatch offsets need to be known when _mesa_context_create
-+    * is called, but we can't enable the extensions until we have a
-+    * context pointer.
-+    *
-+    * Hello chicken.  Hello egg.  How are you two today?
-+    */
-+   driInitExtensions( NULL, card_extensions, GL_FALSE );
-+   driInitExtensions( NULL, mm_extensions, GL_FALSE );
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   driInitExtensions( NULL, blend_extensions, GL_FALSE );
-+   driInitSingleExtension( NULL, ARB_vp_extension );
-+   driInitSingleExtension( NULL, NV_vp_extension );
-+   driInitSingleExtension( NULL, ATI_fs_extension );
-+   driInitExtensions( NULL, point_extensions, GL_FALSE );
-+#endif
-+
-+   if (!radeonInitDriver(psp)) {
-+       return NULL;
-+    }
-+   depth_bits[0] = 0;
-+   stencil_bits[0] = 0;
-+   depth_bits[1] = 16;
-+   stencil_bits[1] = 0;
-+   depth_bits[2] = 24;
-+   stencil_bits[2] = 0;
-+   depth_bits[3] = 24;
-+   stencil_bits[3] = 8;
-+
-+   msaa_samples_array[0] = 0;
-+
-+   fb_format[0] = GL_RGB;
-+   fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
-+
-+   fb_format[1] = GL_BGR;
-+   fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
-+
-+   fb_format[2] = GL_BGRA;
-+   fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
-+
-+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
-+      __DRIconfig **new_configs;
-+
-+      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
-+				     depth_bits,
-+				     stencil_bits,
-+				     ARRAY_SIZE(depth_bits),
-+				     back_buffer_modes,
-+				     ARRAY_SIZE(back_buffer_modes),
-+				     msaa_samples_array,
-+				     ARRAY_SIZE(msaa_samples_array));
-+      if (configs == NULL)
-+	 configs = new_configs;
-+      else
-+	 configs = driConcatConfigs(configs, new_configs);
-+   }
- 
-+   if (configs == NULL) {
-+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
-+              __LINE__);
-+      return NULL;
-+   }
-+
-+   return (const __DRIconfig **)configs;
-+}
- 
- /**
-  * Get information about previous buffer swaps.
-@@ -1230,31 +1544,26 @@ radeonInitScreen(__DRIscreenPrivate *psp)
- static int
- getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
- {
--#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
--   radeonContextPtr  rmesa;
--#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
--   r200ContextPtr  rmesa;
--#endif
-+    struct radeon_framebuffer *rfb;
- 
--   if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
--	|| (dPriv->driContextPriv->driverPrivate == NULL)
--	|| (sInfo == NULL) ) {
--      return -1;
-+    if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
-+	 || (dPriv->driContextPriv->driverPrivate == NULL)
-+	 || (sInfo == NULL) ) {
-+	return -1;
-    }
- 
--   rmesa = dPriv->driContextPriv->driverPrivate;
--   sInfo->swap_count = rmesa->swap_count;
--   sInfo->swap_ust = rmesa->swap_ust;
--   sInfo->swap_missed_count = rmesa->swap_missed_count;
-+    rfb = dPriv->driverPrivate;
-+    sInfo->swap_count = rfb->swap_count;
-+    sInfo->swap_ust = rfb->swap_ust;
-+    sInfo->swap_missed_count = rfb->swap_missed_count;
- 
-    sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
--       ? driCalculateSwapUsage( dPriv, 0, rmesa->swap_missed_ust )
-+       ? driCalculateSwapUsage( dPriv, 0, rfb->swap_missed_ust )
-        : 0.0;
- 
-    return 0;
- }
- 
--#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
- const struct __DriverAPIRec driDriverAPI = {
-    .InitScreen      = radeonInitScreen,
-    .DestroyScreen   = radeonDestroyScreen,
-@@ -1271,23 +1580,7 @@ const struct __DriverAPIRec driDriverAPI = {
-    .WaitForSBC      = NULL,
-    .SwapBuffersMSC  = NULL,
-    .CopySubBuffer   = radeonCopySubBuffer,
-+    /* DRI2 */
-+   .InitScreen2     = radeonInitScreen2,
- };
--#else
--const struct __DriverAPIRec driDriverAPI = {
--   .InitScreen      = radeonInitScreen,
--   .DestroyScreen   = radeonDestroyScreen,
--   .CreateContext   = r200CreateContext,
--   .DestroyContext  = r200DestroyContext,
--   .CreateBuffer    = radeonCreateBuffer,
--   .DestroyBuffer   = radeonDestroyBuffer,
--   .SwapBuffers     = r200SwapBuffers,
--   .MakeCurrent     = r200MakeCurrent,
--   .UnbindContext   = r200UnbindContext,
--   .GetSwapInfo     = getSwapInfo,
--   .GetDrawableMSC  = driDrawableGetMSC32,
--   .WaitForMSC      = driWaitForMSC32,
--   .WaitForSBC      = NULL,
--   .SwapBuffersMSC  = NULL,
--   .CopySubBuffer   = r200CopySubBuffer,
--};
--#endif
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
-index b84c70b..8605eb4 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
-@@ -54,7 +54,7 @@ typedef struct {
-    drmAddress map;			/* Mapping of the DRM region */
- } radeonRegionRec, *radeonRegionPtr;
- 
--typedef struct {
-+typedef struct radeon_screen {
-    int chip_family;
-    int chip_flags;
-    int cpp;
-@@ -103,9 +103,12 @@ typedef struct {
-    /* Configuration cache with default values for all contexts */
-    driOptionCache optionCache;
- 
--   const __DRIextension *extensions[8];
-+   const __DRIextension *extensions[16];
- 
-    int num_gb_pipes;
-+   int kernel_mm;
-+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
-+   struct radeon_bo_manager *bom;
- } radeonScreenRec, *radeonScreenPtr;
- 
- #define IS_R100_CLASS(screen) \
-@@ -115,4 +118,5 @@ typedef struct {
- #define IS_R300_CLASS(screen) \
- 	((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300)
- 
-+extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv);
- #endif /* __RADEON_SCREEN_H__ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
-index 12051ff..e28f286 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
-@@ -43,46 +43,203 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/glheader.h"
- #include "swrast/swrast.h"
- 
--#include "radeon_context.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "radeon_span.h"
--#include "radeon_tex.h"
--
--#include "drirenderbuffer.h"
- 
- #define DBG 0
- 
-+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
-+
-+static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
-+			     GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 5;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x >> 5) << 11;
-+                offset += ((x & 31) >> 2) << 5;
-+                offset += (x & 3) << 2;
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 6;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x >> 6) << 11;
-+                offset += ((x & 63) >> 3) << 5;
-+                offset += (x & 7) << 2;
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 3) << 5;
-+            offset += (x & 7) << 2;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
-+			     GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 6;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x >> 6) << 11;
-+                offset += ((x & 63) >> 3) << 5;
-+                offset += (x & 7) << 1;
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 7;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x >> 7) << 11;
-+                offset += ((x & 127) >> 4) << 5;
-+                offset += (x & 15) << 2;
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 4) << 5;
-+            offset += (x & 15) << 2;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
-+			   GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint microblkxs;
-+    GLint macroblkxs;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                microblkxs = 16 / rrb->cpp;
-+                macroblkxs = 128 / rrb->cpp;
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            } else {
-+                microblkxs = 32 / rrb->cpp;
-+                macroblkxs = 256 / rrb->cpp;
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            }
-+        } else {
-+            microblkxs = 32 / rrb->cpp;
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x / microblkxs) << 5;
-+            offset += (x & (microblkxs - 1)) * rrb->cpp;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+#ifndef COMPILE_R300
-+static uint32_t
-+z24s8_to_s8z24(uint32_t val)
-+{
-+   return (val << 24) | (val >> 8);
-+}
-+
-+static uint32_t
-+s8z24_to_z24s8(uint32_t val)
-+{
-+   return (val >> 24) | (val << 8);
-+}
-+#endif
-+
- /*
-  * Note that all information needed to access pixels in a renderbuffer
-  * should be obtained through the gl_renderbuffer parameter, not per-context
-  * information.
-  */
- #define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
--   const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
-+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
-+   struct radeon_renderbuffer *rrb = (void *) rb;		\
-+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
-+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
-+   unsigned int num_cliprects;						\
-+   struct drm_clip_rect *cliprects;					\
-+   int x_off, y_off;							\
-+   GLuint p;						\
-+   (void)p;						\
-+   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
- 
- #define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
-+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
-+   struct radeon_renderbuffer *rrb = (void *) rb;	\
-+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
-+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
-+   unsigned int num_cliprects;						\
-+   struct drm_clip_rect *cliprects;					\
-+   int x_off, y_off;							\
-+  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
- 
- #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
- 
--#define Y_FLIP(Y) (bottom - (Y))
-+#define Y_FLIP(_y) ((_y) * yScale + yBias)
- 
- #define HW_LOCK()
- 
- #define HW_UNLOCK()
- 
-+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
-+ * the cliprect info from the context, not the driDrawable.
-+ * Move this into spantmp2.h someday.
-+ */
-+#define HW_CLIPLOOP()							\
-+   do {									\
-+      int _nc = num_cliprects;						\
-+      while ( _nc-- ) {							\
-+	 int minx = cliprects[_nc].x1 - x_off;				\
-+	 int miny = cliprects[_nc].y1 - y_off;				\
-+	 int maxx = cliprects[_nc].x2 - x_off;				\
-+	 int maxy = cliprects[_nc].y2 - y_off;
-+	
- /* ================================================================
-  * Color buffer
-  */
-@@ -94,7 +251,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_RGB565
- #define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
-+#include "spantmp2.h"
-+
-+/* 32 bit, xRGB8888 color spanline and pixel functions
-+ */
-+#define SPANTMP_PIXEL_FMT GL_BGRA
-+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
-+
-+#define TAG(x)    radeon##x##_xRGB8888
-+#define TAG2(x,y) radeon##x##_xRGB8888##y
-+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) | 0xff000000))
-+#define PUT_VALUE(_x, _y, d) { \
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );		\
-+   *_ptr = d;								\
-+} while (0)
- #include "spantmp2.h"
- 
- /* 32 bit, ARGB8888 color spanline and pixel functions
-@@ -104,7 +275,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_ARGB8888
- #define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
- #include "spantmp2.h"
- 
- /* ================================================================
-@@ -121,70 +292,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * too...
-  */
- 
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 4 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0..1] = 0           */
--
--#ifdef COMPILE_R300
--		ba = (y / 8) * (pitch / 8) + (x / 8);
--#else
--		ba = (y / 16) * (pitch / 16) + (x / 16);
--#endif
--
--		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
--		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
--		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 2 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0]    = 0           */
--
--		ba = (y / 16) * (pitch / 32) + (x / 32);
--
--		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
--		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
--		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
- /* 16-bit depth buffer functions
-  */
- #define VALUE_TYPE GLushort
- 
- #define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
-+   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d
- 
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
-+   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)
- 
- #define TAG(x) radeon##x##_z16
- #include "depthtmp.h"
- 
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-+/* 24 bit depth
-  *
-  * Careful: It looks like the R300 uses ZZZS byte order while the R200
-  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
-@@ -194,35 +315,76 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
- #ifdef COMPILE_R300
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x000000ff;							\
-    tmp |= ((d << 8) & 0xffffff00);					\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
-+   GLuint tmp = *_ptr;							\
-    tmp &= 0xff000000;							\
-    tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_DEPTH( d, _x, _y )						\
--  do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
--					 _y + yo )) & 0xffffff00) >> 8; \
-+  do {									\
-+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
-   }while(0)
- #else
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
-+#define READ_DEPTH( d, _x, _y )	\
-+  d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
-+#endif
-+/*
-+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
-+   d = *(GLuint*)(radeon_ptr(rrb, _x,	_y )) & 0x00ffffff;
-+*/
-+#define TAG(x) radeon##x##_z24
-+#include "depthtmp.h"
-+
-+/* 24 bit depth, 8 bit stencil depthbuffer functions
-+ * EXT_depth_stencil
-+ *
-+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
-+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
-+ */
-+#define VALUE_TYPE GLuint
-+
-+#ifdef COMPILE_R300
-+#define WRITE_DEPTH( _x, _y, d )					\
-+do {									\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );		\
-+   *_ptr = d;								\
-+} while (0)
-+#else
-+#define WRITE_DEPTH( _x, _y, d )					\
-+do {									\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
-+   GLuint tmp = z24s8_to_s8z24(d);					\
-+   *_ptr = tmp;					\
-+} while (0)
- #endif
- 
-+#ifdef COMPILE_R300
-+#define READ_DEPTH( d, _x, _y )						\
-+  do { \
-+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)));	\
-+  }while(0)
-+#else
-+#define READ_DEPTH( d, _x, _y )	do {					\
-+    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr32(rrb, _x + x_off,	_y + y_off ))); \
-+  } while (0)
-+#endif
-+/*
-+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
-+   d = *(GLuint*)(radeon_ptr(rrb, _x,	_y )) & 0x00ffffff;
-+*/
- #define TAG(x) radeon##x##_z24_s8
- #include "depthtmp.h"
- 
-@@ -235,35 +397,35 @@ do {									\
- #ifdef COMPILE_R300
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0xffffff00;							\
-    tmp |= (d) & 0xff;							\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x00ffffff;							\
-    tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );		\
-+   GLuint tmp = *_ptr;				\
-    d = tmp & 0x000000ff;						\
- } while (0)
- #else
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );		\
-+   GLuint tmp = *_ptr;				\
-    d = (tmp & 0xff000000) >> 24;					\
- } while (0)
- #endif
-@@ -271,29 +433,105 @@ do {									\
- #define TAG(x) radeon##x##_z24_s8
- #include "stenciltmp.h"
- 
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
- 
-+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
-+{
-+	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
-+	int r;
-+	
-+	if (rrb == NULL || !rrb->bo)
-+		return;
-+
-+	if (flag) {
-+		r = radeon_bo_map(rrb->bo, 1);
-+		if (r) {
-+			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
-+				__FUNCTION__, r);
-+		}
-+
-+		radeonSetSpanFunctions(rrb);
-+	} else {
-+		radeon_bo_unmap(rrb->bo);
-+		rb->GetRow = NULL;
-+		rb->PutRow = NULL;
-+	}
-+}
-+
-+static void
-+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
-+{
-+	GLuint i, j;
-+
-+	/* color draw buffers */
-+	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
-+		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
-+
-+	/* check for render to textures */
-+	for (i = 0; i < BUFFER_COUNT; i++) {
-+		struct gl_renderbuffer_attachment *att =
-+			ctx->DrawBuffer->Attachment + i;
-+		struct gl_texture_object *tex = att->Texture;
-+		if (tex) {
-+			/* render to texture */
-+			ASSERT(att->Renderbuffer);
-+			if (map)
-+				ctx->Driver.MapTexture(ctx, tex);
-+			else
-+				ctx->Driver.UnmapTexture(ctx, tex);
-+		}
-+	}
-+	
-+	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
-+
-+	/* depth buffer (Note wrapper!) */
-+	if (ctx->DrawBuffer->_DepthBuffer)
-+		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
-+	
-+	if (ctx->DrawBuffer->_StencilBuffer)
-+		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
-+
-+}
- static void radeonSpanRenderStart(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--#ifdef COMPILE_R300
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--	R300_FIREVERTICES(r300);
--#else
--	RADEON_FIREVERTICES(rmesa);
--#endif
--	LOCK_HARDWARE(rmesa);
--	radeonWaitForIdleLocked(rmesa);
-+	int i;
-+
-+	radeon_firevertices(rmesa);
-+
-+	/* The locking and wait for idle should really only be needed in classic mode.
-+	 * In a future memory manager based implementation, this should become
-+	 * unnecessary due to the fact that mapping our buffers, textures, etc.
-+	 * should implicitly wait for any previous rendering commands that must
-+	 * be waited on. */
-+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
-+		LOCK_HARDWARE(rmesa);
-+		radeonWaitForIdleLocked(rmesa);
-+	}
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
-+	}
-+
-+	radeon_map_unmap_buffers(ctx, 1);
-+
-+
-+
- }
- 
- static void radeonSpanRenderFinish(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	int i;
- 	_swrast_flush(ctx);
--	UNLOCK_HARDWARE(rmesa);
-+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
-+		UNLOCK_HARDWARE(rmesa);
-+	}
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
-+	}
-+
-+	radeon_map_unmap_buffers(ctx, 0);
- }
- 
- void radeonInitSpanFuncs(GLcontext * ctx)
-@@ -307,20 +545,21 @@ void radeonInitSpanFuncs(GLcontext * ctx)
- /**
-  * Plug in the Get/Put routines for the given driRenderbuffer.
-  */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
-+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
- {
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
-+	if (rrb->base._ActualFormat == GL_RGB5) {
-+		radeonInitPointers_RGB565(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_RGB8) {
-+		radeonInitPointers_xRGB8888(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_RGBA8) {
-+		radeonInitPointers_ARGB8888(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
-+		radeonInitDepthPointers_z16(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
-+		radeonInitDepthPointers_z24(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
-+		radeonInitDepthPointers_z24_s8(&rrb->base);
-+	} else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
-+		radeonInitStencilPointers_z24_s8(&rrb->base);
- 	}
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
-index 9abe086..ea6a2e7 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
-@@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #ifndef __RADEON_SPAN_H__
- #define __RADEON_SPAN_H__
- 
--#include "drirenderbuffer.h"
--
- extern void radeonInitSpanFuncs(GLcontext * ctx);
--extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
- 
- #endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
-index b656100..d9a7ef6 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
-@@ -47,6 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast_setup/swrast_setup.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
- #include "radeon_tcl.h"
-@@ -62,7 +63,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
- 
- static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
-    GLubyte refByte;
- 
-@@ -106,7 +107,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
- static void radeonBlendEquationSeparate( GLcontext *ctx,
- 					 GLenum modeRGB, GLenum modeA )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
-    GLboolean fallback = GL_FALSE;
- 
-@@ -147,7 +148,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
- 				     GLenum sfactorRGB, GLenum dfactorRGB,
- 				     GLenum sfactorA, GLenum dfactorA )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
-       ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
-    GLboolean fallback = GL_FALSE;
-@@ -257,7 +258,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
- 
- static void radeonDepthFunc( GLcontext *ctx, GLenum func )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, ctx );
-    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
-@@ -293,7 +294,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
- 
- static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    RADEON_STATECHANGE( rmesa, ctx );
- 
-    if ( ctx->Depth.Mask ) {
-@@ -305,16 +306,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
- 
- static void radeonClearDepth( GLcontext *ctx, GLclampd d )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
- 		    RADEON_DEPTH_FORMAT_MASK);
- 
-    switch ( format ) {
-    case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x0000ffff;
-+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
-       break;
-    case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x00ffffff;
-+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
-       break;
-    }
- }
-@@ -327,7 +328,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
- 
- static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    union { int i; float f; } c, d;
-    GLchan col[4];
- 
-@@ -406,109 +407,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    }
- }
- 
--
--/* =============================================================
-- * Scissoring
-- */
--
--
--static GLboolean intersect_rect( drm_clip_rect_t *out,
--				 drm_clip_rect_t *a,
--				 drm_clip_rect_t *b )
--{
--   *out = *a;
--   if ( b->x1 > out->x1 ) out->x1 = b->x1;
--   if ( b->y1 > out->y1 ) out->y1 = b->y1;
--   if ( b->x2 < out->x2 ) out->x2 = b->x2;
--   if ( b->y2 < out->y2 ) out->y2 = b->y2;
--   if ( out->x1 >= out->x2 ) return GL_FALSE;
--   if ( out->y1 >= out->y2 ) return GL_FALSE;
--   return GL_TRUE;
--}
--
--
--void radeonRecalcScissorRects( radeonContextPtr rmesa )
--{
--   drm_clip_rect_t *out;
--   int i;
--
--   /* Grow cliprect store?
--    */
--   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
--	 rmesa->state.scissor.numAllocedClipRects *= 2;
--      }
--
--      if (rmesa->state.scissor.pClipRects)
--	 FREE(rmesa->state.scissor.pClipRects);
--
--      rmesa->state.scissor.pClipRects = 
--	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
--		 sizeof(drm_clip_rect_t) );
--
--      if ( rmesa->state.scissor.pClipRects == NULL ) {
--	 rmesa->state.scissor.numAllocedClipRects = 0;
--	 return;
--      }
--   }
--   
--   out = rmesa->state.scissor.pClipRects;
--   rmesa->state.scissor.numClipRects = 0;
--
--   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
--      if ( intersect_rect( out, 
--			   &rmesa->pClipRects[i], 
--			   &rmesa->state.scissor.rect ) ) {
--	 rmesa->state.scissor.numClipRects++;
--	 out++;
--      }
--   }
--}
--
--
--static void radeonUpdateScissor( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if ( rmesa->dri.drawable ) {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--
--      int x = ctx->Scissor.X;
--      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
--      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
--      int h = dPriv->h - ctx->Scissor.Y - 1;
--
--      rmesa->state.scissor.rect.x1 = x + dPriv->x;
--      rmesa->state.scissor.rect.y1 = y + dPriv->y;
--      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
--      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
--
--      radeonRecalcScissorRects( rmesa );
--   }
--}
--
--
--static void radeonScissor( GLcontext *ctx,
--			   GLint x, GLint y, GLsizei w, GLsizei h )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if ( ctx->Scissor.Enabled ) {
--      RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
--      radeonUpdateScissor( ctx );
--   }
--
--}
--
--
- /* =============================================================
-  * Culling
-  */
- 
- static void radeonCullFace( GLcontext *ctx, GLenum unused )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
-    GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
- 
-@@ -545,7 +450,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
- 
- static void radeonFrontFace( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, set );
-    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
-@@ -570,7 +475,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
-  */
- static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, lin );
-    RADEON_STATECHANGE( rmesa, set );
-@@ -587,7 +492,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
- 
- static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, lin );
-    rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
-@@ -602,8 +507,8 @@ static void radeonColorMask( GLcontext *ctx,
- 			     GLboolean r, GLboolean g,
- 			     GLboolean b, GLboolean a )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 				  ctx->Color.ColorMask[RCOMP],
- 				  ctx->Color.ColorMask[GCOMP],
- 				  ctx->Color.ColorMask[BCOMP],
-@@ -623,8 +528,9 @@ static void radeonColorMask( GLcontext *ctx,
- static void radeonPolygonOffset( GLcontext *ctx,
- 				 GLfloat factor, GLfloat units )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   float_ui32_type constant =  { units * rmesa->state.depth.scale };
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
-+   float_ui32_type constant =  { units * depthScale };
-    float_ui32_type factoru = { factor };
- 
-    RADEON_STATECHANGE( rmesa, zbs );
-@@ -634,7 +540,7 @@ static void radeonPolygonOffset( GLcontext *ctx,
- 
- static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint i;
-    drm_radeon_stipple_t stipple;
- 
-@@ -646,27 +552,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
- 
-    /* TODO: push this into cmd mechanism
-     */
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* FIXME: Use window x,y offsets into stipple RAM.
-     */
-    stipple.mask = rmesa->state.stipple.mask;
--   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
-                     &stipple, sizeof(drm_radeon_stipple_t) );
--   UNLOCK_HARDWARE( rmesa );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
- 
- static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
- 
-    /* Can't generally do unfilled via tcl, but some good special
-     * cases work. 
-     */
-    TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
--   if (rmesa->TclFallback) {
-+   if (rmesa->radeon.TclFallback) {
-       radeonChooseRenderState( ctx );
-       radeonChooseVertexState( ctx );
-    }
-@@ -686,7 +592,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-  */
- static void radeonUpdateSpecular( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
-    GLuint flag = 0;
- 
-@@ -757,7 +663,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
- 
-    /* Update vertex/render formats
-     */
--   if (rmesa->TclFallback) { 
-+   if (rmesa->radeon.TclFallback) { 
-       radeonChooseRenderState( ctx );
-       radeonChooseVertexState( ctx );
-    }
-@@ -774,7 +680,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
-  */
- static void update_global_ambient( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    float *fcmd = (float *)RADEON_DB_STATE( glt );
- 
-    /* Need to do more if both emmissive & ambient are PREMULT:
-@@ -809,7 +715,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
- /*     fprintf(stderr, "%s\n", __FUNCTION__); */
- 
-    if (l->Enabled) {
--      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
-       float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
- 
-       COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
-@@ -849,7 +755,7 @@ static void check_twoside_fallback( GLcontext *ctx )
- 
- static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
- {
--      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
-       GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
- 
-       light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
-@@ -913,7 +819,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
- 
- void radeonUpdateMaterial( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
-    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
-    GLuint mask = ~0;
-@@ -978,7 +884,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
-  */
- static void update_light( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    /* Have to check these, or have an automatic shortcircuit mechanism
-     * to remove noop statechanges. (Or just do a better job on the
-@@ -1043,7 +949,7 @@ static void update_light( GLcontext *ctx )
- static void radeonLightfv( GLcontext *ctx, GLenum light,
- 			   GLenum pname, const GLfloat *params )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLint p = light - GL_LIGHT0;
-    struct gl_light *l = &ctx->Light.Light[p];
-    GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
-@@ -1164,7 +1070,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
- static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 				const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    switch (pname) {
-       case GL_LIGHT_MODEL_AMBIENT: 
-@@ -1188,7 +1094,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 
- 	 check_twoside_fallback( ctx );
- 
--	 if (rmesa->TclFallback) {
-+	 if (rmesa->radeon.TclFallback) {
- 	    radeonChooseRenderState( ctx );
- 	    radeonChooseVertexState( ctx );
- 	 }
-@@ -1205,7 +1111,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 
- static void radeonShadeModel( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
- 
-    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
-@@ -1244,7 +1150,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
- static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
- {
-    GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
- 
-    RADEON_STATECHANGE( rmesa, ucp[p] );
-@@ -1256,7 +1162,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
- 
- static void radeonUpdateClipPlanes( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint p;
- 
-    for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
-@@ -1281,7 +1187,7 @@ static void
- radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
-                            GLint ref, GLuint mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
- 		     ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
- 
-@@ -1325,7 +1231,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
- static void
- radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, msk );
-    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
-@@ -1336,7 +1242,7 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
- static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
-                                      GLenum zfail, GLenum zpass )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
-       and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
-@@ -1349,7 +1255,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
-    GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
-    GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
-    
--   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
-+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
-       tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
-       tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
-       tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
-@@ -1455,9 +1361,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
- 
- static void radeonClearStencil( GLcontext *ctx, GLint s )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   rmesa->state.stencil.clear = 
-+   rmesa->radeon.state.stencil.clear = 
-       ((GLuint) (ctx->Stencil.Clear & 0xff) |
-        (0xff << RADEON_STENCIL_MASK_SHIFT) |
-        ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
-@@ -1481,20 +1387,30 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
-  */
- void radeonUpdateWindow( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLfloat xoffset = (GLfloat)dPriv->x;
--   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
-+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
-+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
-+   GLfloat y_scale, y_bias;
-+
-+   if (render_to_fbo) {
-+      y_scale = 1.0;
-+      y_bias = 0;
-+   } else {
-+      y_scale = -1.0;
-+      y_bias = yoffset;
-+   }
- 
-    float_ui32_type sx = { v[MAT_SX] };
-    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
--   float_ui32_type sy = { - v[MAT_SY] };
--   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
--   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
--   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
-+   float_ui32_type sy = { v[MAT_SY] * y_scale };
-+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
-+   float_ui32_type sz = { v[MAT_SZ] * depthScale };
-+   float_ui32_type tz = { v[MAT_TZ] * depthScale };
- 
--   RADEON_FIREVERTICES( rmesa );
-    RADEON_STATECHANGE( rmesa, vpt );
- 
-    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
-@@ -1514,6 +1430,8 @@ static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
-     * values, or keep the originals hanging around.
-     */
-    radeonUpdateWindow( ctx );
-+
-+   radeon_viewport(ctx, x, y, width, height);
- }
- 
- static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
-@@ -1524,8 +1442,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
- 
- void radeonUpdateViewportOffset( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    GLfloat xoffset = (GLfloat)dPriv->x;
-    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-@@ -1555,8 +1473,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
-                 RADEON_STIPPLE_Y_OFFSET_MASK);
- 
-          /* add magic offsets, then invert */
--         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
--         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
-+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
-+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
-                      & RADEON_STIPPLE_COORD_MASK);
- 
-          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
-@@ -1580,20 +1498,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
- 
- static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLubyte c[4];
-    CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
-    CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
-    CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
-    CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
--   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
-+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 					       c[0], c[1], c[2], c[3] );
- }
- 
- 
- static void radeonRenderMode( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
- }
- 
-@@ -1619,7 +1537,7 @@ static GLuint radeon_rop_tab[] = {
- 
- static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint rop = (GLuint)opcode - GL_CLEAR;
- 
-    ASSERT( rop < 16 );
-@@ -1628,105 +1546,13 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
-    rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
- }
- 
--
--/**
-- * Set up the cliprects for either front or back-buffer drawing.
-- */
--void radeonSetCliprects( radeonContextPtr rmesa )
--{
--   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
--   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
--   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
--
--   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* Can't ignore 2d windows if we are page flipping.
--       */
--      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
--	 rmesa->numClipRects = drawable->numClipRects;
--	 rmesa->pClipRects = drawable->pClipRects;
--      }
--      else {
--	 rmesa->numClipRects = drawable->numBackClipRects;
--	 rmesa->pClipRects = drawable->pBackClipRects;
--      }
--   }
--   else {
--      /* front buffer (or none, or multiple buffers */
--      rmesa->numClipRects = drawable->numClipRects;
--      rmesa->pClipRects = drawable->pClipRects;
--   }
--
--   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
--      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
--			       drawable->w, drawable->h);
--      draw_fb->Initialized = GL_TRUE;
--   }
--
--   if (drawable != readable) {
--      if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
--	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
--				  readable->w, readable->h);
--	 read_fb->Initialized = GL_TRUE;
--      }
--   }
--
--   if (rmesa->state.scissor.enabled)
--      radeonRecalcScissorRects( rmesa );
--
--   rmesa->lastStamp = drawable->lastStamp;
--}
--
--
--/**
-- * Called via glDrawBuffer.
-- */
--static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s %s\n", __FUNCTION__,
--	      _mesa_lookup_enum_by_nr( mode ));
--
--   RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
--
--   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
--      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
--      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
--      return;
--   }
--
--   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
--   case BUFFER_FRONT_LEFT:
--   case BUFFER_BACK_LEFT:
--      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
--      break;
--   default:
--      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
--      return;
--   }
--
--   radeonSetCliprects( rmesa );
--
--   /* We'll set the drawing engine's offset/pitch parameters later
--    * when we update other state.
--    */
--}
--
--static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
--{
--   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
--}
--
--
- /* =============================================================
-  * State enable/disable
-  */
- 
- static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint p, flag;
- 
-    if ( RADEON_DEBUG & DEBUG_STATE )
-@@ -1821,10 +1647,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
-       RADEON_STATECHANGE(rmesa, ctx );
-       if ( state ) {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
-       } else {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
-       }
-       break;
- 
-@@ -1971,21 +1797,30 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
-    }
- 
-    case GL_SCISSOR_TEST:
--      RADEON_FIREVERTICES( rmesa );
--      rmesa->state.scissor.enabled = state;
-+      radeon_firevertices(&rmesa->radeon);
-+      rmesa->radeon.state.scissor.enabled = state;
-       radeonUpdateScissor( ctx );
-       break;
- 
-    case GL_STENCIL_TEST:
--      if ( rmesa->state.stencil.hwBuffer ) {
--	 RADEON_STATECHANGE( rmesa, ctx );
--	 if ( state ) {
--	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
-+      {
-+	 GLboolean hw_stencil = GL_FALSE;
-+	 if (ctx->DrawBuffer) {
-+	    struct radeon_renderbuffer *rrbStencil
-+	       = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
-+	    hw_stencil = (rrbStencil && rrbStencil->bo);
-+	 }
-+
-+	 if (hw_stencil) {
-+	    RADEON_STATECHANGE( rmesa, ctx );
-+	    if ( state ) {
-+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
-+	    } else {
-+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
-+	    }
- 	 } else {
--	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
-+	    FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
- 	 }
--      } else {
--	 FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
-       }
-       break;
- 
-@@ -2010,7 +1845,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
- 
- static void radeonLightingSpaceChange( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean tmp;
-    RADEON_STATECHANGE( rmesa, tcl );
- 
-@@ -2039,7 +1874,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
-  */
- 
- 
--void radeonUploadTexMatrix( radeonContextPtr rmesa,
-+void radeonUploadTexMatrix( r100ContextPtr rmesa,
- 			    int unit, GLboolean swapcols )
- {
- /* Here's how this works: on r100, only 3 tex coords can be submitted, so the
-@@ -2065,7 +1900,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
-    int idx = TEXMAT_0 + unit;
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
-    int i;
--   struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
-+   struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
-    GLfloat *src = rmesa->tmpmat[unit].m;
- 
-    rmesa->TexMatColSwap &= ~(1 << unit);
-@@ -2119,7 +1954,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
- }
- 
- 
--static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
-+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
- {
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
-    int i;
-@@ -2135,7 +1970,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
-    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
- }
- 
--static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
-+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
- {
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
-    memcpy(dest, src, 16*sizeof(float));
-@@ -2145,7 +1980,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
- 
- static void update_texturematrix( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
-    GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
-    int unit;
-@@ -2209,61 +2044,72 @@ static void update_texturematrix( GLcontext *ctx )
-    }
- }
- 
--
--/**
-- * Tell the card where to render (offset, pitch).
-- * Effected by glDrawBuffer, etc
-- */
--void
--radeonUpdateDrawBuffer(GLcontext *ctx)
-+static GLboolean r100ValidateBuffers(GLcontext *ctx)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_framebuffer *fb = ctx->DrawBuffer;
--   driRenderbuffer *drb;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   struct radeon_renderbuffer *rrb;
-+   int i;
- 
--   if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
--      /* draw to front */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
--   }
--   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* draw to back */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
--   }
--   else {
--      /* drawing to multiple buffers, or none */
--      return;
-+   radeon_validate_reset_bos(&rmesa->radeon);
-+   
-+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+     radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+			0, RADEON_GEM_DOMAIN_VRAM);
-    }
- 
--   assert(drb);
--   assert(drb->flippedPitch);
-+   /* depth buffer */
-+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+     radeon_validate_bo(&rmesa->radeon, rrb->bo,
-+			0, RADEON_GEM_DOMAIN_VRAM);
-+   }
- 
--   RADEON_STATECHANGE( rmesa, ctx );
-+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+      radeonTexObj *t;
-+      
-+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+	 continue;
- 
--   /* Note: we used the (possibly) page-flipped values */
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--     = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
--	& RADEON_COLOROFFSET_MASK);
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
-+      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
-+      if (t->image_override && t->bo)
-+	radeon_validate_bo(&rmesa->radeon, t->bo,
-+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-+      else if (t->mt->bo)
-+	radeon_validate_bo(&rmesa->radeon, t->mt->bo,
-+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-    }
--}
- 
-+   if (rmesa->radeon.dma.current)
-+       radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current,
-+			  RADEON_GEM_DOMAIN_GTT, 0);
-+
-+   return radeon_revalidate_bos(ctx);
-+}
- 
--void radeonValidateState( GLcontext *ctx )
-+GLboolean radeonValidateState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint new_state = rmesa->NewGLState;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint new_state = rmesa->radeon.NewGLState;
- 
-    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
--     radeonUpdateDrawBuffer(ctx);
-+     _mesa_update_framebuffer(ctx);
-+     /* this updates the DrawBuffer's Width/Height if it's a FBO */
-+     _mesa_update_draw_buffer_bounds(ctx);
-+     RADEON_STATECHANGE(rmesa, ctx);
-    }
- 
-    if (new_state & _NEW_TEXTURE) {
-       radeonUpdateTextureState( ctx );
--      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
-+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
-    }
- 
-+   /* we need to do a space check here */
-+   if (!r100ValidateBuffers(ctx))
-+     return GL_FALSE;
-+
-    /* Need an event driven matrix update?
-     */
-    if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
-@@ -2295,7 +2141,9 @@ void radeonValidateState( GLcontext *ctx )
-    }
- 
- 
--   rmesa->NewGLState = 0;
-+   rmesa->radeon.NewGLState = 0;
-+
-+   return GL_TRUE;
- }
- 
- 
-@@ -2306,7 +2154,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
-    _vbo_InvalidateState( ctx, new_state );
-    _tnl_InvalidateState( ctx, new_state );
-    _ae_invalidate_state( ctx, new_state );
--   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
-+   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
- }
- 
- 
-@@ -2330,16 +2178,17 @@ static GLboolean check_material( GLcontext *ctx )
- 
- static void radeonWrapRunPipeline( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean has_material;
- 
-    if (0)
--      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
-+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
--      radeonValidateState( ctx );
-+   if (rmesa->radeon.NewGLState)
-+      if (!radeonValidateState( ctx ))
-+	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
- 
-    has_material = (ctx->Light.Enabled && check_material( ctx ));
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
-index 2171879..a7c8eef 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
-@@ -39,30 +39,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "radeon_context.h"
- 
--extern void radeonInitState( radeonContextPtr rmesa );
-+extern void radeonInitState( r100ContextPtr rmesa );
- extern void radeonInitStateFuncs( GLcontext *ctx );
- 
- extern void radeonUpdateMaterial( GLcontext *ctx );
- 
--extern void radeonSetCliprects( radeonContextPtr rmesa );
--extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
- extern void radeonUpdateViewportOffset( GLcontext *ctx );
- extern void radeonUpdateWindow( GLcontext *ctx );
- extern void radeonUpdateDrawBuffer( GLcontext *ctx );
--extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
-+extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
- 				   int unit, GLboolean swapcols );
- 
--extern void radeonValidateState( GLcontext *ctx );
--
--extern void radeonPrintDirty( radeonContextPtr rmesa,
--			      const char *msg );
-+extern GLboolean radeonValidateState( GLcontext *ctx );
- 
- 
- extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
- #define FALLBACK( rmesa, bit, mode ) do {				\
-    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
- 		     __FUNCTION__, bit, mode );				\
--   radeonFallback( rmesa->glCtx, bit, mode );				\
-+   radeonFallback( rmesa->radeon.glCtx, bit, mode );				\
- } while (0)
- 
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
-index 57dc380..c00f59f 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
-@@ -38,39 +38,140 @@
- #include "swrast_setup/swrast_setup.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
- #include "radeon_tcl.h"
- #include "radeon_tex.h"
- #include "radeon_swtcl.h"
- 
-+#include "../r200/r200_reg.h"
-+
- #include "xmlpool.h"
- 
-+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
-+ * 1.3 cmdbuffers allow all previous state to be updated as well as
-+ * the tcl scalar and vector areas.
-+ */
-+static struct {
-+	int start;
-+	int len;
-+	const char *name;
-+} packet[RADEON_MAX_STATE_PACKETS] = {
-+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
-+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
-+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
-+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
-+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
-+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
-+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
-+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
-+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
-+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
-+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
-+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
-+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
-+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
-+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
-+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
-+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
-+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
-+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
-+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
-+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
-+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
-+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
-+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
-+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
-+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
-+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
-+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
-+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
-+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
-+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
-+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
-+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
-+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
-+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
-+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
-+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
-+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
-+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
-+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
-+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
-+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
-+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
-+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
-+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
-+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
-+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
-+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
-+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
-+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
-+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
-+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
-+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
-+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
-+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
-+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
-+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
-+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
-+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
-+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
-+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
-+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
-+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
-+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
-+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
-+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
-+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
-+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
-+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
-+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
-+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
-+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
-+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
-+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
-+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
-+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
-+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
-+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
-+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
-+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
-+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
-+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
-+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
-+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
-+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
-+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
-+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
-+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
-+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
-+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
-+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
-+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
-+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
-+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
-+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
-+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
-+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
-+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
-+};
-+
- /* =============================================================
-  * State initialization
-  */
--
--void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
-+static int cmdpkt( r100ContextPtr rmesa, int id ) 
- {
--   struct radeon_state_atom *l;
--
--   fprintf(stderr, msg);
--   fprintf(stderr, ": ");
-+   drm_radeon_cmd_header_t h;
- 
--   foreach(l, &rmesa->hw.atomlist) {
--      if (l->dirty || rmesa->hw.all_dirty)
--	 fprintf(stderr, "%s, ", l->name);
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     return CP_PACKET0(packet[id].start, packet[id].len - 1);
-+   } else {
-+     h.i = 0;
-+     h.packet.cmd_type = RADEON_CMD_PACKET;
-+     h.packet.packet_id = id;
-    }
--
--   fprintf(stderr, "\n");
--}
--
--static int cmdpkt( int id ) 
--{
--   drm_radeon_cmd_header_t h;
--   h.i = 0;
--   h.packet.cmd_type = RADEON_CMD_PACKET;
--   h.packet.packet_id = id;
-    return h.i;
- }
- 
-@@ -96,17 +197,17 @@ static int cmdscl( int offset, int stride, int count )
-    return h.i;
- }
- 
--#define CHECK( NM, FLAG )			\
--static GLboolean check_##NM( GLcontext *ctx )	\
--{						\
--   return FLAG;					\
-+#define CHECK( NM, FLAG )				\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
-+{							\
-+   return FLAG ? atom->cmd_size : 0;			\
- }
- 
- #define TCL_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx )		\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
- {							\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
--   return !rmesa->TclFallback && (FLAG);		\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);	\
-+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
- }
- 
- 
-@@ -146,81 +247,384 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
- CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
- CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
- 
-+#define OUT_VEC(hdr, data) do {			\
-+    drm_radeon_cmd_header_t h;					\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);							\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.vectors.count);				\
-+  } while(0)
-+
-+#define OUT_SCL(hdr, data) do {					\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);				\
-+  } while(0)
-+
-+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   
-+   dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_SCL(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
- 
- 
--/* Initialize the context's hardware state.
-- */
--void radeonInitState( radeonContextPtr rmesa )
-+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
- {
--   GLcontext *ctx = rmesa->glCtx;
--   GLuint color_fmt, depth_fmt, i;
--   GLint drawPitch, drawOffset;
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 4;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
- 
--   switch ( rmesa->radeonScreen->cpp ) {
--   case 2:
--      color_fmt = RADEON_COLOR_FORMAT_RGB565;
--      break;
--   case 4:
--      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
--      break;
--   default:
--      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
--      exit( -1 );
-+
-+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   dwords += 6;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
-+   OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
-+   END_BATCH();
-+}
-+
-+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   struct radeon_renderbuffer *rrb;
-+   uint32_t cbpitch;
-+   uint32_t zbpitch, depth_fmt;
-+   uint32_t dwords = atom->cmd_size;
-+
-+   /* output the first 7 bytes of context */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + 4);
-+   OUT_BATCH_TABLE(atom->cmd, 5);
-+
-+   rrb = radeon_get_depthbuffer(&r100->radeon);
-+   if (!rrb) {
-+     OUT_BATCH(0);
-+     OUT_BATCH(0);
-+   } else {
-+     zbpitch = (rrb->pitch / rrb->cpp);
-+     if (r100->using_hyperz)
-+       zbpitch |= RADEON_DEPTH_HYPERZ;
-+
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+     OUT_BATCH(zbpitch);
-+     if (rrb->cpp == 4)
-+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-+     else
-+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
-+   }
-+     
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+
-+   rrb = radeon_get_colorbuffer(&r100->radeon);
-+   if (!rrb || !rrb->bo) {
-+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+      OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
-+   } else {
-+      atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
-+      if (rrb->cpp == 4)
-+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
-+      else
-+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
-+
-+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-    }
- 
--   rmesa->state.color.clear = 0x00000000;
-+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
-+
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
-+   } else {
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+       cbpitch |= RADEON_COLOR_TILE_ENABLE;
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   END_BATCH();
-+}
-+
-+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   struct radeon_renderbuffer *rrb, *drb;
-+   uint32_t cbpitch = 0;
-+   uint32_t zbpitch = 0;
-+   uint32_t dwords = atom->cmd_size;
-+   uint32_t depth_fmt;
-+
-+   rrb = radeon_get_colorbuffer(&r100->radeon);
-+   if (!rrb || !rrb->bo) {
-+      fprintf(stderr, "no rrb\n");
-+      return;
-+   }
-+
-+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
-+   if (rrb->cpp == 4)
-+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
-+   else
-+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
-+
-+   cbpitch = (rrb->pitch / rrb->cpp);
-+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+
-+   drb = radeon_get_depthbuffer(&r100->radeon);
-+   if (drb) {
-+     zbpitch = (drb->pitch / drb->cpp);
-+     if (drb->cpp == 4)
-+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-+     else
-+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
-+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
-+     
-+   }
-+
-+   /* output the first 7 bytes of context */
-+   dwords = 10;
-+   if (drb)
-+     dwords += 6;
-+   if (rrb)
-+     dwords += 6;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   /* In the CS case we need to split this up */
-+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
-+
-+   if (drb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
-+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
-+     OUT_BATCH(zbpitch);
-+   }
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+   if (rrb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
-+   //   OUT_BATCH_TABLE((atom->cmd + 14), 4);
-+   // }
-+
-+   END_BATCH();
-+   BEGIN_BATCH_NO_AUTOSTATE(4);
-+   OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
-+   OUT_BATCH(0);
-+   OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
-+   if (rrb) {
-+       OUT_BATCH(((rrb->width - 1) << RADEON_RE_WIDTH_SHIFT) |
-+                 ((rrb->height - 1) << RADEON_RE_HEIGHT_SHIFT));
-+   } else {
-+       OUT_BATCH(0);
-+   }
-+   END_BATCH();
-+}
-+
-+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx, j;
-+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+
-+   if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
-+	return;
-+
-+   if (!t)
-+	return;
-+
-+   if (!t->mt)
-+	return;
-+
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + 10);
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+   lvl = &t->mt->levels[0];
-+   for (j = 0; j < 5; j++) {
-+	OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
-+			RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+   }
-+   END_BATCH();
-+}
-+
-+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+
-+   if (t && t->mt && !t->image_override)
-+     dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+   if (t && t->mt && !t->image_override) {
-+     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
-+   	lvl = &t->mt->levels[0];
-+	OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
-+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     } else {
-+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     }
-+   } else if (!t) {
-+     /* workaround for old CS mechanism */
-+     OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
-+     //     OUT_BATCH(r100->radeon.radeonScreen);
-+   } else {
-+     OUT_BATCH(t->override_offset);
-+   }
-+
-+   OUT_BATCH_TABLE((atom->cmd+4), 5);
-+   END_BATCH();
-+}
-+
-+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+   int hastexture = 1;
-+
-+   if (!t)
-+	hastexture = 0;
-+   else {
-+	if (!t->mt && !t->bo)
-+		hastexture = 0;
-+   }
-+   dwords += 1;
-+   if (hastexture)
-+     dwords += 2;
-+   else
-+     dwords -= 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 2);
-+
-+   if (hastexture) {
-+     OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
-+     if (t->mt && !t->image_override) {
-+        if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
-+            lvl = &t->mt->levels[0];
-+	    OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
-+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+        } else {
-+           OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+        }
-+      } else {
-+	if (t->bo)
-+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+      }
-+   }
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1));
-+   OUT_BATCH_TABLE((atom->cmd+4), 2);
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0));
-+   OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR]));
-+   END_BATCH();
-+}
-+
-+/* Initialize the context's hardware state.
-+ */
-+void radeonInitState( r100ContextPtr rmesa )
-+{
-+   GLcontext *ctx = rmesa->radeon.glCtx;
-+   GLuint i;
-+
-+   rmesa->radeon.state.color.clear = 0x00000000;
- 
-    switch ( ctx->Visual.depthBits ) {
-    case 16:
--      rmesa->state.depth.clear = 0x0000ffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
--      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
--      rmesa->state.stencil.clear = 0x00000000;
-+      rmesa->radeon.state.depth.clear = 0x0000ffff;
-+      rmesa->radeon.state.stencil.clear = 0x00000000;
-       break;
-    case 24:
--      rmesa->state.depth.clear = 0x00ffffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
--      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
--      rmesa->state.stencil.clear = 0xffff0000;
-+      rmesa->radeon.state.depth.clear = 0x00ffffff;
-+      rmesa->radeon.state.stencil.clear = 0xffff0000;
-       break;
-    default:
--      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
--	       ctx->Visual.depthBits );
--      exit( -1 );
-+      break;
-    }
- 
--   /* Only have hw stencil when depth buffer is 24 bits deep */
--   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
--				     ctx->Visual.depthBits == 24 );
-+   rmesa->radeon.Fallback = 0;
- 
--   rmesa->Fallback = 0;
--
--   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      drawOffset = rmesa->radeonScreen->backOffset;
--      drawPitch  = rmesa->radeonScreen->backPitch;
--   } else {
--      drawOffset = rmesa->radeonScreen->frontOffset;
--      drawPitch  = rmesa->radeonScreen->frontPitch;
--   }
- 
--   rmesa->hw.max_state_size = 0;
-+   rmesa->radeon.hw.max_state_size = 0;
- 
--#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
-+#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX )		\
-    do {								\
-       rmesa->hw.ATOM.cmd_size = SZ;				\
--      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.name = NM;					\
-+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
-+      rmesa->hw.ATOM.name = NM;						\
-       rmesa->hw.ATOM.is_tcl = FLAG;					\
-       rmesa->hw.ATOM.check = check_##CHK;				\
--      rmesa->hw.ATOM.dirty = GL_TRUE;				\
--      rmesa->hw.max_state_size += SZ * sizeof(int);		\
-+      rmesa->hw.ATOM.dirty = GL_TRUE;					\
-+      rmesa->hw.ATOM.idx = IDX;					\
-+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
-    } while (0)
--      
--      
-+
-+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )		\
-+   ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
-+
-    /* Allocate state buffers:
-     */
-    ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     rmesa->hw.ctx.emit = ctx_emit_cs;
-+   else
-+     rmesa->hw.ctx.emit = ctx_emit;
-    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
-    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
-    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
-@@ -233,20 +637,29 @@ void radeonInitState( radeonContextPtr rmesa )
-    ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
-    ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
-    ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
--   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
--   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
--   ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
--   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
-+   ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
-+   ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
-+   ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
-+
-+   for (i = 0; i < 3; i++) {
-+      if (rmesa->radeon.radeonScreen->kernel_mm)
-+          rmesa->hw.tex[i].emit = tex_emit_cs;
-+      else
-+          rmesa->hw.tex[i].emit = tex_emit;
-+   }
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
-    {
--      ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
--      ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
--      ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
-+      ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
-+      ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
-+      ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
-+      for (i = 0; i < 3; i++)
-+         rmesa->hw.cube[i].emit = cube_emit;
-    }
-    else
-    {
--      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
--      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
--      ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
-+      ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
-+      ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
-+      ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
-    }
-    ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
-    ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
-@@ -268,43 +681,43 @@ void radeonInitState( radeonContextPtr rmesa )
-    ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
-    ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
-    ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
--   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
--   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
--   ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
-+   ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
-+   ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
-+   ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
- 
-    radeonSetUpAtomList( rmesa );
- 
-    /* Fill in the packet headers:
-     */
--   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
--   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
--   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
--   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
--   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
--   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
--   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
--   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
--   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
--   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
--   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
--   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
--   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
--   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
--   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
--   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
--   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
--   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
-+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
-+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
-+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
-+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
-+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
-+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
-+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
-+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
-+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
-+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
-+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
-+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
-+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
-+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
-+   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
-+   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
-+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
-+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
-    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
--      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
--   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
--   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
--   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
-+      cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
-+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
-+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
-+   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
-    rmesa->hw.grd.cmd[GRD_CMD_0] = 
-       cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
-    rmesa->hw.fog.cmd[FOG_CMD_0] = 
-@@ -331,6 +744,22 @@ void radeonInitState( radeonContextPtr rmesa )
- 	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
-    }
- 
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+      rmesa->hw.grd.emit = scl_emit;
-+      rmesa->hw.fog.emit = vec_emit;
-+      rmesa->hw.glt.emit = vec_emit;
-+      rmesa->hw.eye.emit = vec_emit;
-+      
-+      for (i = 0; i <= 6; i++)
-+	 rmesa->hw.mat[i].emit = vec_emit;
-+
-+      for (i = 0; i < 8; i++)
-+	 rmesa->hw.lit[i].emit = lit_emit;
-+
-+      for (i = 0; i < 6; i++)
-+	 rmesa->hw.ucp[i].emit = vec_emit;
-+   }
-+
-    rmesa->last_ReallyEnabled = -1;
- 
-    /* Initial Harware state:
-@@ -352,19 +781,7 @@ void radeonInitState( radeonContextPtr rmesa )
- 					    RADEON_SRC_BLEND_GL_ONE |
- 					    RADEON_DST_BLEND_GL_ZERO );
- 
--   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
--      rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
--      ((rmesa->radeonScreen->depthPitch &
--	RADEON_DEPTHPITCH_MASK) |
--       RADEON_DEPTH_ENDIAN_NO_SWAP);
--       
--   if (rmesa->using_hyperz)
--       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ;
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
--					       RADEON_Z_TEST_LESS |
-+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
- 					       RADEON_STENCIL_TEST_ALWAYS |
- 					       RADEON_STENCIL_FAIL_KEEP |
- 					       RADEON_STENCIL_ZPASS_KEEP |
-@@ -374,7 +791,7 @@ void radeonInitState( radeonContextPtr rmesa )
-    if (rmesa->using_hyperz) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
- 						   RADEON_Z_DECOMPRESSION_ENABLE;
--      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- 	 /* works for q3, but slight rendering errors with glxgears ? */
- /*	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
- 	 /* need this otherwise get lots of lockups with q3 ??? */
-@@ -386,10 +803,9 @@ void radeonInitState( radeonContextPtr rmesa )
- 				     RADEON_ANTI_ALIAS_NONE);
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
--				       color_fmt |
- 				       RADEON_ZBLOCK16);
- 
--   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
-+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
-    case DRI_CONF_DITHER_XERRORDIFFRESET:
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
-       break;
-@@ -397,31 +813,18 @@ void radeonInitState( radeonContextPtr rmesa )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
-       break;
-    }
--   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
-+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
- 	DRI_CONF_ROUND_ROUND )
--      rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
-+      rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
-    else
--      rmesa->state.color.roundEnable = 0;
--   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
-+      rmesa->radeon.state.color.roundEnable = 0;
-+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
- 	DRI_CONF_COLOR_REDUCTION_DITHER )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
-    else
--      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
--					       rmesa->radeonScreen->fbLocation)
--					      & RADEON_COLOROFFSET_MASK);
--
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
--					      RADEON_COLORPITCH_MASK) |
--					     RADEON_COLOR_ENDIAN_NO_SWAP);
-+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
- 
- 
--   /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
--   }
--
-    rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
- 				     RADEON_BFACE_SOLID |
- 				     RADEON_FFACE_SOLID |
-@@ -444,7 +847,7 @@ void radeonInitState( radeonContextPtr rmesa )
-   					    RADEON_VC_NO_SWAP;
- #endif
- 
--   if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-      rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
-    }
- 
-@@ -491,8 +894,8 @@ void radeonInitState( radeonContextPtr rmesa )
- 	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
- 
-       /* Initialize the texture offset to the start of the card texture heap */
--      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+      //      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
-+      //	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 
-       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
-       rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
-@@ -513,15 +916,15 @@ void radeonInitState( radeonContextPtr rmesa )
- 
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-    }
- 
-    /* Can only add ST1 at the time of doing some multitex but can keep
-@@ -613,5 +1016,7 @@ void radeonInitState( radeonContextPtr rmesa )
-    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
-    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
-    
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
-+
-+   rcommonInitCmdBuf(&rmesa->radeon);
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-index ebea1fe..e31f045 100644
---- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_tcl.h"
- 
- 
--static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
--
- /* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
- /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
- #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
-@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
- 
- #define EMIT_ATTR( ATTR, STYLE, F0 )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
-    fmt_0 |= F0;								\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- static GLuint radeon_cp_vc_frmts[3][2] =
-@@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
- 
- static void radeonSetVertexFormat( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    DECLARE_RENDERINPUTS(index_bitset);
-@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
-    }
- 
-    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--   rmesa->swtcl.vertex_attr_count = 0;
-+   rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
-    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-     * build up a hardware vertex.
-@@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx )
-       }
-    }
- 
--   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
-+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
- 	fmt_0 != rmesa->swtcl.vertex_format) {
-       RADEON_NEWPRIM(rmesa);
-       rmesa->swtcl.vertex_format = fmt_0;
--      rmesa->swtcl.vertex_size =
-+      rmesa->radeon.swtcl.vertex_size =
- 	  _tnl_install_attrs( ctx,
--			      rmesa->swtcl.vertex_attrs, 
--			      rmesa->swtcl.vertex_attr_count,
-+			      rmesa->radeon.swtcl.vertex_attrs, 
-+			      rmesa->radeon.swtcl.vertex_attr_count,
- 			      NULL, 0 );
--      rmesa->swtcl.vertex_size /= 4;
--      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-+      rmesa->radeon.swtcl.vertex_size /= 4;
-+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
-       if (RADEON_DEBUG & DEBUG_VERTS)
- 	 fprintf( stderr, "%s: vertex_size= %d floats\n",
--		  __FUNCTION__, rmesa->swtcl.vertex_size);
-+		  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
-    }
- }
- 
- 
- static void radeonRenderStart( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
- 
-    radeonSetVertexFormat( ctx );
-    
--   if (rmesa->dma.flush != 0 && 
--       rmesa->dma.flush != flush_last_swtcl_prim)
--      rmesa->dma.flush( rmesa );
-+   if (rmesa->radeon.dma.flush != 0 && 
-+       rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
-+      rmesa->radeon.dma.flush( ctx );
- }
- 
- 
-@@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx )
-  */
- void radeonChooseVertexState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
- 
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
-@@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx )
-     * rasterization fallback.  As this function will be called again when we
-     * leave a rasterization fallback, we can just skip it for now.
-     */
--   if (rmesa->Fallback != 0)
-+   if (rmesa->radeon.Fallback != 0)
-       return;
- 
-    /* HW perspective divide is a win, but tiny vertex formats are a
-@@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx )
-    }
- }
- 
--
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
-+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
- {
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->dma.flush = NULL;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   if (rmesa->dma.current.buf) {
--      struct radeon_dma_region *current = &rmesa->dma.current;
--      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
--			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
--			       current->start);
-+   rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			    __FUNCTION__);
- 
--      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
- 
--      assert (current->start + 
--	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	      current->ptr);
-+   radeonEmitState(&rmesa->radeon);
-+   radeonEmitVertexAOS( rmesa,
-+			rmesa->radeon.swtcl.vertex_size,
-+			rmesa->radeon.dma.current,
-+			current_offset);
- 
--      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
--			          rmesa->hw.max_state_size + VBUF_BUFSZ );
--
--	 radeonEmitVertexAOS( rmesa,
--			      rmesa->swtcl.vertex_size,
--			      current_offset);
--
--	 radeonEmitVbufPrim( rmesa,
--			     rmesa->swtcl.vertex_format,
--			     rmesa->swtcl.hw_primitive,
--			     rmesa->swtcl.numverts);
--      }
-+		      
-+   radeonEmitVbufPrim( rmesa,
-+		       rmesa->swtcl.vertex_format,
-+		       rmesa->radeon.swtcl.hw_primitive,
-+		       rmesa->radeon.swtcl.numverts);
- 
--      rmesa->swtcl.numverts = 0;
--      current->start = current->ptr;
--   }
- }
- 
--
--/* Alloc space in the current dma region.
-- */
--static INLINE void *
--radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
--{
--   GLuint bytes = vsize * nverts;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      radeonRefillCurrentDmaRegion( rmesa );
--
--   if (!rmesa->dma.flush) {
--      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--      rmesa->dma.flush = flush_last_swtcl_prim;
--   }
--
--   assert( vsize == rmesa->swtcl.vertex_size * 4 );
--   assert( rmesa->dma.flush == flush_last_swtcl_prim );
--   assert (rmesa->dma.current.start + 
--	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	   rmesa->dma.current.ptr);
--
--
--   {
--      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
--      rmesa->dma.current.ptr += bytes;
--      rmesa->swtcl.numverts += nverts;
--      return head;
--   }
--
--}
--
--
- /*
-  * Render unclipped vertex buffers by emitting vertices directly to
-  * dma buffers.  Use strip/fan hardware primitives where possible.
-@@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
- };
- 
- static INLINE void
--radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
-+radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
- {
-    RADEON_NEWPRIM( rmesa );
--   rmesa->swtcl.hw_primitive = hw_prim[prim];
--   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
-+   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
-+   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
- }
- 
--#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
- #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
- #define FLUSH()  RADEON_NEWPRIM( rmesa )
--#define GET_CURRENT_VB_MAX_VERTS() \
--  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
-+#define GET_CURRENT_VB_MAX_VERTS()					10\
-+//  (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
- #define GET_SUBSEQUENT_VB_MAX_VERTS() \
--  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
-+  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
- #define ALLOC_VERTS( nr ) \
--  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
-+  rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
- #define EMIT_VERTS( ctx, j, nr, buf ) \
-   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
- 
-@@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
- static GLboolean radeon_run_render( GLcontext *ctx,
- 				    struct tnl_pipeline_stage *stage )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    tnl_render_func *tab = TAG(render_tab_verts);
-    GLuint i;
- 
--   if (rmesa->swtcl.indexed_verts.buf) 
--      RELEASE_ELT_VERTS();
--   	
--   if (rmesa->swtcl.RenderIndex != 0 ||   
-+   if (rmesa->radeon.swtcl.RenderIndex != 0 ||   
-        !radeon_dma_validate_render( ctx, VB ))
-       return GL_TRUE;		
- 
-@@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
- 
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
--#define CTX_ARG radeonContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
-+#define CTX_ARG r100ContextPtr rmesa
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
--   const char *radeonverts = (char *)rmesa->swtcl.verts;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
- #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
- #define VERTEX radeonVertex 
- #undef TAG
-@@ -560,7 +504,7 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
- 
- #define VERT_SET_RGBA( v, c )  					\
- do {								\
-@@ -606,7 +550,7 @@ do {							\
- #undef INIT
- 
- #define LOCAL_VARS(n)							\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);			\
-    GLuint color[n], spec[n];						\
-    GLuint coloroffset = rmesa->swtcl.coloroffset;	\
-    GLuint specoffset = rmesa->swtcl.specoffset;			\
-@@ -617,7 +561,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -673,9 +617,9 @@ static void init_rast_tab( void )
- } while (0)
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -700,17 +644,17 @@ static void init_rast_tab( void )
- void radeonChooseRenderState( GLcontext *ctx )
- {
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint index = 0;
-    GLuint flags = ctx->_TriangleCaps;
- 
--   if (!rmesa->TclFallback || rmesa->Fallback) 
-+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
-       return;
- 
-    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
-    if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
- 
--   if (index != rmesa->swtcl.RenderIndex) {
-+   if (index != rmesa->radeon.swtcl.RenderIndex) {
-       tnl->Driver.Render.Points = rast_tab[index].points;
-       tnl->Driver.Render.Line = rast_tab[index].line;
-       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx )
- 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-       }
- 
--      rmesa->swtcl.RenderIndex = index;
-+      rmesa->radeon.swtcl.RenderIndex = index;
-    }
- }
- 
-@@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx )
- 
- static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   if (rmesa->swtcl.hw_primitive != hwprim) {
-+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-       RADEON_NEWPRIM( rmesa );
--      rmesa->swtcl.hw_primitive = hwprim;
-+      rmesa->radeon.swtcl.hw_primitive = hwprim;
-    }
- }
- 
- static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   rmesa->swtcl.render_primitive = prim;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   rmesa->radeon.swtcl.render_primitive = prim;
-    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-       radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
- }
-@@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx )
- 
- static void radeonResetLineStipple( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    RADEON_STATECHANGE( rmesa, lin );
- }
- 
-@@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit)
- 
- void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->Fallback;
-+   GLuint oldfallback = rmesa->radeon.Fallback;
- 
-    if (mode) {
--      rmesa->Fallback |= bit;
-+      rmesa->radeon.Fallback |= bit;
-       if (oldfallback == 0) {
--	 RADEON_FIREVERTICES( rmesa );
-+	 radeon_firevertices(&rmesa->radeon);
- 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
- 	 _swsetup_Wakeup( ctx );
--	 rmesa->swtcl.RenderIndex = ~0;
-+	 rmesa->radeon.swtcl.RenderIndex = ~0;
-          if (RADEON_DEBUG & DEBUG_FALLBACKS) {
-             fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
-                     bit, getFallbackString(bit));
-@@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->Fallback &= ~bit;
-+      rmesa->radeon.Fallback &= ~bit;
-       if (oldfallback == bit) {
- 	 _swrast_flush( ctx );
- 	 tnl->Driver.Render.Start = radeonRenderStart;
-@@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- 
- 	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
- 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
--	 if (rmesa->TclFallback) {
--	    /* These are already done if rmesa->TclFallback goes to
-+	 if (rmesa->radeon.TclFallback) {
-+	    /* These are already done if rmesa->radeon.TclFallback goes to
- 	     * zero above. But not if it doesn't (RADEON_NO_TCL for
- 	     * example?)
- 	     */
- 	    _tnl_invalidate_vertex_state( ctx, ~0 );
- 	    _tnl_invalidate_vertices( ctx, ~0 );
--	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
-+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
- 	    radeonChooseVertexState( ctx );
- 	    radeonChooseRenderState( ctx );
- 	 }
-@@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- void radeonInitSwtcl( GLcontext *ctx )
- {
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    static int firsttime = 1;
- 
-    if (firsttime) {
-@@ -872,18 +816,9 @@ void radeonInitSwtcl( GLcontext *ctx )
-    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
- 		       RADEON_MAX_TNL_VERTEX_SIZE);
-    
--   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->swtcl.render_primitive = GL_TRIANGLES;
--   rmesa->swtcl.hw_primitive = 0;
-+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+   rmesa->radeon.swtcl.hw_primitive = 0;
- }
- 
--
--void radeonDestroySwtcl( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (rmesa->swtcl.indexed_verts.buf) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-index e485052..da89158 100644
---- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-@@ -40,7 +40,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_context.h"
- 
- extern void radeonInitSwtcl( GLcontext *ctx );
--extern void radeonDestroySwtcl( GLcontext *ctx );
- 
- extern void radeonChooseRenderState( GLcontext *ctx );
- extern void radeonChooseVertexState( GLcontext *ctx );
-@@ -63,5 +62,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
- 
- extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
- 
--
-+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
-index 779e9ae..df6708f 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/tnl.h"
- #include "tnl/t_pipeline.h"
- 
-+#include "radeon_common.h"
- #include "radeon_context.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
-@@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = {
- };
-    
- 
--#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
- #define ELT_TYPE  GLushort
- 
- #define ELT_INIT(prim, hw_prim) \
-@@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = {
- 
- #define RESET_STIPPLE() do {			\
-    RADEON_STATECHANGE( rmesa, lin );		\
--   radeonEmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- #define AUTO_STIPPLE( mode )  do {		\
-@@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = {
-    else						\
-       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
- 	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
--   radeonEmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);		\
- } while (0)
- 
- 
- 
- #define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
- 
--static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
-+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
- {
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
-+      if (rmesa->radeon.dma.flush)
-+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- 
--   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
-+      rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + 
-+			       AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__);
- 
--   radeonEmitAOS( rmesa,
--		rmesa->tcl.aos_components,
--		rmesa->tcl.nr_aos_components, 0 );
-+      radeonEmitAOS( rmesa,
-+		     rmesa->radeon.tcl.aos_count, 0 );
- 
--   return radeonAllocEltsOpenEnded( rmesa,
--				    rmesa->tcl.vertex_format, 
--				    rmesa->tcl.hw_primitive, nr );
-+      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
-+				       rmesa->tcl.hw_primitive, nr );
- }
- 
--#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
-+#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
- 
- 
- 
-@@ -174,15 +173,15 @@ static void radeonEmitPrim( GLcontext *ctx,
- 		       GLuint start, 
- 		       GLuint count)	
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    radeonTclPrimitive( ctx, prim, hwprim );
-    
--   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			    rmesa->hw.max_state_size + VBUF_BUFSZ );
-+   rcommonEnsureCmdBufSpace( &rmesa->radeon,
-+			     AOS_BUFSZ(rmesa->radeon.tcl.aos_count) +
-+			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
- 
-    radeonEmitAOS( rmesa,
--		  rmesa->tcl.aos_components,
--		  rmesa->tcl.nr_aos_components,
-+		  rmesa->radeon.tcl.aos_count,
- 		  start );
-    
-    /* Why couldn't this packet have taken an offset param?
-@@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx,
- 			 GLenum prim,
- 			 int hw_prim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint se_cntl;
-    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
- 
-@@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
- static GLboolean radeon_run_tcl_render( GLcontext *ctx,
- 					struct tnl_pipeline_stage *stage )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
-@@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
- 
-    /* TODO: separate this from the swtnl pipeline 
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       return GL_TRUE;	/* fallback to software t&l */
- 
-    if (VB->Count == 0)
-@@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
- 
- static void transition_to_swtnl( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    GLuint se_cntl;
- 
-@@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx )
- 
- static void transition_to_hwtnl( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
- 
-@@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx )
- 
-    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
- 
--   if ( rmesa->dma.flush )			
--      rmesa->dma.flush( rmesa );	
-+   if ( rmesa->radeon.dma.flush )			
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
- 
--   rmesa->dma.flush = NULL;
-+   rmesa->radeon.dma.flush = NULL;
-    rmesa->swtcl.vertex_format = 0;
-    
--   if (rmesa->swtcl.indexed_verts.buf) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
-+   //   if (rmesa->swtcl.indexed_verts.buf) 
-+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-+   //			      __FUNCTION__ );
- 
-    if (RADEON_DEBUG & DEBUG_FALLBACKS) 
-       fprintf(stderr, "Radeon end tcl fallback\n");
-@@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit)
- 
- void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->TclFallback;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint oldfallback = rmesa->radeon.TclFallback;
- 
-    if (mode) {
--      rmesa->TclFallback |= bit;
-+      rmesa->radeon.TclFallback |= bit;
-       if (oldfallback == 0) {
- 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
-@@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->TclFallback &= ~bit;
-+      rmesa->radeon.TclFallback &= ~bit;
-       if (oldfallback == bit) {
- 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "Radeon end tcl fallback %s\n",
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
-index f2b6deb..2549d5c 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tex.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/texobj.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_swtcl.h"
-@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
- {
-    GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
- 
-+   /* Force revalidation to account for switches from/to mipmapping. */
-+   t->validated = GL_FALSE;
-+
-    t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
- 
-    /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
--   if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
-+   if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
-       switch ( minf ) {
-       case GL_NEAREST:
-       case GL_NEAREST_MIPMAP_NEAREST:
-@@ -249,433 +253,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
-    t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
- }
- 
--
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
--{
--   radeonTexObjPtr t;
--
--   t = CALLOC_STRUCT( radeon_tex_obj );
--   texObj->DriverData = t;
--   if ( t != NULL ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
--      }
--
--      /* Initialize non-image-dependent parts of the state:
--       */
--      t->base.tObj = texObj;
--      t->border_fallback = GL_FALSE;
--
--      t->pp_txfilter = RADEON_BORDER_MODE_OGL;
--      t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
--			RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
--
--      make_empty_list( & t->base );
--
--      radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
--      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
--      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
--      radeonSetTexBorderColor( t, texObj->BorderColor );
--   }
--
--   return t;
--}
--
--
--static const struct gl_texture_format *
--radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
--                           GLenum format, GLenum type )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   const GLboolean do32bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
--   const GLboolean force16bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
--   (void) format;
--
--   switch ( internalFormat ) {
--   case 4:
--   case GL_RGBA:
--   case GL_COMPRESSED_RGBA:
--      switch ( type ) {
--      case GL_UNSIGNED_INT_10_10_10_2:
--      case GL_UNSIGNED_INT_2_10_10_10_REV:
--	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
--      }
--
--   case 3:
--   case GL_RGB:
--   case GL_COMPRESSED_RGB:
--      switch ( type ) {
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_5_6_5:
--      case GL_UNSIGNED_SHORT_5_6_5_REV:
--	 return _dri_texformat_rgb565;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--      }
--
--   case GL_RGBA8:
--   case GL_RGB10_A2:
--   case GL_RGBA12:
--   case GL_RGBA16:
--      return !force16bpt ?
--	  _dri_texformat_argb8888 : _dri_texformat_argb4444;
--
--   case GL_RGBA4:
--   case GL_RGBA2:
--      return _dri_texformat_argb4444;
--
--   case GL_RGB5_A1:
--      return _dri_texformat_argb1555;
--
--   case GL_RGB8:
--   case GL_RGB10:
--   case GL_RGB12:
--   case GL_RGB16:
--      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--
--   case GL_RGB5:
--   case GL_RGB4:
--   case GL_R3_G3_B2:
--      return _dri_texformat_rgb565;
--
--   case GL_ALPHA:
--   case GL_ALPHA4:
--   case GL_ALPHA8:
--   case GL_ALPHA12:
--   case GL_ALPHA16:
--   case GL_COMPRESSED_ALPHA:
--      return _dri_texformat_a8;
--
--   case 1:
--   case GL_LUMINANCE:
--   case GL_LUMINANCE4:
--   case GL_LUMINANCE8:
--   case GL_LUMINANCE12:
--   case GL_LUMINANCE16:
--   case GL_COMPRESSED_LUMINANCE:
--      return _dri_texformat_l8;
--
--   case 2:
--   case GL_LUMINANCE_ALPHA:
--   case GL_LUMINANCE4_ALPHA4:
--   case GL_LUMINANCE6_ALPHA2:
--   case GL_LUMINANCE8_ALPHA8:
--   case GL_LUMINANCE12_ALPHA4:
--   case GL_LUMINANCE12_ALPHA12:
--   case GL_LUMINANCE16_ALPHA16:
--   case GL_COMPRESSED_LUMINANCE_ALPHA:
--      return _dri_texformat_al88;
--
--   case GL_INTENSITY:
--   case GL_INTENSITY4:
--   case GL_INTENSITY8:
--   case GL_INTENSITY12:
--   case GL_INTENSITY16:
--   case GL_COMPRESSED_INTENSITY:
--      return _dri_texformat_i8;
--
--   case GL_YCBCR_MESA:
--      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--          type == GL_UNSIGNED_BYTE)
--         return &_mesa_texformat_ycbcr;
--      else
--         return &_mesa_texformat_ycbcr_rev;
--
--   case GL_RGB_S3TC:
--   case GL_RGB4_S3TC:
--   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgb_dxt1;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgba_dxt1;
--
--   case GL_RGBA_S3TC:
--   case GL_RGBA4_S3TC:
--   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--      return &_mesa_texformat_rgba_dxt3;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--      return &_mesa_texformat_rgba_dxt5;
--
--   default:
--      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
--      return NULL;
--   }
--
--   return NULL; /* never get here */
--}
--
--
--static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage1d(ctx, target, level, internalFormat,
--                          width, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset,
--                                 GLsizei width,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--			     format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage2d(ctx, target, level, internalFormat,
--                          width, height, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--			     height, format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLsizei imageSize, const GLvoid *data,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
--                                 height, border, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format,
--                                 GLsizei imageSize, const GLvoid *data,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--                                 height, format, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
- #define SCALED_FLOAT_TO_BYTE( x, scale ) \
- 		(((GLuint)((255.0F / scale) * (x))) / 2)
- 
- static void radeonTexEnv( GLcontext *ctx, GLenum target,
- 			  GLenum pname, const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint unit = ctx->Texture.CurrentUnit;
-    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 
-@@ -706,7 +290,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
-        * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
-        * [0.0,4.0] to [0,127].
-        */
--      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
-+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
- 	  0.0 : -1.0;
-       bias = CLAMP( *param, min, 4.0 );
-       if ( bias == 0 ) {
-@@ -739,7 +323,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
- 				struct gl_texture_object *texObj,
- 				GLenum pname, const GLfloat *params )
- {
--   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
-+   radeonTexObj* t = radeon_tex_obj(texObj);
- 
-    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
-@@ -767,57 +351,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
-    case GL_TEXTURE_MAX_LEVEL:
-    case GL_TEXTURE_MIN_LOD:
-    case GL_TEXTURE_MAX_LOD:
-+
-       /* This isn't the most efficient solution but there doesn't appear to
-        * be a nice alternative.  Since there's no LOD clamping,
-        * we just have to rely on loading the right subset of mipmap levels
-        * to simulate a clamped LOD.
-        */
--      driSwapOutTextureObject( (driTextureObject *) t );
-+      if (t->mt) {
-+         radeon_miptree_unreference(t->mt);
-+	 t->mt = 0;
-+	 t->validated = GL_FALSE;
-+      }
-       break;
- 
-    default:
-       return;
-    }
--
--   /* Mark this texobj as dirty (one bit per tex unit)
--    */
--   t->dirty_state = TEX_ALL;
--}
--
--
--static void radeonBindTexture( GLcontext *ctx, GLenum target,
--			       struct gl_texture_object *texObj )
--{
--   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
--	       ctx->Texture.CurrentUnit );
--   }
--
--   assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
--            target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
--           (texObj->DriverData != NULL) );
- }
- 
--
- static void radeonDeleteTexture( GLcontext *ctx,
- 				 struct gl_texture_object *texObj )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj* t = radeon_tex_obj(texObj);
-+   int i;
- 
-    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
- 	       _mesa_lookup_enum_by_nr( texObj->Target ) );
-    }
- 
--   if ( t != NULL ) {
--      if ( rmesa ) {
--         RADEON_FIREVERTICES( rmesa );
--      }
--
--      driDestroyTextureObject( t );
-+   if ( rmesa ) {
-+     radeon_firevertices(&rmesa->radeon);
-+     for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
-+       if ( t == rmesa->state.texture.unit[i].texobj ) {
-+	 rmesa->state.texture.unit[i].texobj = NULL;
-+	 rmesa->hw.tex[i].dirty = GL_FALSE;
-+	 rmesa->hw.cube[i].dirty = GL_FALSE;
-+       }
-+     }
-    }
- 
-+   if (t->mt) {
-+      radeon_miptree_unreference(t->mt);
-+      t->mt = 0;
-+   }
-    /* Free mipmap images and the texture object itself */
-    _mesa_delete_texture_object(ctx, texObj);
- }
-@@ -837,7 +415,7 @@ static void radeonTexGen( GLcontext *ctx,
- 			  GLenum pname,
- 			  const GLfloat *params )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint unit = ctx->Texture.CurrentUnit;
-    rmesa->recheck_texgen[unit] = GL_TRUE;
- }
-@@ -851,29 +429,40 @@ static void radeonTexGen( GLcontext *ctx,
- static struct gl_texture_object *
- radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_object *obj;
--   obj = _mesa_new_texture_object(ctx, name, target);
--   if (!obj)
--      return NULL;
--   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
--   radeonAllocTexObj( obj );
--   return obj;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
-+   _mesa_initialize_texture_object(&t->base, name, target);
-+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+   t->border_fallback = GL_FALSE;
-+
-+   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
-+   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
-+		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-+   
-+   radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
-+   radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
-+   radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
-+   radeonSetTexBorderColor( t, t->base.BorderColor );
-+   return &t->base;
- }
- 
- 
-+
- void radeonInitTextureFuncs( struct dd_function_table *functions )
- {
--   functions->ChooseTextureFormat	= radeonChooseTextureFormat;
-+   functions->ChooseTextureFormat	= radeonChooseTextureFormat_mesa;
-    functions->TexImage1D		= radeonTexImage1D;
-    functions->TexImage2D		= radeonTexImage2D;
-    functions->TexSubImage1D		= radeonTexSubImage1D;
-    functions->TexSubImage2D		= radeonTexSubImage2D;
-+   functions->GetTexImage               = radeonGetTexImage;
-+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
- 
-    functions->NewTextureObject		= radeonNewTextureObject;
--   functions->BindTexture		= radeonBindTexture;
-+   //   functions->BindTexture		= radeonBindTexture;
-    functions->DeleteTexture		= radeonDeleteTexture;
--   functions->IsTextureResident		= driIsTextureResident;
- 
-    functions->TexEnv			= radeonTexEnv;
-    functions->TexParameter		= radeonTexParameter;
-@@ -882,5 +471,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
-    functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
-    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
- 
-+   functions->GenerateMipmap = radeonGenerateMipmap;
-+
-+   functions->NewTextureImage = radeonNewTextureImage;
-+   functions->FreeTexImageData = radeonFreeTexImageData;
-+   functions->MapTexture = radeonMapTexture;
-+   functions->UnmapTexture = radeonUnmapTexture;
-+
-    driInitTextureFormats();
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
-index 8000880..a4aaddc 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tex.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
-@@ -41,12 +41,16 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
-                                unsigned long long offset, GLint depth,
-                                GLuint pitch);
- 
-+extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
-+extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
-+			       __DRIdrawable *dPriv);
-+
- extern void radeonUpdateTextureState( GLcontext *ctx );
- 
--extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
-+extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
- 				  GLuint face );
- 
--extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
-+extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
- 
- extern void radeonInitTextureFuncs( struct dd_function_table *functions );
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
-deleted file mode 100644
-index 5f7bbe6..0000000
---- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
-+++ /dev/null
-@@ -1,404 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *
-- */
--#include <errno.h> 
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/macros.h"
--
--#include "radeon_context.h"
--#include "radeon_ioctl.h"
--#include "radeon_tex.h"
--
--#include <unistd.h>  /* for usleep() */
--
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void
--radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
--{
--   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
--   }
--
--   if ( rmesa != NULL ) {
--      unsigned   i;
--
--
--      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
--	 if ( t == rmesa->state.texture.unit[i].texobj ) {
--	    rmesa->state.texture.unit[i].texobj = NULL;
--	 }
--      }
--   }
--}
--
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--
--static void radeonUploadRectSubImage( radeonContextPtr rmesa,
--				      radeonTexObjPtr t, 
--				      struct gl_texture_image *texImage,
--				      GLint x, GLint y, 
--				      GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   int blit_format, dstPitch, done;
--
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = RADEON_GMC_DST_8BPP_CI;
--      break;
--   case 2:
--      blit_format = RADEON_GMC_DST_16BPP;
--      break;
--   case 4:
--      blit_format = RADEON_GMC_DST_32BPP;
--      break;
--   default:
--      fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", 
--      	       texFormat->TexelBytes);
--      return;
--   }
--
--   t->image[0][0].data = texImage->Data;
--
--   /* Currently don't need to cope with small pitches.
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--   dstPitch = t->pp_txpitch + 32;
--
--   {	/* FIXME: prefer GART-texturing if possible */
--      /* Data not in GART memory, or bad pitch.
--       */
--      for (done = 0; done < height ; ) {
--	 struct radeon_dma_region region;
--	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
--	 int src_pitch;
--	 char *tex;
--
--         src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--	 tex = (char *)texImage->Data + done * src_pitch;
--
--	 memset(&region, 0, sizeof(region));
--	 radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
--
--	 /* Copy texdata to dma:
--	  */
--	 if (0)
--	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
--		    __FUNCTION__, src_pitch, dstPitch);
--
--	 if (src_pitch == dstPitch) {
--	    memcpy( region.address + region.start, tex, lines * src_pitch );
--	 } 
--	 else {
--	    char *buf = region.address + region.start;
--	    int i;
--	    for (i = 0 ; i < lines ; i++) {
--	       memcpy( buf, tex, src_pitch );
--	       buf += dstPitch;
--	       tex += src_pitch;
--	    }
--	 }
--
--	 radeonEmitWait( rmesa, RADEON_WAIT_3D );
--
--	 
--
--	 /* Blit to framebuffer
--	  */
--	 radeonEmitBlit( rmesa,
--		       blit_format,
--		       dstPitch, GET_START( &region ),
--		       dstPitch, t->bufAddr,
--		       0, 0,
--		       0, done,
--		       width, lines );
--	 
--	 radeonEmitWait( rmesa, RADEON_WAIT_2D );
--
--	 radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
--	 done += lines;
--      }
--   }
--}
--
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, 
--			    GLint hwlevel,
--			    GLint x, GLint y, GLint width, GLint height,
--			    GLuint face )
--{
--   struct gl_texture_image *texImage = NULL;
--   GLuint offset;
--   GLint imageWidth, imageHeight;
--   GLint ret;
--   drm_radeon_texture_t tex;
--   drm_radeon_tex_image_t tmp;
--   const int level = hwlevel + t->base.firstLevel;
--
--   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
--	       __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
--   }
--
--   ASSERT(face < 6);
--
--   /* Ensure we have a valid texture to upload */
--   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
--      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--      return;
--   }
--
--   texImage = t->base.tObj->Image[face][level];
--
--   if ( !texImage ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
--      return;
--   }
--   if ( !texImage->Data ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
--      return;
--   }
--
--
--   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--      assert(level == 0);
--      assert(hwlevel == 0);
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
--      radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
--      return;
--   }
--
--   imageWidth = texImage->Width;
--   imageHeight = texImage->Height;
--
--   offset = t->bufAddr + t->base.totalSize * face / 6;
--
--   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      GLint imageX = 0;
--      GLint imageY = 0;
--      GLint blitX = t->image[face][hwlevel].x;
--      GLint blitY = t->image[face][hwlevel].y;
--      GLint blitWidth = t->image[face][hwlevel].width;
--      GLint blitHeight = t->image[face][hwlevel].height;
--      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
--	       imageWidth, imageHeight, imageX, imageY );
--      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
--	       blitWidth, blitHeight, blitX, blitY );
--      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--	       (GLuint)offset, hwlevel, level );
--   }
--
--   t->image[face][hwlevel].data = texImage->Data;
--
--   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--    * We used to use 1, 2 and 4-byte texels and used to use the texture
--    * width to dictate the blit width - but that won't work for compressed
--    * textures. (Brian)
--    * NOTE: can't do that with texture tiling. (sroland)
--    */
--   tex.offset = offset;
--   tex.image = &tmp;
--   /* copy (x,y,width,height,data) */
--   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
--
--   if (texImage->TexFormat->TexelBytes) {
--      /* use multi-byte upload scheme */
--      tex.height = imageHeight;
--      tex.width = imageWidth;
--      tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
--      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
--      tex.offset += tmp.x & ~1023;
--      tmp.x = tmp.x % 1024;
--      if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
--	 /* need something like "tiled coordinates" ? */
--	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
--	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
--	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--      }
--      else {
--	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--      }
--      if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
--	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
--	 /* radeon switches off macro tiling for small textures/mipmaps it seems */
--	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--      }
--   }
--   else {
--      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
--         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
--         so the kernel module reads the right amount of data. */
--      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
--      tex.pitch = (BLIT_WIDTH_BYTES / 64);
--      tex.height = (imageHeight + 3) / 4;
--      tex.width = (imageWidth + 3) / 4;
--      switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
--      case RADEON_TXFORMAT_DXT1:
--         tex.width *= 8;
--         break;
--      case RADEON_TXFORMAT_DXT23:
--      case RADEON_TXFORMAT_DXT45:
--         tex.width *= 16;
--         break;
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--   do {
--      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
--                                 &tex, sizeof(drm_radeon_texture_t) );
--   } while ( ret == -EAGAIN );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
--      fprintf( stderr, "   offset=0x%08x\n",
--	       offset );
--      fprintf( stderr, "   image width=%d height=%d\n",
--	       imageWidth, imageHeight );
--      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
--	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
--	       t->image[face][hwlevel].data );
--      exit( 1 );
--   }
--}
--
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- * 
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
--{
--   int numLevels;
--
--   if ( !t || t->base.totalSize == 0 || t->image_override )
--      return 0;
--
--   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
--	       t->base.firstLevel, t->base.lastLevel );
--   }
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      radeonFinish( rmesa->glCtx );
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   if ( t->base.memBlock == NULL ) {
--      int heap;
--
--      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
--				 (driTextureObject *) t );
--      if ( heap == -1 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 return -1;
--      }
--
--      /* Set the base offset of the texture image */
--      t->bufAddr = rmesa->radeonScreen->texOffset[heap] 
--	   + t->base.memBlock->ofs;
--      t->pp_txoffset = t->bufAddr;
--
--      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--	 /* hope it's safe to add that here... */
--	 t->pp_txoffset |= t->tile_bits;
--      }
--
--      /* Mark this texobj as dirty on all units:
--       */
--      t->dirty_state = TEX_ALL;
--   }
--
--
--   /* Let the world know we've used this memory recently.
--    */
--   driUpdateTextureLRU( (driTextureObject *) t );
--   UNLOCK_HARDWARE( rmesa );
--
--
--   /* Upload any images that are new */
--   if (t->base.dirty_images[face]) {
--      int i;
--      for ( i = 0 ; i < numLevels ; i++ ) {
--         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
--            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
--			    t->image[face][i].height, face );
--         }
--      }
--      t->base.dirty_images[face] = 0;
--   }
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      radeonFinish( rmesa->glCtx );
--   }
--
--   return 0;
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
-index b165205..e4df337 100644
---- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
-@@ -39,10 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "main/macros.h"
- #include "main/texformat.h"
-+#include "main/teximage.h"
- #include "main/texobj.h"
- #include "main/enums.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_swtcl.h"
-@@ -75,10 +77,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
- 			     && (tx_table[f].format != 0xffffffff) )
- 
--static const struct {
-+struct tx_table {
-    GLuint format, filter;
--}
--tx_table[] =
-+};
-+
-+static const struct tx_table tx_table[] =
- {
-    _ALPHA(RGBA8888),
-    _ALPHA_REV(RGBA8888),
-@@ -111,252 +114,6 @@ tx_table[] =
- #undef _ALPHA
- #undef _INVALID
- 
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
-- * too.
-- * 
-- * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-- */
--static void radeonSetTexImages( radeonContextPtr rmesa,
--				struct gl_texture_object *tObj )
--{
--   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
--   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
--   GLint curOffset, blitWidth;
--   GLint i, texelBytes;
--   GLint numLevels;
--   GLint log2Width, log2Height, log2Depth;
--
--   /* Set the hardware texture format
--    */
--   if ( !t->image_override ) {
--      t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
--                          RADEON_TXFORMAT_ALPHA_IN_MAP);
--      t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
--
--      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
--         t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
--         t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
--      }
--      else {
--         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
--         return;
--      }
--   }
--
--   texelBytes = baseImage->TexFormat->TexelBytes;
--
--   /* Compute which mipmap levels we really want to send to the hardware.
--    */
--
--   if (tObj->Target != GL_TEXTURE_CUBE_MAP)
--      driCalculateTextureFirstLastLevel( (driTextureObject *) t );
--   else {
--      /* r100 can't handle mipmaps for cube/3d textures, so don't waste
--         memory for them */
--      t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
--   }
--   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--   /* Calculate mipmap offsets and dimensions for blitting (uploading)
--    * The idea is that we lay out the mipmap levels within a block of
--    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--    */
--   curOffset = 0;
--   blitWidth = BLIT_WIDTH_BYTES;
--   t->tile_bits = 0;
--
--   /* figure out if this texture is suitable for tiling. */
--   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
--      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
--	 /* allow 32 (bytes) x 1 mip (which will use two times the space
--	    the non-tiled version would use) max if base texture is large enough */
--	 if ((numLevels == 1) ||
--	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
--	       (baseImage->Width * texelBytes > 64)) ||
--	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
--	    /* R100 has two microtile bits (only the txoffset reg, not the blitter)
--	       weird: X2 + OPT: 32bit correct, 16bit completely hosed
--		      X2: 32bit correct, 16bit correct
--		      OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
--	    t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
--	 }
--      }
--      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
--	 /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
--	    in the case if height is smaller than 16 (not 100% sure), as does the r200,
--	    so need to disable macro tiling in that case */
--	 if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
--	    t->tile_bits |= RADEON_TXO_MACRO_TILE;
--	 }
--      }
--   }
--
--   for (i = 0; i < numLevels; i++) {
--      const struct gl_texture_image *texImage;
--      GLuint size;
--
--      texImage = tObj->Image[0][i + t->base.firstLevel];
--      if ( !texImage )
--	 break;
--
--      /* find image size in bytes */
--      if (texImage->IsCompressed) {
--      /* need to calculate the size AFTER padding even though the texture is
--         submitted without padding.
--         Only handle pot textures currently - don't know if npot is even possible,
--         size calculation would certainly need (trivial) adjustments.
--         Align (and later pad) to 32byte, not sure what that 64byte blit width is
--         good for? */
--         if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
--            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
--            if ((texImage->Width + 3) < 8) /* width one block */
--               size = texImage->CompressedSize * 4;
--            else if ((texImage->Width + 3) < 16)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--         }
--         else /* DXT3/5, 16 bytes per block */
--            if ((texImage->Width + 3) < 8)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--      }
--      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
--      }
--      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
--	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--	    though the actual offset may be different (if texture is less than
--	    32 bytes width) to the untiled case */
--	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      else {
--	 int w = (texImage->Width * texelBytes + 31) & ~31;
--	 size = w * texImage->Height * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      assert(size > 0);
--
--      /* Align to 32-byte offset.  It is faster to do this unconditionally
--       * (no branch penalty).
--       */
--
--      curOffset = (curOffset + 0x1f) & ~0x1f;
--
--      if (texelBytes) {
--	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
--	 t->image[0][i].y = 0;
--	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
--	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
--      }
--      else {
--         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
--         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
--         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
--         t->image[0][i].height = size / t->image[0][i].width;     
--      }
--
--#if 0
--      /* for debugging only and only  applicable to non-rectangle targets */
--      assert(size % t->image[0][i].width == 0);
--      assert(t->image[0][i].x == 0
--             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
--#endif
--
--      if (0)
--         fprintf(stderr,
--                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--                 i, texImage->Width, texImage->Height,
--                 t->image[0][i].x, t->image[0][i].y,
--                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
--
--      curOffset += size;
--
--   }
--
--   /* Align the total size of texture memory block.
--    */
--   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--   /* Setup remaining cube face blits, if needed */
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      const GLuint faceSize = t->base.totalSize;
--      GLuint face;
--      /* reuse face 0 x/y/width/height - just update the offset when uploading */
--      for (face = 1; face < 6; face++) {
--         for (i = 0; i < numLevels; i++) {
--            t->image[face][i].x =  t->image[0][i].x;
--            t->image[face][i].y =  t->image[0][i].y;
--            t->image[face][i].width  = t->image[0][i].width;
--            t->image[face][i].height = t->image[0][i].height;
--         }
--      }
--      t->base.totalSize = 6 * faceSize; /* total texmem needed */
--   }
--
--   /* Hardware state:
--    */
--   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
--   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
--
--   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
--		       RADEON_TXFORMAT_HEIGHT_MASK |
--                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
--                       RADEON_TXFORMAT_F5_WIDTH_MASK |
--                       RADEON_TXFORMAT_F5_HEIGHT_MASK);
--   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
--		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
--
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      assert(log2Width == log2Height);
--      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
--                         (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
--                         (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
--      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
--   }
--
--   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
--                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
--
--   /* Only need to round to nearest 32 for textures, but the blitter
--    * requires 64-byte aligned pitches, and we may/may not need the
--    * blitter.   NPOT only!
--    */
--   if ( !t->image_override ) {
--      if (baseImage->IsCompressed)
--         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--      else
--         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
--      t->pp_txpitch -= 32;
--   }
--
--   t->dirty_state = TEX_ALL;
--
--   /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
--}
--
--
--
- /* ================================================================
-  * Texture combine functions
-  */
-@@ -503,7 +260,7 @@ do {							\
- 
- static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-    GLuint color_combine, alpha_combine;
-    const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
-@@ -846,22 +603,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
- void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-                         unsigned long long offset, GLint depth, GLuint pitch)
- {
--	radeonContextPtr rmesa = pDRICtx->driverPrivate;
-+	r100ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
--	    _mesa_lookup_texture(rmesa->glCtx, texname);
--	radeonTexObjPtr t;
-+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 
- 	if (tObj == NULL)
- 		return;
- 
--	t = (radeonTexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
--
--	t->pp_txoffset = offset;
-+	
-+	t->bo = NULL;
-+	t->override_offset = offset;
- 	t->pp_txpitch = pitch - 32;
- 
- 	switch (depth) {
-@@ -881,6 +637,122 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	}
- }
- 
-+void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
-+			 __DRIdrawable *dPriv)
-+{
-+	struct gl_texture_unit *texUnit;
-+	struct gl_texture_object *texObj;
-+	struct gl_texture_image *texImage;
-+	struct radeon_renderbuffer *rb;
-+	radeon_texture_image *rImage;
-+	radeonContextPtr radeon;
-+	r100ContextPtr rmesa;
-+	struct radeon_framebuffer *rfb;
-+	radeonTexObjPtr t;
-+	uint32_t pitch_val;
-+	uint32_t internalFormat, type, format;
-+
-+	type = GL_BGRA;
-+	format = GL_UNSIGNED_BYTE;
-+	internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
-+
-+	radeon = pDRICtx->driverPrivate;
-+	rmesa = pDRICtx->driverPrivate;
-+
-+	rfb = dPriv->driverPrivate;
-+        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
-+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
-+        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
-+
-+	rImage = get_radeon_texture_image(texImage);
-+	t = radeon_tex_obj(texObj);
-+        if (t == NULL) {
-+    	    return;
-+    	}
-+
-+	radeon_update_renderbuffers(pDRICtx, dPriv);
-+	/* back & depth buffer are useless free them right away */
-+	rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+	}
-+	rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+	rb = rfb->color_rb[0];
-+	if (rb->bo == NULL) {
-+		/* Failed to BO for the buffer */
-+		return;
-+	}
-+	
-+	_mesa_lock_texture(radeon->glCtx, texObj);
-+	if (t->bo) {
-+		radeon_bo_unref(t->bo);
-+		t->bo = NULL;
-+	}
-+	if (rImage->bo) {
-+		radeon_bo_unref(rImage->bo);
-+		rImage->bo = NULL;
-+	}
-+	if (t->mt) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = NULL;
-+	}
-+	if (rImage->mt) {
-+		radeon_miptree_unreference(rImage->mt);
-+		rImage->mt = NULL;
-+	}
-+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
-+				   rb->width, rb->height, 1, 0, rb->cpp);
-+	texImage->RowStride = rb->pitch / rb->cpp;
-+	texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
-+							internalFormat,
-+							type, format, 0);
-+	rImage->bo = rb->bo;
-+	radeon_bo_ref(rImage->bo);
-+	t->bo = rb->bo;
-+	radeon_bo_ref(t->bo);
-+	t->tile_bits = 0;
-+	t->image_override = GL_TRUE;
-+	t->override_offset = 0;
-+	t->pp_txpitch &= (1 << 13) -1;
-+	pitch_val = rb->pitch;
-+	switch (rb->cpp) {
-+	case 4:
-+		t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
-+		t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
-+		break;
-+	case 3:
-+	default:
-+		t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
-+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
-+		break;
-+	case 2:
-+		t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
-+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
-+		break;
-+	}
-+        t->pp_txsize = ((rb->width - 1) << RADEON_TEX_USIZE_SHIFT)
-+		   | ((rb->height - 1) << RADEON_TEX_VSIZE_SHIFT);
-+        t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
-+	t->pp_txpitch = pitch_val;
-+        t->pp_txpitch -= 32;
-+
-+	t->validated = GL_TRUE;
-+	_mesa_unlock_texture(radeon->glCtx, texObj);
-+	return;
-+}
-+
-+
-+void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
-+{
-+        radeonSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
-+}
-+
-+
- #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
- 			      RADEON_MIN_FILTER_MASK | 		\
- 			      RADEON_MAG_FILTER_MASK |		\
-@@ -901,12 +773,53 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-                               RADEON_TXFORMAT_NON_POWER2)
- 
- 
--static void import_tex_obj_state( radeonContextPtr rmesa,
-+static void disable_tex_obj_state( r100ContextPtr rmesa, 
-+				   int unit )
-+{
-+   RADEON_STATECHANGE( rmesa, tex[unit] );
-+
-+   RADEON_STATECHANGE( rmesa, tcl );
-+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
-+					     RADEON_Q_BIT(unit));
-+   
-+   if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
-+     TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-+     rmesa->recheck_texgen[unit] = GL_TRUE;
-+   }
-+
-+   if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
-+     /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
-+	cubic_map bit on unit 2 when the unit is disabled, otherwise every
-+	2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
-+	units, better be safe than sorry though).*/
-+     RADEON_STATECHANGE( rmesa, tex[unit] );
-+     rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
-+   }
-+
-+   {
-+      GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-+      GLuint tmp = rmesa->TexGenEnabled;
-+
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
-+      rmesa->TexGenNeedNormals[unit] = 0;
-+      rmesa->TexGenEnabled |= 
-+	(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-+
-+      if (tmp != rmesa->TexGenEnabled) {
-+	rmesa->recheck_texgen[unit] = GL_TRUE;
-+	rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-+      }
-+   }
-+}
-+
-+static void import_tex_obj_state( r100ContextPtr rmesa,
- 				  int unit,
- 				  radeonTexObjPtr texobj )
- {
- /* do not use RADEON_DB_STATE to avoid stale texture caches */
--   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
- 
-    RADEON_STATECHANGE( rmesa, tex[unit] );
-@@ -915,10 +828,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
-    cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
--   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
-    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- 
--   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-+   if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
-       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
-       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
-@@ -928,22 +840,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-    else {
-       se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
- 
--      if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
--	 int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
--	 GLuint bytesPerFace = texobj->base.totalSize / 6;
--	 ASSERT(texobj->base.totalSize % 6 == 0);
-+      if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
-+	 uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- 
- 	 RADEON_STATECHANGE( rmesa, cube[unit] );
- 	 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
--	 /* dont know if this setup conforms to OpenGL.. 
--	  * at least it matches the behavior of mesa software renderer
--	  */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
--	 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
-+	 /* state filled out in the cube_emit */
-       }
-    }
- 
-@@ -952,13 +854,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
-    }
- 
--   texobj->dirty_state &= ~(1<<unit);
-+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- 
- 
--
--
--static void set_texgen_matrix( radeonContextPtr rmesa, 
-+static void set_texgen_matrix( r100ContextPtr rmesa, 
- 			       GLuint unit,
- 			       const GLfloat *s_plane,
- 			       const GLfloat *t_plane,
-@@ -986,14 +886,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
-    rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
- 
-    rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
--   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- 
- /* Returns GL_FALSE if fallback required.
-  */
- static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-    GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-    GLuint tmp = rmesa->TexGenEnabled;
-@@ -1094,283 +994,187 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
-    }
- 
-    if (tmp != rmesa->TexGenEnabled) {
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
-    return GL_TRUE;
- }
- 
--
--static void disable_tex( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
--      /* Texture unit disabled */
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
--	 rmesa->state.texture.unit[unit].texobj = NULL;
--      }
--
--      RADEON_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
--	  ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
--
--      RADEON_STATECHANGE( rmesa, tcl );
--      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
--						RADEON_Q_BIT(unit));
--
--      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
--	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
--	 rmesa->recheck_texgen[unit] = GL_TRUE;
--      }
--
--      if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
--      /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
--         cubic_map bit on unit 2 when the unit is disabled, otherwise every
--	 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
--	 units, better be safe than sorry though).*/
--	 RADEON_STATECHANGE( rmesa, tex[unit] );
--	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
--      }
--
--      {
--	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
--	 GLuint tmp = rmesa->TexGenEnabled;
--
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
--	 rmesa->TexGenNeedNormals[unit] = 0;
--	 rmesa->TexGenEnabled |= 
--	     (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
--
--	 if (tmp != rmesa->TexGenEnabled) {
--	    rmesa->recheck_texgen[unit] = GL_TRUE;
--	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--	 }
--      }
--   }
--}
--
--static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
--   }
--
--   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--   if ( t->base.dirty_images[0] ) {
--      RADEON_FIREVERTICES( rmesa );
--      radeonSetTexImages( rmesa, tObj );
--      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock && !t->image_override ) 
--	return GL_FALSE;
--   }
--
--   return GL_TRUE;
--}
--
--static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
-+/**
-+ * Compute the cached hardware register values for the given texture object.
-+ *
-+ * \param rmesa Context pointer
-+ * \param t the r300 texture object
-+ */
-+static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--   GLuint face;
-+   const struct gl_texture_image *firstImage;
-+   GLint log2Width, log2Height, log2Depth, texelBytes;
- 
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
--      for (face = 0; face < 6; face++)
--         t->base.dirty_images[face] = ~0;
-+   if ( t->bo ) {
-+	return GL_TRUE;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
-+   firstImage = t->base.Image[0][t->mt->firstLevel];   
- 
--   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
--        t->base.dirty_images[2] || t->base.dirty_images[3] ||
--        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
--      /* flush */
--      RADEON_FIREVERTICES( rmesa );
--      /* layout memory space, once for all faces */
--      radeonSetTexImages( rmesa, tObj );
-+   if (firstImage->Border > 0) {
-+      fprintf(stderr, "%s: border\n", __FUNCTION__);
-+      return GL_FALSE;
-    }
- 
--   /* upload (per face) */
--   for (face = 0; face < 6; face++) {
--      if (t->base.dirty_images[face]) {
--         radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
-+   log2Width  = firstImage->WidthLog2;
-+   log2Height = firstImage->HeightLog2;
-+   log2Depth  = firstImage->DepthLog2;
-+   texelBytes = firstImage->TexFormat->TexelBytes;
-+
-+   if (!t->image_override) {
-+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+	const struct tx_table *table = tx_table;
-+
-+	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
-+			     RADEON_TXFORMAT_ALPHA_IN_MAP);
-+	 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;	 
-+	 
-+	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
-+	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
-+      } else {
-+	 _mesa_problem(NULL, "unexpected texture format in %s",
-+		       __FUNCTION__);
-+	 return GL_FALSE;
-       }
-    }
--      
--   if ( !t->base.memBlock ) {
--      /* texmem alloc failed, use s/w fallback */
--      return GL_FALSE;
-+   
-+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
-+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
-+	
-+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
-+		       RADEON_TXFORMAT_HEIGHT_MASK |
-+		       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
-+		       RADEON_TXFORMAT_F5_WIDTH_MASK |
-+		       RADEON_TXFORMAT_F5_HEIGHT_MASK);
-+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
-+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
-+   
-+   t->tile_bits = 0;
-+   
-+   if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      ASSERT(log2Width == log2Height);
-+      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
-+			 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
-+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
-+			 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
-+      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
-    }
- 
--   return GL_TRUE;
--}
--
--static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-+   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
-+		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
- 
--   if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
--      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-+   if ( !t->image_override ) {
-+      if (firstImage->IsCompressed)
-+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
-+      else
-+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
-+      t->pp_txpitch -= 32;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--   if ( t->base.dirty_images[0] ) {
--      RADEON_FIREVERTICES( rmesa );
--      radeonSetTexImages( rmesa, tObj );
--      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock &&
--           !t->image_override /* && !rmesa->prefer_gart_client_texturing  FIXME */ ) {
--	 fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
--	 return GL_FALSE;
--      }
-+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-+      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
-    }
- 
-    return GL_TRUE;
- }
- 
--
--static GLboolean update_tex_common( GLcontext *ctx, int unit )
-+static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--   GLenum format;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj *t = radeon_tex_obj(texObj);
-+   int ret;
- 
--   /* Fallback if there's a texture border */
--   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
--      fprintf(stderr, "%s: border\n", __FUNCTION__);
-+   if (!radeon_validate_texture_miptree(ctx, texObj))
-       return GL_FALSE;
--   }
-+
-+   ret = setup_hardware_state(rmesa, t, unit);
-+   if (ret == GL_FALSE)
-+     return GL_FALSE;
-+
-    /* yuv conversion only works in first unit */
-    if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
-       return GL_FALSE;
- 
--   /* Update state if this is a different texture object to last
--    * time.
--    */
--   if ( rmesa->state.texture.unit[unit].texobj != t ) {
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
--	     ~(1UL << unit);
--      }
--
--      rmesa->state.texture.unit[unit].texobj = t;
--      t->base.bound |= (1UL << unit);
--      t->dirty_state |= 1<<unit;
--      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
--   }
-+   RADEON_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
-+     (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
- 
-+   RADEON_STATECHANGE( rmesa, tcl );
-+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
- 
--   /* Newly enabled?
--    */
--   if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
--      RADEON_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
--	  (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
-+   rmesa->recheck_texgen[unit] = GL_TRUE;
- 
--      RADEON_STATECHANGE( rmesa, tcl );
--
--      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
--
--      rmesa->recheck_texgen[unit] = GL_TRUE;
--   }
--
--   if (t->dirty_state & (1<<unit)) {
--      import_tex_obj_state( rmesa, unit, t );
--      /* may need to update texture matrix (for texrect adjustments) */
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--   }
-+   import_tex_obj_state( rmesa, unit, t );
- 
-    if (rmesa->recheck_texgen[unit]) {
-       GLboolean fallback = !radeon_validate_texgen( ctx, unit );
-       TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
-       rmesa->recheck_texgen[unit] = 0;
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
--   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
--   if ( rmesa->state.texture.unit[unit].format != format ||
--	rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
--      rmesa->state.texture.unit[unit].format = format;
--      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
--      if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
--	 return GL_FALSE;
--      }
-+   if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
-+     return GL_FALSE;
-    }
--
-    FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
-+
-+   t->validated = GL_TRUE;
-    return !t->border_fallback;
- }
- 
--
--
- static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
- {
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
--      return (enable_tex_rect( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
--      return (enable_tex_2d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
--      return (enable_tex_cube( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
-+   if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
-+     return GL_FALSE;
-    }
--   else if ( texUnit->_ReallyEnabled ) {
--      return GL_FALSE;
-+
-+   if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
-+     /* disable the unit */
-+     disable_tex_obj_state(rmesa, unit);
-+     return GL_TRUE;
-    }
--   else {
--      disable_tex( ctx, unit );
--      return GL_TRUE;
-+
-+   if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
-+    _mesa_warning(ctx,
-+		  "failed to validate texture for unit %d.\n",
-+		  unit);
-+    rmesa->state.texture.unit[unit].texobj = NULL;
-+    return GL_FALSE;
-    }
-+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
-+   return GL_TRUE;
- }
- 
- void radeonUpdateTextureState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean ok;
- 
-+   /* set the ctx all textures off */
-+   RADEON_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
-+
-    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
- 	 radeonUpdateTextureUnit( ctx, 1 ) &&
- 	 radeonUpdateTextureUnit( ctx, 2 ));
- 
-    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
- 
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       radeonChooseVertexState( ctx );
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
-new file mode 100644
-index 0000000..35ed542
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
-@@ -0,0 +1,996 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+ *
-+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
-+ * initial release of the Radeon 8500 driver under the XFree86 license.
-+ * This notice must be preserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#include "main/glheader.h"
-+#include "main/imports.h"
-+#include "main/context.h"
-+#include "main/convolve.h"
-+#include "main/mipmap.h"
-+#include "main/texcompress.h"
-+#include "main/texformat.h"
-+#include "main/texstore.h"
-+#include "main/teximage.h"
-+#include "main/texobj.h"
-+
-+#include "xmlpool.h"		/* for symbolic values of enum-type options */
-+
-+#include "radeon_common.h"
-+
-+#include "radeon_mipmap_tree.h"
-+
-+
-+static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
-+	GLuint numrows, GLuint rowsize)
-+{
-+	assert(rowsize <= dststride);
-+	assert(rowsize <= srcstride);
-+
-+	if (rowsize == srcstride && rowsize == dststride) {
-+		memcpy(dst, src, numrows*rowsize);
-+	} else {
-+		GLuint i;
-+		for(i = 0; i < numrows; ++i) {
-+			memcpy(dst, src, rowsize);
-+			dst += dststride;
-+			src += srcstride;
-+		}
-+	}
-+}
-+
-+/* textures */
-+/**
-+ * Allocate an empty texture image object.
-+ */
-+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx)
-+{
-+	return CALLOC(sizeof(radeon_texture_image));
-+}
-+
-+/**
-+ * Free memory associated with this texture image.
-+ */
-+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
-+{
-+	radeon_texture_image* image = get_radeon_texture_image(timage);
-+
-+	if (image->mt) {
-+		radeon_miptree_unreference(image->mt);
-+		image->mt = 0;
-+		assert(!image->base.Data);
-+	} else {
-+		_mesa_free_texture_image_data(ctx, timage);
-+	}
-+	if (image->bo) {
-+		radeon_bo_unref(image->bo);
-+		image->bo = NULL;
-+	}
-+	if (timage->Data) {
-+		_mesa_free_texmemory(timage->Data);
-+		timage->Data = NULL;
-+	}
-+}
-+
-+/* Set Data pointer and additional data for mapped texture image */
-+static void teximage_set_map_data(radeon_texture_image *image)
-+{
-+	radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+
-+	image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
-+	image->base.RowStride = lvl->rowstride / image->mt->bpp;
-+}
-+
-+
-+/**
-+ * Map a single texture image for glTexImage and friends.
-+ */
-+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable)
-+{
-+	if (image->mt) {
-+		assert(!image->base.Data);
-+
-+		radeon_bo_map(image->mt->bo, write_enable);
-+		teximage_set_map_data(image);
-+	}
-+}
-+
-+
-+void radeon_teximage_unmap(radeon_texture_image *image)
-+{
-+	if (image->mt) {
-+		assert(image->base.Data);
-+
-+		image->base.Data = 0;
-+		radeon_bo_unmap(image->mt->bo);
-+	}
-+}
-+
-+static void map_override(GLcontext *ctx, radeonTexObj *t)
-+{
-+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
-+
-+	radeon_bo_map(t->bo, GL_FALSE);
-+
-+	img->base.Data = t->bo->ptr;
-+	_mesa_set_fetch_functions(&img->base, 2);
-+}
-+
-+static void unmap_override(GLcontext *ctx, radeonTexObj *t)
-+{
-+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
-+
-+	radeon_bo_unmap(t->bo);
-+
-+	img->base.Data = NULL;
-+}
-+
-+/**
-+ * Map a validated texture for reading during software rendering.
-+ */
-+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
-+{
-+	radeonTexObj* t = radeon_tex_obj(texObj);
-+	int face, level;
-+
-+	if (!radeon_validate_texture_miptree(ctx, texObj))
-+	  return;
-+
-+	/* for r100 3D sw fallbacks don't have mt */
-+	if (t->image_override && t->bo)
-+		map_override(ctx, t);
-+
-+	if (!t->mt)
-+		return;
-+
-+	radeon_bo_map(t->mt->bo, GL_FALSE);
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
-+			teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
-+	}
-+}
-+
-+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
-+{
-+	radeonTexObj* t = radeon_tex_obj(texObj);
-+	int face, level;
-+
-+	if (t->image_override && t->bo)
-+		unmap_override(ctx, t);
-+	/* for r100 3D sw fallbacks don't have mt */
-+	if (!t->mt)
-+	  return;
-+
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
-+			texObj->Image[face][level]->Data = 0;
-+	}
-+	radeon_bo_unmap(t->mt->bo);
-+}
-+
-+GLuint radeon_face_for_target(GLenum target)
-+{
-+	switch (target) {
-+	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-+	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-+	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-+		return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-+	default:
-+		return 0;
-+	}
-+}
-+
-+/**
-+ * Wraps Mesa's implementation to ensure that the base level image is mapped.
-+ *
-+ * This relies on internal details of _mesa_generate_mipmap, in particular
-+ * the fact that the memory for recreated texture images is always freed.
-+ */
-+static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
-+				   struct gl_texture_object *texObj)
-+{
-+	radeonTexObj* t = radeon_tex_obj(texObj);
-+	GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-+	int i, face;
-+
-+
-+	_mesa_generate_mipmap(ctx, target, texObj);
-+
-+	for (face = 0; face < nr_faces; face++) {
-+		for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
-+			radeon_texture_image *image;
-+
-+			image = get_radeon_texture_image(texObj->Image[face][i]);
-+
-+			if (image == NULL)
-+				break;
-+
-+			image->mtlevel = i;
-+			image->mtface = face;
-+
-+			radeon_miptree_unreference(image->mt);
-+			image->mt = NULL;
-+		}
-+	}
-+	
-+}
-+
-+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
-+{
-+	GLuint face = radeon_face_for_target(target);
-+	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
-+
-+	radeon_teximage_map(baseimage, GL_FALSE);
-+	radeon_generate_mipmap(ctx, target, texObj);
-+	radeon_teximage_unmap(baseimage);
-+}
-+
-+
-+/* try to find a format which will only need a memcopy */
-+static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa,
-+								 GLenum srcFormat,
-+								 GLenum srcType, GLboolean fbo)
-+{
-+	const GLuint ui = 1;
-+	const GLubyte littleEndian = *((const GLubyte *)&ui);
-+
-+	/* r100 can only do this */
-+	if (IS_R100_CLASS(rmesa->radeonScreen) || fbo)
-+	  return _dri_texformat_argb8888;
-+
-+	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
-+	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
-+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
-+		return &_mesa_texformat_rgba8888;
-+	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-+		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
-+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
-+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
-+		return &_mesa_texformat_rgba8888_rev;
-+	} else if (IS_R200_CLASS(rmesa->radeonScreen)) {
-+		return _dri_texformat_argb8888;
-+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
-+					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
-+		return &_mesa_texformat_argb8888_rev;
-+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
-+					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
-+		return &_mesa_texformat_argb8888;
-+	} else
-+		return _dri_texformat_argb8888;
-+}
-+
-+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
-+							  GLint internalFormat,
-+							  GLenum format,
-+							  GLenum type)
-+{
-+	return radeonChooseTextureFormat(ctx, internalFormat, format,
-+					 type, 0);
-+}
-+
-+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
-+							  GLint internalFormat,
-+							  GLenum format,
-+							  GLenum type, GLboolean fbo)
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	const GLboolean do32bpt =
-+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
-+	const GLboolean force16bpt =
-+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
-+	(void)format;
-+
-+#if 0
-+	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
-+		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
-+		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
-+	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
-+#endif
-+
-+	switch (internalFormat) {
-+	case 4:
-+	case GL_RGBA:
-+	case GL_COMPRESSED_RGBA:
-+		switch (type) {
-+		case GL_UNSIGNED_INT_10_10_10_2:
-+		case GL_UNSIGNED_INT_2_10_10_10_REV:
-+			return do32bpt ? _dri_texformat_argb8888 :
-+			    _dri_texformat_argb1555;
-+		case GL_UNSIGNED_SHORT_4_4_4_4:
-+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-+			return _dri_texformat_argb4444;
-+		case GL_UNSIGNED_SHORT_5_5_5_1:
-+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-+			return _dri_texformat_argb1555;
-+		default:
-+			return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) :
-+			    _dri_texformat_argb4444;
-+		}
-+
-+	case 3:
-+	case GL_RGB:
-+	case GL_COMPRESSED_RGB:
-+		switch (type) {
-+		case GL_UNSIGNED_SHORT_4_4_4_4:
-+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-+			return _dri_texformat_argb4444;
-+		case GL_UNSIGNED_SHORT_5_5_5_1:
-+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-+			return _dri_texformat_argb1555;
-+		case GL_UNSIGNED_SHORT_5_6_5:
-+		case GL_UNSIGNED_SHORT_5_6_5_REV:
-+			return _dri_texformat_rgb565;
-+		default:
-+			return do32bpt ? _dri_texformat_argb8888 :
-+			    _dri_texformat_rgb565;
-+		}
-+
-+	case GL_RGBA8:
-+	case GL_RGB10_A2:
-+	case GL_RGBA12:
-+	case GL_RGBA16:
-+		return !force16bpt ?
-+			radeonChoose8888TexFormat(rmesa, format, type, fbo) :
-+			_dri_texformat_argb4444;
-+
-+	case GL_RGBA4:
-+	case GL_RGBA2:
-+		return _dri_texformat_argb4444;
-+
-+	case GL_RGB5_A1:
-+		return _dri_texformat_argb1555;
-+
-+	case GL_RGB8:
-+	case GL_RGB10:
-+	case GL_RGB12:
-+	case GL_RGB16:
-+		return !force16bpt ? _dri_texformat_argb8888 :
-+		    _dri_texformat_rgb565;
-+
-+	case GL_RGB5:
-+	case GL_RGB4:
-+	case GL_R3_G3_B2:
-+		return _dri_texformat_rgb565;
-+
-+	case GL_ALPHA:
-+	case GL_ALPHA4:
-+	case GL_ALPHA8:
-+	case GL_ALPHA12:
-+	case GL_ALPHA16:
-+	case GL_COMPRESSED_ALPHA:
-+		/* r200: can't use a8 format since interpreting hw I8 as a8 would result
-+		   in wrong rgb values (same as alpha value instead of 0). */
-+		if (IS_R200_CLASS(rmesa->radeonScreen))
-+			return _dri_texformat_al88;
-+		else
-+			return _dri_texformat_a8;
-+	case 1:
-+	case GL_LUMINANCE:
-+	case GL_LUMINANCE4:
-+	case GL_LUMINANCE8:
-+	case GL_LUMINANCE12:
-+	case GL_LUMINANCE16:
-+	case GL_COMPRESSED_LUMINANCE:
-+		return _dri_texformat_l8;
-+
-+	case 2:
-+	case GL_LUMINANCE_ALPHA:
-+	case GL_LUMINANCE4_ALPHA4:
-+	case GL_LUMINANCE6_ALPHA2:
-+	case GL_LUMINANCE8_ALPHA8:
-+	case GL_LUMINANCE12_ALPHA4:
-+	case GL_LUMINANCE12_ALPHA12:
-+	case GL_LUMINANCE16_ALPHA16:
-+	case GL_COMPRESSED_LUMINANCE_ALPHA:
-+		return _dri_texformat_al88;
-+
-+	case GL_INTENSITY:
-+	case GL_INTENSITY4:
-+	case GL_INTENSITY8:
-+	case GL_INTENSITY12:
-+	case GL_INTENSITY16:
-+	case GL_COMPRESSED_INTENSITY:
-+		return _dri_texformat_i8;
-+
-+	case GL_YCBCR_MESA:
-+		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
-+		    type == GL_UNSIGNED_BYTE)
-+			return &_mesa_texformat_ycbcr;
-+		else
-+			return &_mesa_texformat_ycbcr_rev;
-+
-+	case GL_RGB_S3TC:
-+	case GL_RGB4_S3TC:
-+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-+		return &_mesa_texformat_rgb_dxt1;
-+
-+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-+		return &_mesa_texformat_rgba_dxt1;
-+
-+	case GL_RGBA_S3TC:
-+	case GL_RGBA4_S3TC:
-+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-+		return &_mesa_texformat_rgba_dxt3;
-+
-+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-+		return &_mesa_texformat_rgba_dxt5;
-+
-+	case GL_ALPHA16F_ARB:
-+		return &_mesa_texformat_alpha_float16;
-+	case GL_ALPHA32F_ARB:
-+		return &_mesa_texformat_alpha_float32;
-+	case GL_LUMINANCE16F_ARB:
-+		return &_mesa_texformat_luminance_float16;
-+	case GL_LUMINANCE32F_ARB:
-+		return &_mesa_texformat_luminance_float32;
-+	case GL_LUMINANCE_ALPHA16F_ARB:
-+		return &_mesa_texformat_luminance_alpha_float16;
-+	case GL_LUMINANCE_ALPHA32F_ARB:
-+		return &_mesa_texformat_luminance_alpha_float32;
-+	case GL_INTENSITY16F_ARB:
-+		return &_mesa_texformat_intensity_float16;
-+	case GL_INTENSITY32F_ARB:
-+		return &_mesa_texformat_intensity_float32;
-+	case GL_RGB16F_ARB:
-+		return &_mesa_texformat_rgba_float16;
-+	case GL_RGB32F_ARB:
-+		return &_mesa_texformat_rgba_float32;
-+	case GL_RGBA16F_ARB:
-+		return &_mesa_texformat_rgba_float16;
-+	case GL_RGBA32F_ARB:
-+		return &_mesa_texformat_rgba_float32;
-+
-+	case GL_DEPTH_COMPONENT:
-+	case GL_DEPTH_COMPONENT16:
-+	case GL_DEPTH_COMPONENT24:
-+	case GL_DEPTH_COMPONENT32:
-+	case GL_DEPTH_STENCIL_EXT:
-+	case GL_DEPTH24_STENCIL8_EXT:
-+		return &_mesa_texformat_s8_z24;
-+	default:
-+		_mesa_problem(ctx,
-+			      "unexpected internalFormat 0x%x in %s",
-+			      (int)internalFormat, __func__);
-+		return NULL;
-+	}
-+
-+	return NULL;		/* never get here */
-+}
-+
-+/**
-+ * All glTexImage calls go through this function.
-+ */
-+static void radeon_teximage(
-+	GLcontext *ctx, int dims,
-+	GLint face, GLint level,
-+	GLint internalFormat,
-+	GLint width, GLint height, GLint depth,
-+	GLsizei imageSize,
-+	GLenum format, GLenum type, const GLvoid * pixels,
-+	const struct gl_pixelstore_attrib *packing,
-+	struct gl_texture_object *texObj,
-+	struct gl_texture_image *texImage,
-+	int compressed)
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	radeonTexObj* t = radeon_tex_obj(texObj);
-+	radeon_texture_image* image = get_radeon_texture_image(texImage);
-+	GLuint dstRowStride;
-+	GLint postConvWidth = width;
-+	GLint postConvHeight = height;
-+	GLuint texelBytes;
-+
-+	radeon_firevertices(rmesa);
-+
-+	t->validated = GL_FALSE;
-+
-+	if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
-+	       _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
-+						  &postConvHeight);
-+	}
-+
-+	/* Choose and fill in the texture format for this image */
-+	texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0);
-+	_mesa_set_fetch_functions(texImage, dims);
-+
-+	if (texImage->TexFormat->TexelBytes == 0) {
-+		texelBytes = 0;
-+		texImage->IsCompressed = GL_TRUE;
-+		texImage->CompressedSize =
-+			ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
-+					   texImage->Height, texImage->Depth,
-+					   texImage->TexFormat->MesaFormat);
-+	} else {
-+		texImage->IsCompressed = GL_FALSE;
-+		texImage->CompressedSize = 0;
-+
-+		texelBytes = texImage->TexFormat->TexelBytes;
-+		/* Minimum pitch of 32 bytes */
-+		if (postConvWidth * texelBytes < 32) {
-+		  postConvWidth = 32 / texelBytes;
-+		  texImage->RowStride = postConvWidth;
-+		}
-+		if (!image->mt) {      
-+			assert(texImage->RowStride == postConvWidth);
-+		}
-+	}
-+
-+	/* Allocate memory for image */
-+	radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
-+
-+	if (t->mt &&
-+	    t->mt->firstLevel == level &&
-+	    t->mt->lastLevel == level &&
-+	    t->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
-+	    !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
-+	  radeon_miptree_unreference(t->mt);
-+	  t->mt = NULL;
-+	}
-+
-+	if (!t->mt)
-+		radeon_try_alloc_miptree(rmesa, t, texImage, face, level);
-+	if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) {
-+		radeon_mipmap_level *lvl;
-+		image->mt = t->mt;
-+		image->mtlevel = level - t->mt->firstLevel;
-+		image->mtface = face;
-+		radeon_miptree_reference(t->mt);
-+		lvl = &image->mt->levels[image->mtlevel];
-+		dstRowStride = lvl->rowstride;
-+	} else {
-+		int size;
-+		if (texImage->IsCompressed) {
-+			size = texImage->CompressedSize;
-+		} else {
-+			size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
-+		}
-+		texImage->Data = _mesa_alloc_texmemory(size);
-+	}
-+
-+	/* Upload texture image; note that the spec allows pixels to be NULL */
-+	if (compressed) {
-+		pixels = _mesa_validate_pbo_compressed_teximage(
-+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
-+	} else {
-+		pixels = _mesa_validate_pbo_teximage(
-+			ctx, dims, width, height, depth,
-+			format, type, pixels, packing, "glTexImage");
-+	}
-+
-+	if (pixels) {
-+		radeon_teximage_map(image, GL_TRUE);
-+
-+		if (compressed) {
-+			memcpy(texImage->Data, pixels, imageSize);
-+		} else {
-+			GLuint dstRowStride;
-+			if (image->mt) {
-+				radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+				dstRowStride = lvl->rowstride;
-+			} else {
-+				dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
-+			}
-+
-+			if (!texImage->TexFormat->StoreImage(ctx, dims,
-+						texImage->_BaseFormat,
-+						texImage->TexFormat,
-+						texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
-+						dstRowStride,
-+						texImage->ImageOffsets,
-+						width, height, depth,
-+						format, type, pixels, packing))
-+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-+		}
-+
-+	}
-+
-+	/* SGIS_generate_mipmap */
-+	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-+		radeon_generate_mipmap(ctx, texObj->Target, texObj);
-+	}
-+
-+	_mesa_unmap_teximage_pbo(ctx, packing);
-+
-+	if (pixels)
-+	  radeon_teximage_unmap(image);
-+
-+
-+}
-+
-+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
-+		      GLint internalFormat,
-+		      GLint width, GLint border,
-+		      GLenum format, GLenum type, const GLvoid * pixels,
-+		      const struct gl_pixelstore_attrib *packing,
-+		      struct gl_texture_object *texObj,
-+		      struct gl_texture_image *texImage)
-+{
-+	radeon_teximage(ctx, 1, 0, level, internalFormat, width, 1, 1,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
-+			   GLint internalFormat,
-+			   GLint width, GLint height, GLint border,
-+			   GLenum format, GLenum type, const GLvoid * pixels,
-+			   const struct gl_pixelstore_attrib *packing,
-+			   struct gl_texture_object *texObj,
-+			   struct gl_texture_image *texImage)
-+
-+{
-+	GLuint face = radeon_face_for_target(target);
-+
-+	radeon_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
-+				     GLint level, GLint internalFormat,
-+				     GLint width, GLint height, GLint border,
-+				     GLsizei imageSize, const GLvoid * data,
-+				     struct gl_texture_object *texObj,
-+				     struct gl_texture_image *texImage)
-+{
-+	GLuint face = radeon_face_for_target(target);
-+
-+	radeon_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
-+		imageSize, 0, 0, data, &ctx->Unpack, texObj, texImage, 1);
-+}
-+
-+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
-+		      GLint internalFormat,
-+		      GLint width, GLint height, GLint depth,
-+		      GLint border,
-+		      GLenum format, GLenum type, const GLvoid * pixels,
-+		      const struct gl_pixelstore_attrib *packing,
-+		      struct gl_texture_object *texObj,
-+		      struct gl_texture_image *texImage)
-+{
-+	radeon_teximage(ctx, 3, 0, level, internalFormat, width, height, depth,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+/**
-+ * Update a subregion of the given texture image.
-+ */
-+static void radeon_texsubimage(GLcontext* ctx, int dims, int level,
-+		GLint xoffset, GLint yoffset, GLint zoffset,
-+		GLsizei width, GLsizei height, GLsizei depth,
-+		GLsizei imageSize,
-+		GLenum format, GLenum type,
-+		const GLvoid * pixels,
-+		const struct gl_pixelstore_attrib *packing,
-+		struct gl_texture_object *texObj,
-+		struct gl_texture_image *texImage,
-+		int compressed)
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	radeonTexObj* t = radeon_tex_obj(texObj);
-+	radeon_texture_image* image = get_radeon_texture_image(texImage);
-+
-+	radeon_firevertices(rmesa);
-+
-+	t->validated = GL_FALSE;
-+	if (compressed) {
-+		pixels = _mesa_validate_pbo_compressed_teximage(
-+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
-+	} else {
-+		pixels = _mesa_validate_pbo_teximage(ctx, dims,
-+			width, height, depth, format, type, pixels, packing, "glTexSubImage1D");
-+	}
-+
-+	if (pixels) {
-+		GLint dstRowStride;
-+		radeon_teximage_map(image, GL_TRUE);
-+
-+		if (image->mt) {
-+			radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+			dstRowStride = lvl->rowstride;
-+		} else {
-+			dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
-+		}
-+
-+		if (compressed) {
-+			uint32_t srcRowStride, bytesPerRow, rows; 
-+			dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width);
-+			srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
-+			bytesPerRow = srcRowStride;
-+			rows = height / 4;
-+
-+			copy_rows(texImage->Data, dstRowStride,  image->base.Data, srcRowStride, rows,
-+				  bytesPerRow);
-+			
-+		} else {
-+			if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
-+							     texImage->TexFormat, texImage->Data,
-+							     xoffset, yoffset, zoffset,
-+							     dstRowStride,
-+							     texImage->ImageOffsets,
-+							     width, height, depth,
-+							     format, type, pixels, packing))
-+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
-+		}
-+
-+	}
-+
-+	/* GL_SGIS_generate_mipmap */
-+	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-+		radeon_generate_mipmap(ctx, texObj->Target, texObj);
-+	}
-+	radeon_teximage_unmap(image);
-+
-+	_mesa_unmap_teximage_pbo(ctx, packing);
-+
-+
-+}
-+
-+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
-+			 GLint xoffset,
-+			 GLsizei width,
-+			 GLenum format, GLenum type,
-+			 const GLvoid * pixels,
-+			 const struct gl_pixelstore_attrib *packing,
-+			 struct gl_texture_object *texObj,
-+			 struct gl_texture_image *texImage)
-+{
-+	radeon_texsubimage(ctx, 1, level, xoffset, 0, 0, width, 1, 1, 0,
-+		format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
-+			 GLint xoffset, GLint yoffset,
-+			 GLsizei width, GLsizei height,
-+			 GLenum format, GLenum type,
-+			 const GLvoid * pixels,
-+			 const struct gl_pixelstore_attrib *packing,
-+			 struct gl_texture_object *texObj,
-+			 struct gl_texture_image *texImage)
-+{
-+	radeon_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
-+			   0, format, type, pixels, packing, texObj, texImage,
-+			   0);
-+}
-+
-+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
-+				   GLint level, GLint xoffset,
-+				   GLint yoffset, GLsizei width,
-+				   GLsizei height, GLenum format,
-+				   GLsizei imageSize, const GLvoid * data,
-+				   struct gl_texture_object *texObj,
-+				   struct gl_texture_image *texImage)
-+{
-+	radeon_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
-+		imageSize, format, 0, data, &ctx->Unpack, texObj, texImage, 1);
-+}
-+
-+
-+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
-+			 GLint xoffset, GLint yoffset, GLint zoffset,
-+			 GLsizei width, GLsizei height, GLsizei depth,
-+			 GLenum format, GLenum type,
-+			 const GLvoid * pixels,
-+			 const struct gl_pixelstore_attrib *packing,
-+			 struct gl_texture_object *texObj,
-+			 struct gl_texture_image *texImage)
-+{
-+	radeon_texsubimage(ctx, 3, level, xoffset, yoffset, zoffset, width, height, depth, 0,
-+		format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+
-+
-+/**
-+ * Ensure that the given image is stored in the given miptree from now on.
-+ */
-+static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level)
-+{
-+	radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];
-+	unsigned char *dest;
-+
-+	assert(image->mt != mt);
-+	assert(dstlvl->width == image->base.Width);
-+	assert(dstlvl->height == image->base.Height);
-+	assert(dstlvl->depth == image->base.Depth);
-+
-+
-+	radeon_bo_map(mt->bo, GL_TRUE);
-+	dest = mt->bo->ptr + dstlvl->faces[face].offset;
-+
-+	if (image->mt) {
-+		/* Format etc. should match, so we really just need a memcpy().
-+		 * In fact, that memcpy() could be done by the hardware in many
-+		 * cases, provided that we have a proper memory manager.
-+		 */
-+		radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
-+
-+		assert(srclvl->size == dstlvl->size);
-+		assert(srclvl->rowstride == dstlvl->rowstride);
-+
-+		radeon_bo_map(image->mt->bo, GL_FALSE);
-+
-+		memcpy(dest,
-+			image->mt->bo->ptr + srclvl->faces[face].offset,
-+			dstlvl->size);
-+		radeon_bo_unmap(image->mt->bo);
-+
-+		radeon_miptree_unreference(image->mt);
-+	} else {
-+		uint32_t srcrowstride;
-+		uint32_t height;
-+		/* need to confirm this value is correct */
-+		if (mt->compressed) {
-+			height = image->base.Height / 4;
-+			srcrowstride = image->base.RowStride * mt->bpp;
-+		} else {
-+			height = image->base.Height * image->base.Depth;
-+			srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
-+		}
-+
-+//		if (mt->tilebits)
-+//			WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
-+
-+		copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
-+			  height, srcrowstride);
-+
-+		_mesa_free_texmemory(image->base.Data);
-+		image->base.Data = 0;
-+	}
-+
-+	radeon_bo_unmap(mt->bo);
-+
-+	image->mt = mt;
-+	image->mtface = face;
-+	image->mtlevel = level;
-+	radeon_miptree_reference(image->mt);
-+}
-+
-+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
-+{
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	radeonTexObj *t = radeon_tex_obj(texObj);
-+	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]);
-+	int face, level;
-+
-+	if (t->validated || t->image_override)
-+		return GL_TRUE;
-+
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+		fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
-+
-+	if (baseimage->base.Border > 0)
-+		return GL_FALSE;
-+
-+	/* Ensure a matching miptree exists.
-+	 *
-+	 * Differing mipmap trees can result when the app uses TexImage to
-+	 * change texture dimensions.
-+	 *
-+	 * Prefer to use base image's miptree if it
-+	 * exists, since that most likely contains more valid data (remember
-+	 * that the base level is usually significantly larger than the rest
-+	 * of the miptree, so cubemaps are the only possible exception).
-+	 */
-+	if (baseimage->mt &&
-+	    baseimage->mt != t->mt &&
-+	    radeon_miptree_matches_texture(baseimage->mt, &t->base)) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = baseimage->mt;
-+		radeon_miptree_reference(t->mt);
-+	} else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = 0;
-+	}
-+
-+	if (!t->mt) {
-+		if (RADEON_DEBUG & DEBUG_TEXTURE)
-+			fprintf(stderr, " Allocate new miptree\n");
-+		radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel);
-+		if (!t->mt) {
-+			_mesa_problem(ctx, "r300_validate_texture failed to alloc miptree");
-+			return GL_FALSE;
-+		}
-+	}
-+
-+	/* Ensure all images are stored in the single main miptree */
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
-+			radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
-+			if (RADEON_DEBUG & DEBUG_TEXTURE)
-+				fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
-+			if (t->mt == image->mt) {
-+				if (RADEON_DEBUG & DEBUG_TEXTURE)
-+					fprintf(stderr, "OK\n");
-+				continue;
-+			}
-+
-+			if (RADEON_DEBUG & DEBUG_TEXTURE)
-+				fprintf(stderr, "migrating\n");
-+			migrate_image_to_miptree(t->mt, image, face, level);
-+		}
-+	}
-+
-+	return GL_TRUE;
-+}
-+
-+
-+/**
-+ * Need to map texture image into memory before copying image data,
-+ * then unmap it.
-+ */
-+static void
-+radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
-+		     GLenum format, GLenum type, GLvoid * pixels,
-+		     struct gl_texture_object *texObj,
-+		     struct gl_texture_image *texImage, int compressed)
-+{
-+	radeon_texture_image *image = get_radeon_texture_image(texImage);
-+
-+	if (image->mt) {
-+		/* Map the texture image read-only */
-+		radeon_teximage_map(image, GL_FALSE);
-+	} else {
-+		/* Image hasn't been uploaded to a miptree yet */
-+		assert(image->base.Data);
-+	}
-+
-+	if (compressed) {
-+		_mesa_get_compressed_teximage(ctx, target, level, pixels,
-+					      texObj, texImage);
-+	} else {
-+		_mesa_get_teximage(ctx, target, level, format, type, pixels,
-+				   texObj, texImage);
-+	}
-+     
-+	if (image->mt) {
-+		radeon_teximage_unmap(image);
-+	}
-+}
-+
-+void
-+radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
-+		  GLenum format, GLenum type, GLvoid * pixels,
-+		  struct gl_texture_object *texObj,
-+		  struct gl_texture_image *texImage)
-+{
-+	radeon_get_tex_image(ctx, target, level, format, type, pixels,
-+			     texObj, texImage, 0);
-+}
-+
-+void
-+radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
-+			    GLvoid *pixels,
-+			    struct gl_texture_object *texObj,
-+			    struct gl_texture_image *texImage)
-+{
-+	radeon_get_tex_image(ctx, target, level, 0, 0, pixels,
-+			     texObj, texImage, 1);
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
-new file mode 100644
-index 0000000..888a55b
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
-@@ -0,0 +1,122 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
-+ *
-+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
-+ * initial release of the Radeon 8500 driver under the XFree86 license.
-+ * This notice must be preserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#ifndef RADEON_TEXTURE_H
-+#define RADEON_TEXTURE_H
-+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
-+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
-+
-+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable);
-+void radeon_teximage_unmap(radeon_texture_image *image);
-+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
-+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
-+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
-+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
-+GLuint radeon_face_for_target(GLenum target);
-+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
-+							  GLint internalFormat,
-+							  GLenum format,
-+							  GLenum type);
-+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
-+							  GLint internalFormat,
-+							  GLenum format,
-+							  GLenum type, GLboolean fbo);
-+
-+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
-+		      GLint internalFormat,
-+		      GLint width, GLint border,
-+		      GLenum format, GLenum type, const GLvoid * pixels,
-+		      const struct gl_pixelstore_attrib *packing,
-+		      struct gl_texture_object *texObj,
-+		      struct gl_texture_image *texImage);
-+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
-+		      GLint internalFormat,
-+		      GLint width, GLint height, GLint border,
-+		      GLenum format, GLenum type, const GLvoid * pixels,
-+		      const struct gl_pixelstore_attrib *packing,
-+		      struct gl_texture_object *texObj,
-+		      struct gl_texture_image *texImage);
-+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
-+				GLint level, GLint internalFormat,
-+				GLint width, GLint height, GLint border,
-+				GLsizei imageSize, const GLvoid * data,
-+				struct gl_texture_object *texObj,
-+				struct gl_texture_image *texImage);
-+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
-+		      GLint internalFormat,
-+		      GLint width, GLint height, GLint depth,
-+		      GLint border,
-+		      GLenum format, GLenum type, const GLvoid * pixels,
-+		      const struct gl_pixelstore_attrib *packing,
-+		      struct gl_texture_object *texObj,
-+		      struct gl_texture_image *texImage);
-+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
-+			 GLint xoffset,
-+			 GLsizei width,
-+			 GLenum format, GLenum type,
-+			 const GLvoid * pixels,
-+			 const struct gl_pixelstore_attrib *packing,
-+			 struct gl_texture_object *texObj,
-+			 struct gl_texture_image *texImage);
-+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
-+				GLint xoffset, GLint yoffset,
-+				GLsizei width, GLsizei height,
-+				GLenum format, GLenum type,
-+				const GLvoid * pixels,
-+				const struct gl_pixelstore_attrib *packing,
-+				struct gl_texture_object *texObj,
-+				struct gl_texture_image *texImage);
-+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
-+				   GLint level, GLint xoffset,
-+				   GLint yoffset, GLsizei width,
-+				   GLsizei height, GLenum format,
-+				   GLsizei imageSize, const GLvoid * data,
-+				   struct gl_texture_object *texObj,
-+				   struct gl_texture_image *texImage);
-+
-+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
-+			 GLint xoffset, GLint yoffset, GLint zoffset,
-+			 GLsizei width, GLsizei height, GLsizei depth,
-+			 GLenum format, GLenum type,
-+			 const GLvoid * pixels,
-+			 const struct gl_pixelstore_attrib *packing,
-+			 struct gl_texture_object *texObj,
-+			 struct gl_texture_image *texImage);
-+
-+void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
-+		       GLenum format, GLenum type, GLvoid * pixels,
-+		       struct gl_texture_object *texObj,
-+		       struct gl_texture_image *texImage);
-+void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
-+				 GLvoid *pixels,
-+				 struct gl_texture_object *texObj,
-+				 struct gl_texture_image *texImage);
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-index ae2ccdf..8668074 100644
---- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-@@ -2031,6 +2031,9 @@
- #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
- #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
- #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
-+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
-+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
-+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
- #define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
- #define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
- #define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
diff --git a/sources b/sources
index bada79f..eca66ce 100644
--- a/sources
+++ b/sources
@@ -1,3 +1,3 @@
 6ae05158e678f4594343f32c2ca50515  gl-manpages-1.0.1.tar.bz2
 a5ec51ed9f0a55dc3462d90d52ff899c  xdriinfo-1.0.2.tar.bz2
-93b7016851831fc31d02103e7d9551b3  mesa-20090612.tar.bz2
+b3547e33cef106021dac5f5ff44e6bc6  mesa-20090723.tar.bz2