diff --git a/mesa.spec b/mesa.spec index af182f9..b559ef5 100644 --- a/mesa.spec +++ b/mesa.spec @@ -20,7 +20,7 @@ Summary: Mesa graphics libraries Name: mesa Version: 7.3 -Release: 9%{?dist} +Release: 10%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -427,6 +427,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/mesa-demos-data %changelog +* Thu Mar 05 2009 Dave Airlie 7.3-10 +- radeon-rewrite.patch: fixup link against libdrm_radeon + * Wed Mar 04 2009 Dave Airlie 7.3-9 - try again: pull in 7.4 fixes, dri configs changes, new radeon-rewrite diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch index 0edb095..1eb201c 100644 --- a/radeon-rewrite.patch +++ b/radeon-rewrite.patch @@ -1,716 +1,20182 @@ -From c4030c794274b22ba6ccb7c919900b41f5c723f2 Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Wed, 4 Mar 2009 16:51:14 +1000 -Subject: [PATCH] radeon/r100/r200: import latest merge +commit 263b887d85e3eac9a32673c8ed3004c3129ce997 +Author: Dave Airlie +Date: Sun Feb 15 17:03:47 2009 +1000 ---- - src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 182 ++++ - src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 825 +++++++++++++++++ - src/mesa/drivers/dri/radeon/radeon_bo_legacy.h | 47 + - src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 67 ++ - src/mesa/drivers/dri/radeon/radeon_cmdbuf.h | 143 +++ - src/mesa/drivers/dri/radeon/radeon_common.c | 849 +++++++++++++++++ - src/mesa/drivers/dri/radeon/radeon_common.h | 55 ++ - .../drivers/dri/radeon/radeon_common_context.c | 589 ++++++++++++ - .../drivers/dri/radeon/radeon_common_context.h | 508 ++++++++++ - src/mesa/drivers/dri/radeon/radeon_cs_drm.h | 207 +++++ - src/mesa/drivers/dri/radeon/radeon_cs_legacy.c | 504 ++++++++++ - src/mesa/drivers/dri/radeon/radeon_cs_legacy.h | 40 + - src/mesa/drivers/dri/radeon/radeon_dma.c | 323 +++++++ - src/mesa/drivers/dri/radeon/radeon_dma.h | 51 + - src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 360 ++++++++ - 
src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h | 97 ++ - src/mesa/drivers/dri/radeon/radeon_texture.c | 966 ++++++++++++++++++++ - src/mesa/drivers/dri/radeon/radeon_texture.h | 118 +++ - 18 files changed, 5931 insertions(+), 0 deletions(-) - create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_drm.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_cmdbuf.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_drm.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h - create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.c - create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.h + radeon: add cflags to decide whether to link libdrm_radeon or not. + + You don't need libdrm_radeon for the legacy driver to build, + only for the experimental mm/cs paths. -diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h -new file mode 100644 -index 0000000..1ed13f1 ---- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h -@@ -0,0 +1,182 @@ -+/* -+ * Copyright © 2008 Jérôme Glisse -+ * All Rights Reserved. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sub license, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS -+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -+ * USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. -+ */ -+/* -+ * Authors: -+ * Jérôme Glisse -+ */ -+#ifndef RADEON_BO_H -+#define RADEON_BO_H -+ -+#include -+#include -+//#include "radeon_track.h" +commit 61e6b2aee3069700db397f26d7ae1384641367ff +Author: Ian Romanick +Date: Fri Jan 9 15:43:17 2009 -0800 + + Track two sets of back-face stencil state + + Track separate back-face stencil state for OpenGL 2.0 / + GL_ATI_separate_stencil and GL_EXT_stencil_two_side. This allows all + three to be enabled in a driver. One set of state is set via the 2.0 + or ATI functions and is used when STENCIL_TEST_TWO_SIDE_EXT is + disabled. The other is set by StencilFunc and StencilOp when the + active stencil face is set to BACK. The GL_EXT_stencil_two_side spec has + more details. 
+ + http://opengl.org/registry/specs/EXT/stencil_two_side.txt + +commit 86691da4b5f43be625ec510b7fe40657b9985783 +Author: Dave Airlie +Date: Wed Mar 4 16:51:14 2009 +1000 + + radeon/r100/r200: import latest merge +diff --git a/configs/autoconf.in b/configs/autoconf.in +index 4a89716..f18d119 100644 +--- a/configs/autoconf.in ++++ b/configs/autoconf.in +@@ -20,6 +20,8 @@ CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ \ + $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) + LDFLAGS = @LDFLAGS@ + EXTRA_LIB_PATH = @EXTRA_LIB_PATH@ ++RADEON_CFLAGS = @RADEON_CFLAGS@ ++RADEON_LDFLAGS = @RADEON_LDFLAGS@ + + # Assembler + ASM_SOURCES = @ASM_SOURCES@ +diff --git a/configure.ac b/configure.ac +index 73caf00..48f4eac 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -413,6 +413,8 @@ AC_SUBST([SRC_DIRS]) + AC_SUBST([GLU_DIRS]) + AC_SUBST([DRIVER_DIRS]) + AC_SUBST([WINDOW_SYSTEM]) ++AC_SUBST([RADEON_CFLAGS]) ++AC_SUBST([RADEON_LDFLAGS]) + + dnl + dnl User supplied program configuration +@@ -540,6 +542,13 @@ dri) + GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED" + DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" + ++ PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no) ++ ++ if test "$HAVE_LIBDRM_RADEON" = yes; then ++ RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS" ++ RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS ++ fi ++ + # find the DRI deps for libGL + if test "$x11_pkgconfig" = yes; then + # add xcb modules if necessary +diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c +index fa8121e..abae4b3 100644 +--- a/src/mesa/drivers/dri/i965/brw_cc.c ++++ b/src/mesa/drivers/dri/i965/brw_cc.c +@@ -84,6 +84,7 @@ static void + cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) + { + struct gl_stencil_attrib *stencil = brw->attribs.Stencil; ++ const unsigned back = stencil->_BackFace; + + memset(key, 0, sizeof(*key)); + +@@ -100,13 +101,13 @@ cc_unit_populate_key(struct 
brw_context *brw, struct brw_cc_unit_key *key) + key->stencil_test_mask[0] = stencil->ValueMask[0]; + } + if (key->stencil_two_side) { +- key->stencil_func[1] = stencil->Function[1]; +- key->stencil_fail_op[1] = stencil->FailFunc[1]; +- key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1]; +- key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1]; +- key->stencil_ref[1] = stencil->Ref[1]; +- key->stencil_write_mask[1] = stencil->WriteMask[1]; +- key->stencil_test_mask[1] = stencil->ValueMask[1]; ++ key->stencil_func[1] = stencil->Function[back]; ++ key->stencil_fail_op[1] = stencil->FailFunc[back]; ++ key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[back]; ++ key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[back]; ++ key->stencil_ref[1] = stencil->Ref[back]; ++ key->stencil_write_mask[1] = stencil->WriteMask[back]; ++ key->stencil_test_mask[1] = stencil->ValueMask[back]; + } + + if (brw->attribs.Color->_LogicOpEnabled) +diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c +index c50b0d2..24149cf 100644 +--- a/src/mesa/drivers/dri/i965/brw_wm.c ++++ b/src/mesa/drivers/dri/i965/brw_wm.c +@@ -189,8 +189,7 @@ static void brw_wm_populate_key( struct brw_context *brw, + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.Stencil->WriteMask[0] || +- (brw->attribs.Stencil->_TestTwoSide && +- brw->attribs.Stencil->WriteMask[1])) ++ brw->attribs.Stencil->WriteMask[brw->attribs.Stencil->_BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + +diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile +index e9144ac..e593ed9 100644 +--- a/src/mesa/drivers/dri/r200/Makefile ++++ b/src/mesa/drivers/dri/r200/Makefile +@@ -3,6 +3,8 @@ + TOP = ../../../../.. 
+ include $(TOP)/configs/current + ++CFLAGS += $(RADEON_CFLAGS) ++ + LIBNAME = r200_dri.so + + MINIGLX_SOURCES = server/radeon_dri.c +@@ -11,25 +13,35 @@ ifeq ($(USING_EGL), 1) + EGL_SOURCES = server/radeon_egl.c + endif + ++RADEON_COMMON_SOURCES = \ ++ radeon_texture.c \ ++ radeon_common_context.c \ ++ radeon_common.c \ ++ radeon_dma.c \ ++ radeon_lock.c \ ++ radeon_bo_legacy.c \ ++ radeon_cs_legacy.c \ ++ radeon_mipmap_tree.c \ ++ radeon_span.c ++ ++ + DRIVER_SOURCES = r200_context.c \ + r200_ioctl.c \ +- r200_lock.c \ + r200_state.c \ + r200_state_init.c \ + r200_cmdbuf.c \ + r200_pixel.c \ + r200_tex.c \ +- r200_texmem.c \ + r200_texstate.c \ + r200_tcl.c \ + r200_swtcl.c \ +- r200_span.c \ + r200_maos.c \ + r200_sanity.c \ + r200_fragshader.c \ + r200_vertprog.c \ + radeon_screen.c \ +- $(EGL_SOURCES) ++ $(EGL_SOURCES) \ ++ $(RADEON_COMMON_SOURCES) + + C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) + +@@ -48,7 +60,29 @@ SYMLINKS = \ + COMMON_SYMLINKS = \ + radeon_chipset.h \ + radeon_screen.c \ +- radeon_screen.h ++ radeon_screen.h \ ++ radeon_bo_legacy.c \ ++ radeon_cs_legacy.c \ ++ radeon_bo_legacy.h \ ++ radeon_cs_legacy.h \ ++ radeon_bocs_wrapper.h \ ++ radeon_span.h \ ++ radeon_span.c \ ++ radeon_lock.c \ ++ radeon_lock.h \ ++ radeon_common.c \ ++ radeon_common_context.c \ ++ radeon_common_context.h \ ++ radeon_common.h \ ++ radeon_cmdbuf.h \ ++ radeon_mipmap_tree.c \ ++ radeon_mipmap_tree.h \ ++ radeon_texture.c \ ++ radeon_texture.h \ ++ radeon_dma.c \ ++ radeon_dma.h ++ ++DRI_LIB_DEPS += $(RADEON_LDFLAGS) + + ##### TARGETS ##### + +diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c +index e163377..ae31bcb 100644 +--- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c ++++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c +@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "swrast/swrast.h" + #include "main/simple_list.h" + ++#include "radeon_common.h" + #include "r200_context.h" + #include "r200_state.h" + #include "r200_ioctl.h" +@@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r200_sanity.h" + #include "radeon_reg.h" + +-static void print_state_atom( struct r200_state_atom *state ) +-{ +- int i; +- +- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); +- +- if (0 & R200_DEBUG & DEBUG_VERBOSE) +- for (i = 0 ; i < state->cmd_size ; i++) +- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); +- +-} +- + /* The state atoms will be emitted in the order they appear in the atom list, + * so this step is important. + */ +@@ -64,141 +53,56 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) + { + int i, mtu; + +- mtu = rmesa->glCtx->Const.MaxTextureUnits; +- +- make_empty_list(&rmesa->hw.atomlist); +- rmesa->hw.atomlist.name = "atom-list"; +- +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam ); +- insert_at_tail( &rmesa->hw.atomlist, 
&rmesa->hw.tf ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf ); ++ mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits; ++ ++ make_empty_list(&rmesa->radeon.hw.atomlist); ++ rmesa->radeon.hw.atomlist.name = "atom-list"; ++ ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf ); + for (i = 0; i < mtu; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] ); + for (i = 0; i < mtu; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] ); + for (i = 0; i < 6; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] ); ++ 
insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] ); + for (i = 0; i < 8; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] ); + for (i = 0; i < 3 + mtu; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt ); + for (i = 0; i < 2; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] ); + for (i = 0; i < 6; ++i) +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] ); +- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] ); +-} +- +-static void r200SaveHwState( r200ContextPtr rmesa ) +-{ +- struct r200_state_atom *atom; +- char * dest = rmesa->backup_store.cmd_buf; +- +- if (R200_DEBUG & DEBUG_STATE) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- rmesa->backup_store.cmd_used = 0; +- +- foreach( atom, &rmesa->hw.atomlist ) { +- if ( atom->check( rmesa->glCtx, atom->idx ) ) { +- int size = atom->cmd_size * 4; +- memcpy( dest, atom->cmd, size); +- dest += size; +- rmesa->backup_store.cmd_used += size; +- if (R200_DEBUG & DEBUG_STATE) 
+- print_state_atom( atom ); +- } +- } +- +- assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ ); +- if (R200_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Returning to r200EmitState\n"); +-} +- +-void r200EmitState( r200ContextPtr rmesa ) +-{ +- char *dest; +- int mtu; +- struct r200_state_atom *atom; +- +- if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->save_on_next_emit) { +- r200SaveHwState(rmesa); +- rmesa->save_on_next_emit = GL_FALSE; +- } +- +- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) +- return; +- +- mtu = rmesa->glCtx->Const.MaxTextureUnits; +- +- /* To avoid going across the entire set of states multiple times, just check +- * for enough space for the case of emitting all state, and inline the +- * r200AllocCmdBuf code here without all the checks. +- */ +- r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size ); +- +- /* we need to calculate dest after EnsureCmdBufSpace +- as we may flush the buffer - airlied */ +- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; +- if (R200_DEBUG & DEBUG_STATE) { +- foreach( atom, &rmesa->hw.atomlist ) { +- if ( atom->dirty || rmesa->hw.all_dirty ) { +- if ( atom->check( rmesa->glCtx, atom->idx ) ) +- print_state_atom( atom ); +- else +- fprintf(stderr, "skip state %s\n", atom->name); +- } +- } +- } +- +- foreach( atom, &rmesa->hw.atomlist ) { +- if ( rmesa->hw.all_dirty ) +- atom->dirty = GL_TRUE; +- if ( atom->dirty ) { +- if ( atom->check( rmesa->glCtx, atom->idx ) ) { +- int size = atom->cmd_size * 4; +- memcpy( dest, atom->cmd, size); +- dest += size; +- rmesa->store.cmd_used += size; +- atom->dirty = GL_FALSE; +- } +- } +- } +- +- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); +- +- rmesa->hw.is_dirty = GL_FALSE; +- rmesa->hw.all_dirty = GL_FALSE; ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp ); 
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] ); ++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] ); + } + + /* Fire a section of the retained (indexed_verts) buffer as a regular +@@ -209,50 +113,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa, + GLuint vertex_nr ) + { + drm_radeon_cmd_header_t *cmd; ++ BATCH_LOCALS(&rmesa->radeon); + + assert(!(primitive & R200_VF_PRIM_WALK_IND)); + +- r200EmitState( rmesa ); ++ radeonEmitState(&rmesa->radeon); + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) + fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, + rmesa->store.cmd_used/4, primitive, vertex_nr); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ, +- __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2; +- cmd[2].i = (primitive | +- R200_VF_PRIM_WALK_LIST | +- R200_VF_COLOR_ORDER_RGBA | +- (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT)); ++ ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0); ++ OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA | ++ (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT)); ++ END_BATCH(); + } + ++static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type) ++{ ++ BATCH_LOCALS(&rmesa->radeon); ++ ++ if (vertex_count > 0) { ++ BEGIN_BATCH(8+2); ++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0); ++ OUT_BATCH(R200_VF_PRIM_WALK_IND | ++ ((vertex_count + 0) << 16) | ++ type); ++ ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); ++ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); ++ OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset, ++ 
rmesa->tcl.elt_dma_bo, ++ rmesa->tcl.elt_dma_offset, ++ RADEON_GEM_DOMAIN_GTT, 0, 0); ++ OUT_BATCH(vertex_count/2); ++ } else { ++ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); ++ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); ++ OUT_BATCH(rmesa->tcl.elt_dma_offset); ++ OUT_BATCH(vertex_count/2); ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.elt_dma_bo, ++ RADEON_GEM_DOMAIN_GTT, 0, 0); ++ } ++ END_BATCH(); ++ } ++} + +-void r200FlushElts( r200ContextPtr rmesa ) ++void r200FlushElts(GLcontext *ctx) + { +- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); + int dwords; +- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2; ++ int nr, elt_used = rmesa->tcl.elt_used; + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) +- fprintf(stderr, "%s\n", __FUNCTION__); ++ fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used); ++ ++ assert( rmesa->radeon.dma.flush == r200FlushElts ); ++ rmesa->radeon.dma.flush = NULL; ++ ++ elt_used = (elt_used + 2) & ~2; + +- assert( rmesa->dma.flush == r200FlushElts ); +- rmesa->dma.flush = NULL; ++ nr = elt_used / 2; + +- /* Cope with odd number of elts: +- */ +- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; +- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; ++ radeon_bo_unmap(rmesa->tcl.elt_dma_bo); + +- cmd[1] |= (dwords - 3) << 16; +- cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT; ++ r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive); ++ ++ radeon_bo_unref(rmesa->tcl.elt_dma_bo); ++ rmesa->tcl.elt_dma_bo = NULL; + + if (R200_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); +- r200Finish( rmesa->glCtx ); ++ radeonFinish( rmesa->radeon.glCtx ); + } + } + +@@ -261,7 +196,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + GLuint primitive, + GLuint min_nr ) + { +- drm_radeon_cmd_header_t *cmd; + GLushort *retval; + + if (R200_DEBUG & DEBUG_IOCTL) +@@ -269,30 +203,25 @@ 
GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + + assert((primitive & R200_VF_PRIM_WALK_IND)); + +- r200EmitState( rmesa ); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr), +- __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2; +- cmd[2].i = (primitive | +- R200_VF_PRIM_WALK_IND | +- R200_VF_COLOR_ORDER_RGBA); ++ radeonEmitState(&rmesa->radeon); + ++ rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, ++ 0, R200_ELT_BUF_SZ, 4, ++ RADEON_GEM_DOMAIN_GTT, 0); ++ rmesa->tcl.elt_dma_offset = 0; ++ rmesa->tcl.elt_used = min_nr * 2; ++ ++ radeon_bo_map(rmesa->tcl.elt_dma_bo, 1); ++ retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset; + +- retval = (GLushort *)(cmd+3); + + if (R200_DEBUG & DEBUG_PRIMS) +- fprintf(stderr, "%s: header 0x%x prim %x \n", +- __FUNCTION__, +- cmd[1].i, primitive); +- +- assert(!rmesa->dma.flush); +- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +- rmesa->dma.flush = r200FlushElts; ++ fprintf(stderr, "%s: header prim %x \n", ++ __FUNCTION__, primitive); + +- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; ++ assert(!rmesa->radeon.dma.flush); ++ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; ++ rmesa->radeon.dma.flush = r200FlushElts; + + return retval; + } +@@ -300,129 +229,130 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + + + void r200EmitVertexAOS( r200ContextPtr rmesa, +- GLuint vertex_size, +- GLuint offset ) ++ GLuint vertex_size, ++ struct radeon_bo *bo, ++ GLuint offset ) + { +- drm_radeon_cmd_header_t *cmd; ++ BATCH_LOCALS(&rmesa->radeon); + + if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) + fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ, +- __FUNCTION__ ); + +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- 
cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16); +- cmd[2].i = 1; +- cmd[3].i = vertex_size | (vertex_size << 8); +- cmd[4].i = offset; ++ BEGIN_BATCH(5); ++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2); ++ OUT_BATCH(1); ++ OUT_BATCH(vertex_size | (vertex_size << 8)); ++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); ++ END_BATCH(); + } +- + +-void r200EmitAOS( r200ContextPtr rmesa, +- struct r200_dma_region **component, +- GLuint nr, +- GLuint offset ) ++void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset) + { +- drm_radeon_cmd_header_t *cmd; +- int sz = AOS_BUFSZ(nr); ++ BATCH_LOCALS(&rmesa->radeon); ++ uint32_t voffset; ++ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; +- int *tmp; +- +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16); +- cmd[2].i = nr; +- tmp = &cmd[0].i; +- cmd += 3; +- +- for (i = 0 ; i < nr ; i++) { +- if (i & 1) { +- cmd[0].i |= ((component[i]->aos_stride << 24) | +- (component[i]->aos_size << 16)); +- cmd[2].i = (component[i]->aos_start + +- offset * component[i]->aos_stride * 4); +- cmd += 3; ++ ++ if (RADEON_DEBUG & DEBUG_VERTS) ++ fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, ++ offset); + -+/* bo object */ -+#define RADEON_BO_FLAGS_MACRO_TILE 1 -+#define RADEON_BO_FLAGS_MICRO_TILE 2 ++ BEGIN_BATCH(sz+2+ (nr*2)); ++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1); ++ OUT_BATCH(nr); + -+struct radeon_bo_manager; ++ ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) | ++ (rmesa->tcl.aos[i].stride << 8) | ++ (rmesa->tcl.aos[i + 1].components << 16) | ++ (rmesa->tcl.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->tcl.aos[i + 
0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[i].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[i+1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } +- else { +- cmd[0].i = ((component[i]->aos_stride << 8) | +- (component[i]->aos_size << 0)); +- cmd[1].i = (component[i]->aos_start + +- offset * component[i]->aos_stride * 4); ++ ++ if (nr & 1) { ++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | ++ (rmesa->tcl.aos[nr - 1].stride << 8)); ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[nr - 1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ } else { ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) | ++ (rmesa->tcl.aos[i].stride << 8) | ++ (rmesa->tcl.aos[i + 1].components << 16) | ++ (rmesa->tcl.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->tcl.aos[i + 0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ OUT_BATCH(voffset); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ OUT_BATCH(voffset); ++ } ++ ++ if (nr & 1) { ++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | ++ (rmesa->tcl.aos[nr - 1].stride << 8)); ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ OUT_BATCH(voffset); ++ } ++ for (i = 0; i + 1 < nr; i += 2) { ++ voffset = rmesa->tcl.aos[i + 0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[i+0].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[i+1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } 
++ if (nr & 1) { ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[nr-1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } + } +- +- if (R200_DEBUG & DEBUG_VERTS) { +- fprintf(stderr, "%s:\n", __FUNCTION__); +- for (i = 0 ; i < sz ; i++) +- fprintf(stderr, " %d: %x\n", i, tmp[i]); +- } ++ END_BATCH(); + } + +-void r200EmitBlit( r200ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, +- GLuint w, GLuint h ) ++void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type) + { +- drm_radeon_cmd_header_t *cmd; ++ BATCH_LOCALS(&rmesa->radeon); + +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", +- __FUNCTION__, +- src_pitch, src_offset, srcx, srcy, +- dst_pitch, dst_offset, dstx, dsty, +- w, h); +- +- assert( (src_pitch & 63) == 0 ); +- assert( (dst_pitch & 63) == 0 ); +- assert( (src_offset & 1023) == 0 ); +- assert( (dst_offset & 1023) == 0 ); +- assert( w < (1<<16) ); +- assert( h < (1<<16) ); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int), +- __FUNCTION__ ); +- +- +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16); +- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | +- RADEON_GMC_DST_PITCH_OFFSET_CNTL | +- RADEON_GMC_BRUSH_NONE | +- (color_fmt << 8) | +- RADEON_GMC_SRC_DATATYPE_COLOR | +- RADEON_ROP3_S | +- RADEON_DP_SRC_SOURCE_MEMORY | +- RADEON_GMC_CLR_CMP_CNTL_DIS | +- RADEON_GMC_WR_MSK_DIS ); +- +- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); +- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); +- cmd[5].i = (srcx << 16) | srcy; +- cmd[6].i = (dstx << 16) | dsty; /* dst */ +- cmd[7].i = (w << 16) | h; ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0); ++ 
OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type); ++ END_BATCH(); + } + +- +-void r200EmitWait( r200ContextPtr rmesa, GLuint flags ) +-{ +- drm_radeon_cmd_header_t *cmd; +- +- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int), +- __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].wait.cmd_type = RADEON_CMD_WAIT; +- cmd[0].wait.flags = flags; +-} +diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c +index 5531e0a..a744469 100644 +--- a/src/mesa/drivers/dri/r200/r200_context.c ++++ b/src/mesa/drivers/dri/r200/r200_context.c +@@ -54,7 +54,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r200_context.h" + #include "r200_ioctl.h" + #include "r200_state.h" +-#include "r200_span.h" + #include "r200_pixel.h" + #include "r200_tex.h" + #include "r200_swtcl.h" +@@ -62,14 +61,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r200_maos.h" + #include "r200_vertprog.h" + +-#define need_GL_ARB_multisample +-#define need_GL_ARB_texture_compression +-#define need_GL_ARB_vertex_buffer_object ++#include "radeon_span.h" ++ + #define need_GL_ARB_vertex_program + #define need_GL_ATI_fragment_shader + #define need_GL_EXT_blend_minmax + #define need_GL_EXT_fog_coord +-#define need_GL_EXT_multi_draw_arrays + #define need_GL_EXT_secondary_color + #define need_GL_EXT_blend_equation_separate + #define need_GL_EXT_blend_func_separate +@@ -82,9 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "vblank.h" + #include "utils.h" + #include "xmlpool.h" /* for symbolic values of enum-type options */ +-#ifndef R200_DEBUG +-int R200_DEBUG = (0); +-#endif + + /* Return various strings for glGetString(). 
+ */ +@@ -93,8 +87,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) + r200ContextPtr rmesa = R200_CONTEXT(ctx); + static char buffer[128]; + unsigned offset; +- GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 : +- rmesa->r200Screen->AGPMode; ++ GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 : ++ rmesa->radeon.radeonScreen->AGPMode; + + switch ( name ) { + case GL_VENDOR: +@@ -105,7 +99,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) + agp_mode ); + + sprintf( & buffer[ offset ], " %sTCL", +- !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) ++ !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) + ? "" : "NO-" ); + + return (GLubyte *)buffer; +@@ -120,20 +114,16 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) + */ + const struct dri_extension card_extensions[] = + { +- { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_texture_border_clamp", NULL }, +- { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, +- { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, + { "GL_EXT_blend_subtract", NULL }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, +- { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_stencil_wrap", NULL }, + { "GL_EXT_texture_edge_clamp", NULL }, +@@ -242,6 +232,40 @@ static const struct dri_debug_control debug_control[] = + { NULL, 0 } + }; + ++static void r200_get_lock(radeonContextPtr radeon) ++{ ++ r200ContextPtr rmesa = (r200ContextPtr)radeon; ++ 
drm_radeon_sarea_t *sarea = radeon->sarea; ++ int i; + -+struct radeon_bo { -+ uint32_t alignment; -+ uint32_t handle; -+ uint32_t size; -+ uint32_t domains; -+ uint32_t flags; -+ unsigned cref; -+#ifdef RADEON_BO_TRACK -+ struct radeon_track *track; -+#endif -+ void *ptr; -+ struct radeon_bo_manager *bom; -+ uint32_t space_accounted; -+}; ++ R200_STATECHANGE( rmesa, ctx ); ++ if (rmesa->radeon.sarea->tiling_enabled) { ++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; ++ } ++ else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE; + -+/* bo functions */ -+struct radeon_bo_funcs { -+ struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, -+ uint32_t handle, -+ uint32_t size, -+ uint32_t alignment, -+ uint32_t domains, -+ uint32_t flags); -+ void (*bo_ref)(struct radeon_bo *bo); -+ struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); -+ int (*bo_map)(struct radeon_bo *bo, int write); -+ int (*bo_unmap)(struct radeon_bo *bo); -+ int (*bo_wait)(struct radeon_bo *bo); -+}; ++ if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) { ++ sarea->ctx_owner = rmesa->radeon.dri.hwContext; ++ if (!radeon->radeonScreen->kernel_mm) ++ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom); ++ } + -+struct radeon_bo_manager { -+ struct radeon_bo_funcs *funcs; -+ int fd; ++} + -+#ifdef RADEON_BO_TRACK -+ struct radeon_tracker tracker; -+#endif -+}; -+ -+static inline void _radeon_bo_debug(struct radeon_bo *bo, -+ const char *op, -+ const char *file, -+ const char *func, -+ int line) ++static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ -+ fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n", -+ op, bo, bo->handle, bo->size, bo->cref, file, func, line); +} + -+static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom, -+ uint32_t handle, -+ uint32_t size, -+ uint32_t alignment, -+ uint32_t domains, -+ uint32_t flags, -+ const char *file, -+ const char *func, -+ int line) ++ ++static 
void r200_init_vtbl(radeonContextPtr radeon) +{ -+ struct radeon_bo *bo; ++ radeon->vtbl.get_lock = r200_get_lock; ++ radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset; ++ radeon->vtbl.update_draw_buffer = r200UpdateDrawBuffer; ++ radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header; ++ radeon->vtbl.swtcl_flush = r200_swtcl_flush; ++} + -+ bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); -+#ifdef RADEON_BO_TRACK -+ if (bo) { -+ bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle); -+ radeon_track_add_event(bo->track, file, func, "open", line); -+ } + + /* Create the device specific rendering context. + */ +@@ -253,9 +277,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); + struct dd_function_table functions; + r200ContextPtr rmesa; +- GLcontext *ctx, *shareCtx; ++ GLcontext *ctx; + int i; +- int tcl_mode, fthrottle_mode; ++ int tcl_mode; + + assert(glVisual); + assert(driContextPriv); +@@ -265,7 +289,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) ); + if ( !rmesa ) + return GL_FALSE; +- ++ ++ r200_init_vtbl(&rmesa->radeon); + /* init exp fog table data */ + r200InitStaticFogData(); + +@@ -273,12 +298,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. 
+ */ +- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, ++ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r200"); +- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, +- "def_max_anisotropy"); ++ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, ++ "def_max_anisotropy"); + +- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { ++ if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { + if ( sPriv->drm_version.minor < 13 ) + fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " + "disabling.\n", sPriv->drm_version.minor ); +@@ -299,59 +324,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + r200InitTextureFuncs(&functions); + r200InitShaderFuncs(&functions); + +- /* Allocate and initialize the Mesa context */ +- if (sharedContextPrivate) +- shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx; +- else +- shareCtx = NULL; +- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, +- &functions, (void *) rmesa); +- if (!rmesa->glCtx) { +- FREE(rmesa); +- return GL_FALSE; +- } +- driContextPriv->driverPrivate = rmesa; +- +- /* Init r200 context data */ +- rmesa->dri.context = driContextPriv; +- rmesa->dri.screen = sPriv; +- rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */ +- rmesa->dri.hwContext = driContextPriv->hHWContext; +- rmesa->dri.hwLock = &sPriv->pSAREA->lock; +- rmesa->dri.fd = sPriv->fd; +- rmesa->dri.drmMinor = sPriv->drm_version.minor; +- +- rmesa->r200Screen = screen; +- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + +- screen->sarea_priv_offset); +- +- +- rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address; +- +- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); +- make_empty_list( & rmesa->swapped ); +- +- rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ; +- assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS); +- for ( i = 0 ; i 
< rmesa->nr_heaps ; i++ ) { +- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, +- screen->texSize[i], +- 12, +- RADEON_NR_TEX_REGIONS, +- (drmTextureRegionPtr)rmesa->sarea->tex_list[i], +- & rmesa->sarea->tex_age[i], +- & rmesa->swapped, +- sizeof( r200TexObj ), +- (destroy_texture_object_t *) r200DestroyTexObj ); ++ if (!radeonInitContext(&rmesa->radeon, &functions, ++ glVisual, driContextPriv, ++ sharedContextPrivate)) { ++ FREE(rmesa); ++ return GL_FALSE; + } +- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, ++ ++ rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache, + "texture_depth"); +- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) +- rmesa->texture_depth = ( screen->cpp == 4 ) ? ++ if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) ++ rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + +- rmesa->swtcl.RenderIndex = ~0; +- rmesa->hw.all_dirty = 1; ++ rmesa->radeon.swtcl.RenderIndex = ~0; ++ rmesa->radeon.hw.all_dirty = 1; + + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have all of them in +@@ -359,29 +346,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + * setting allow larger textures. 
+ */ + +- ctx = rmesa->glCtx; +- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, ++ ctx = rmesa->radeon.glCtx; ++ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache, + "texture_units"); + ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + +- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); +- +- driCalculateMaxTextureLevels( rmesa->texture_heaps, +- rmesa->nr_heaps, +- & ctx->Const, +- 4, +- 11, /* max 2D texture size is 2048x2048 */ +-#if ENABLE_HW_3D_TEXTURE +- 8, /* max 3D texture size is 256^3 */ +-#else +- 0, /* 3D textures unsupported */ +-#endif +- 11, /* max cube texture size is 2048x2048 */ +- 11, /* max texture rectangle size is 2048x2048 */ +- 12, +- GL_FALSE, +- i ); ++ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + +@@ -391,7 +362,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSizeAA = 1.0; + ctx->Const.PointSizeGranularity = 0.0625; +- if (rmesa->r200Screen->drmSupportsPointSprites) ++ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) + ctx->Const.MaxPointSize = 2047.0; + else + ctx->Const.MaxPointSize = 1.0; +@@ -411,6 +382,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ++ ctx->Const.MaxDrawBuffers = 1; ++ + /* Initialize the software rasterizer and helper modules. 
+ */ + _swrast_CreateContext( ctx ); +@@ -445,32 +418,32 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + _math_matrix_set_identity( &rmesa->tmpmat ); + + driInitExtensions( ctx, card_extensions, GL_TRUE ); +- if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) { ++ if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) { + /* yuv textures don't work with some chips - R200 / rv280 okay so far + others get the bit ordering right but don't actually do YUV-RGB conversion */ + _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" ); + } +- if (rmesa->glCtx->Mesa_DXTn) { ++ if (rmesa->radeon.glCtx->Mesa_DXTn) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + _mesa_enable_extension( ctx, "GL_S3_s3tc" ); + } +- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { ++ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + } + +- if (rmesa->r200Screen->drmSupportsCubeMapsR200) ++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) + _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); +- if (rmesa->r200Screen->drmSupportsBlendColor) { ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { + driInitExtensions( ctx, blend_extensions, GL_FALSE ); + } +- if(rmesa->r200Screen->drmSupportsVertexProgram) ++ if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram) + driInitSingleExtension( ctx, ARB_vp_extension ); +- if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program")) ++ if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program")) + driInitSingleExtension( ctx, NV_vp_extension ); + +- if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader) ++ if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader) + driInitSingleExtension( ctx, ATI_fs_extension ); +- if (rmesa->r200Screen->drmSupportsPointSprites) ++ if 
(rmesa->radeon.radeonScreen->drmSupportsPointSprites) + driInitExtensions( ctx, point_extensions, GL_FALSE ); + #if 0 + r200InitDriverFuncs( ctx ); +@@ -480,33 +453,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + #endif + /* plug in a few more device driver functions */ + /* XXX these should really go right after _mesa_init_driver_functions() */ ++ radeonInitSpanFuncs( ctx ); + r200InitPixelFuncs( ctx ); +- r200InitSpanFuncs( ctx ); + r200InitTnlFuncs( ctx ); + r200InitState( rmesa ); + r200InitSwtcl( ctx ); + +- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); +- rmesa->iw.irq_seq = -1; +- rmesa->irqsEmitted = 0; +- rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && +- rmesa->r200Screen->irq); +- +- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); +- +- if (!rmesa->do_irqs) +- fprintf(stderr, +- "IRQ's not enabled, falling back to %s: %d %d\n", +- rmesa->do_usleeps ? "usleeps" : "busy waits", +- fthrottle_mode, +- rmesa->r200Screen->irq); +- + rmesa->prefer_gart_client_texturing = + (getenv("R200_GART_CLIENT_TEXTURES") != 0); + +- (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); +- +- + #if DO_DEBUG + R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ), + debug_control ); +@@ -514,18 +469,18 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, + debug_control ); + #endif + +- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); +- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { ++ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); ++ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); + FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1); + } + else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") || +- !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { +- if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) { +- rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL; ++ 
!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { ++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { ++ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } +- TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); ++ TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); + } + + return GL_TRUE; +@@ -544,55 +499,33 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) + + /* check if we're deleting the currently bound context */ + if (rmesa == current) { +- R200_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + _mesa_make_current(NULL, NULL, NULL); + } + + /* Free r200 context resources */ + assert(rmesa); /* should never be null */ + if ( rmesa ) { +- GLboolean release_texture_heaps; + ++ _swsetup_DestroyContext( rmesa->radeon.glCtx ); ++ _tnl_DestroyContext( rmesa->radeon.glCtx ); ++ _vbo_DestroyContext( rmesa->radeon.glCtx ); ++ _swrast_DestroyContext( rmesa->radeon.glCtx ); + +- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); +- _swsetup_DestroyContext( rmesa->glCtx ); +- _tnl_DestroyContext( rmesa->glCtx ); +- _vbo_DestroyContext( rmesa->glCtx ); +- _swrast_DestroyContext( rmesa->glCtx ); ++ r200DestroySwtcl( rmesa->radeon.glCtx ); ++ r200ReleaseArrays( rmesa->radeon.glCtx, ~0 ); + +- r200DestroySwtcl( rmesa->glCtx ); +- r200ReleaseArrays( rmesa->glCtx, ~0 ); +- +- if (rmesa->dma.current.buf) { +- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); +- r200FlushCmdBuf( rmesa, __FUNCTION__ ); +- } +- +- if (rmesa->state.scissor.pClipRects) { +- FREE(rmesa->state.scissor.pClipRects); +- rmesa->state.scissor.pClipRects = NULL; ++ if (rmesa->radeon.dma.current) { ++ radeonReleaseDmaRegion( &rmesa->radeon ); ++ rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ ); + } + +- if ( release_texture_heaps ) { +- /* This share group is about to go away, free our private +- * texture object data. 
+- */ +- int i; +- +- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { +- driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); +- rmesa->texture_heaps[ i ] = NULL; +- } +- +- assert( is_empty_list( & rmesa->swapped ) ); ++ if (rmesa->radeon.state.scissor.pClipRects) { ++ FREE(rmesa->radeon.state.scissor.pClipRects); ++ rmesa->radeon.state.scissor.pClipRects = NULL; + } + +- /* free the Mesa context */ +- rmesa->glCtx->DriverCtx = NULL; +- _mesa_destroy_context( rmesa->glCtx ); +- +- /* free the option cache */ +- driDestroyOptionCache (&rmesa->optionCache); ++ radeonCleanupContext(&rmesa->radeon); + + FREE( rmesa ); + } +@@ -600,107 +533,6 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) + + + +- +-void +-r200SwapBuffers( __DRIdrawablePrivate *dPriv ) +-{ +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- r200ContextPtr rmesa; +- GLcontext *ctx; +- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = rmesa->glCtx; +- if (ctx->Visual.doubleBufferMode) { +- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ +- if ( rmesa->doPageFlip ) { +- r200PageFlip( dPriv ); +- } +- else { +- r200CopyBuffer( dPriv, NULL ); +- } +- } +- } +- else { +- /* XXX this shouldn't be an error but we can't handle it for now */ +- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); +- } +-} +- +-void +-r200CopySubBuffer( __DRIdrawablePrivate *dPriv, +- int x, int y, int w, int h ) +-{ +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- r200ContextPtr rmesa; +- GLcontext *ctx; +- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = rmesa->glCtx; +- if (ctx->Visual.doubleBufferMode) { +- drm_clip_rect_t rect; +- rect.x1 = x + dPriv->x; +- rect.y1 = (dPriv->h - y - h) + dPriv->y; +- rect.x2 = rect.x1 + w; +- rect.y2 = rect.y1 + h; +- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ +- r200CopyBuffer( dPriv, &rect ); +- } +- } +- else { +- /* XXX 
this shouldn't be an error but we can't handle it for now */ +- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); +- } +-} +- +-/* Force the context `c' to be the current context and associate with it +- * buffer `b'. +- */ +-GLboolean +-r200MakeCurrent( __DRIcontextPrivate *driContextPriv, +- __DRIdrawablePrivate *driDrawPriv, +- __DRIdrawablePrivate *driReadPriv ) +-{ +- if ( driContextPriv ) { +- r200ContextPtr newCtx = +- (r200ContextPtr) driContextPriv->driverPrivate; +- +- if (R200_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx); +- +- newCtx->dri.readable = driReadPriv; +- +- if ( newCtx->dri.drawable != driDrawPriv || +- newCtx->lastStamp != driDrawPriv->lastStamp ) { +- if (driDrawPriv->swap_interval == (unsigned)-1) { +- driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0) +- ? driGetDefaultVBlankFlags(&newCtx->optionCache) +- : VBLANK_FLAG_NO_IRQ; +- +- driDrawableInitVBlank( driDrawPriv ); +- } +- +- newCtx->dri.drawable = driDrawPriv; +- +- r200SetCliprects(newCtx); +- r200UpdateViewportOffset( newCtx->glCtx ); +- } +- +- _mesa_make_current( newCtx->glCtx, +- (GLframebuffer *) driDrawPriv->driverPrivate, +- (GLframebuffer *) driReadPriv->driverPrivate ); +- +- _mesa_update_state( newCtx->glCtx ); +- r200ValidateState( newCtx->glCtx ); +- +- } else { +- if (R200_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx is null\n", __FUNCTION__); +- _mesa_make_current( NULL, NULL, NULL ); +- } +- +- if (R200_DEBUG & DEBUG_DRI) +- fprintf(stderr, "End %s\n", __FUNCTION__); +- return GL_TRUE; +-} +- + /* Force the context `c' to be unbound from its buffer. 
+ */ + GLboolean +@@ -709,7 +541,7 @@ r200UnbindContext( __DRIcontextPrivate *driContextPriv ) + r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate; + + if (R200_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx); ++ fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->radeon.glCtx); + + return GL_TRUE; + } +diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h +index 14a1dda..fcbe725 100644 +--- a/src/mesa/drivers/dri/r200/r200_context.h ++++ b/src/mesa/drivers/dri/r200/r200_context.h +@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #error This driver requires a newer libdrm to compile + #endif + ++#include "radeon_screen.h" ++#include "radeon_common.h" ++ ++#include "radeon_lock.h" ++ + struct r200_context; + typedef struct r200_context r200ContextRec; + typedef struct r200_context *r200ContextPtr; + +-/* This union is used to avoid warnings/miscompilation +- with float to uint32_t casts due to strict-aliasing */ +-typedef union { GLfloat f; uint32_t ui32; } float_ui32_type; +- +-#include "r200_lock.h" +-#include "radeon_screen.h" + #include "main/mm.h" + +-/* Flags for software fallback cases */ +-/* See correponding strings in r200_swtcl.c */ +-#define R200_FALLBACK_TEXTURE 0x01 +-#define R200_FALLBACK_DRAW_BUFFER 0x02 +-#define R200_FALLBACK_STENCIL 0x04 +-#define R200_FALLBACK_RENDER_MODE 0x08 +-#define R200_FALLBACK_DISABLE 0x10 +-#define R200_FALLBACK_BORDER_MODE 0x20 +- +-/* The blit width for texture uploads +- */ +-#define BLIT_WIDTH_BYTES 1024 +- +-/* Use the templated vertex format: +- */ +-#define COLOR_IS_RGBA +-#define TAG(x) r200##x +-#include "tnl_dd/t_dd_vertex.h" +-#undef TAG +- +-typedef void (*r200_tri_func)( r200ContextPtr, +- r200Vertex *, +- r200Vertex *, +- r200Vertex * ); +- +-typedef void (*r200_line_func)( r200ContextPtr, +- r200Vertex *, +- r200Vertex * ); +- +-typedef void (*r200_point_func)( 
r200ContextPtr, +- r200Vertex * ); +- +- + struct r200_vertex_program { + struct gl_vertex_program mesa_program; /* Must be first */ + int translated; +@@ -112,93 +78,11 @@ struct r200_vertex_program { + int fogmode; + }; + +-struct r200_colorbuffer_state { +- GLuint clear; +-#if 000 +- GLint drawOffset, drawPitch; +-#endif +- int roundEnable; +-}; +- +- +-struct r200_depthbuffer_state { +- GLuint clear; +- GLfloat scale; +-}; +- +-#if 000 +-struct r200_pixel_state { +- GLint readOffset, readPitch; +-}; +-#endif +- +-struct r200_scissor_state { +- drm_clip_rect_t rect; +- GLboolean enabled; +- +- GLuint numClipRects; /* Cliprects active */ +- GLuint numAllocedClipRects; /* Cliprects available */ +- drm_clip_rect_t *pClipRects; +-}; +- +-struct r200_stencilbuffer_state { +- GLboolean hwBuffer; +- GLuint clear; /* rb3d_stencilrefmask value */ +-}; +- +-struct r200_stipple_state { +- GLuint mask[32]; +-}; +- +- +- +-#define TEX_0 0x1 +-#define TEX_1 0x2 +-#define TEX_2 0x4 +-#define TEX_3 0x8 +-#define TEX_4 0x10 +-#define TEX_5 0x20 +-#define TEX_ALL 0x3f +- +-typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr; +- +-/* Texture object in locally shared texture space. +- */ +-struct r200_tex_obj { +- driTextureObject base; +- +- GLuint bufAddr; /* Offset to start of locally +- shared texture block */ +- +- GLuint dirty_state; /* Flags (1 per texunit) for +- whether or not this texobj +- has dirty hardware state +- (pp_*) that needs to be +- brought into the +- texunit. */ +- +- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; +- /* Six, for the cube faces */ +- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ +- +- GLuint pp_txfilter; /* hardware register values */ +- GLuint pp_txformat; +- GLuint pp_txformat_x; +- GLuint pp_txoffset; /* Image location in texmem. +- All cube faces follow. 
*/ +- GLuint pp_txsize; /* npot only */ +- GLuint pp_txpitch; /* npot only */ +- GLuint pp_border_color; +- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ +- +- GLboolean border_fallback; +- +- GLuint tile_bits; /* hw texture tile bits used on this texture */ +-}; ++#define R200_TEX_ALL 0x3f + + + struct r200_texture_env_state { +- r200TexObjPtr texobj; ++ radeonTexObjPtr texobj; + GLuint outputreg; + GLuint unitneeded; + }; +@@ -210,19 +94,6 @@ struct r200_texture_state { + }; + + +-struct r200_state_atom { +- struct r200_state_atom *next, *prev; +- const char *name; /* for debug */ +- int cmd_size; /* size in bytes */ +- GLuint idx; +- int *cmd; /* one or more cmd's */ +- int *lastcmd; /* one or more cmd's */ +- GLboolean dirty; +- GLboolean (*check)( GLcontext *, int ); /* is this state active? */ +-}; +- +- +- + /* Trying to keep these relatively short as the variables are becoming + * extravagently long. Drop the driver name prefix off the front of + * everything - I think we know which driver we're in by now, and keep the +@@ -597,181 +468,85 @@ struct r200_state_atom { + + + struct r200_hw_state { +- /* Head of the linked list of state atoms. */ +- struct r200_state_atom atomlist; +- + /* Hardware state, stored as cmdbuf commands: + * -- Need to doublebuffer for + * - reviving state after loss of context + * - eliding noop statechange loops? 
(except line stipple count) + */ +- struct r200_state_atom ctx; +- struct r200_state_atom set; +- struct r200_state_atom vte; +- struct r200_state_atom lin; +- struct r200_state_atom msk; +- struct r200_state_atom vpt; +- struct r200_state_atom vap; +- struct r200_state_atom vtx; +- struct r200_state_atom tcl; +- struct r200_state_atom msl; +- struct r200_state_atom tcg; +- struct r200_state_atom msc; +- struct r200_state_atom cst; +- struct r200_state_atom tam; +- struct r200_state_atom tf; +- struct r200_state_atom tex[6]; +- struct r200_state_atom cube[6]; +- struct r200_state_atom zbs; +- struct r200_state_atom mtl[2]; +- struct r200_state_atom mat[9]; +- struct r200_state_atom lit[8]; /* includes vec, scl commands */ +- struct r200_state_atom ucp[6]; +- struct r200_state_atom pix[6]; /* pixshader stages */ +- struct r200_state_atom eye; /* eye pos */ +- struct r200_state_atom grd; /* guard band clipping */ +- struct r200_state_atom fog; +- struct r200_state_atom glt; +- struct r200_state_atom prf; +- struct r200_state_atom afs[2]; +- struct r200_state_atom pvs; +- struct r200_state_atom vpi[2]; +- struct r200_state_atom vpp[2]; +- struct r200_state_atom atf; +- struct r200_state_atom spr; +- struct r200_state_atom ptp; +- +- int max_state_size; /* Number of bytes necessary for a full state emit. 
*/ +- GLboolean is_dirty, all_dirty; ++ struct radeon_state_atom ctx; ++ struct radeon_state_atom set; ++ struct radeon_state_atom vte; ++ struct radeon_state_atom lin; ++ struct radeon_state_atom msk; ++ struct radeon_state_atom vpt; ++ struct radeon_state_atom vap; ++ struct radeon_state_atom vtx; ++ struct radeon_state_atom tcl; ++ struct radeon_state_atom msl; ++ struct radeon_state_atom tcg; ++ struct radeon_state_atom msc; ++ struct radeon_state_atom cst; ++ struct radeon_state_atom tam; ++ struct radeon_state_atom tf; ++ struct radeon_state_atom tex[6]; ++ struct radeon_state_atom cube[6]; ++ struct radeon_state_atom zbs; ++ struct radeon_state_atom mtl[2]; ++ struct radeon_state_atom mat[9]; ++ struct radeon_state_atom lit[8]; /* includes vec, scl commands */ ++ struct radeon_state_atom ucp[6]; ++ struct radeon_state_atom pix[6]; /* pixshader stages */ ++ struct radeon_state_atom eye; /* eye pos */ ++ struct radeon_state_atom grd; /* guard band clipping */ ++ struct radeon_state_atom fog; ++ struct radeon_state_atom glt; ++ struct radeon_state_atom prf; ++ struct radeon_state_atom afs[2]; ++ struct radeon_state_atom pvs; ++ struct radeon_state_atom vpi[2]; ++ struct radeon_state_atom vpp[2]; ++ struct radeon_state_atom atf; ++ struct radeon_state_atom spr; ++ struct radeon_state_atom ptp; + }; + + struct r200_state { + /* Derived state for internal purposes: + */ +- struct r200_colorbuffer_state color; +- struct r200_depthbuffer_state depth; +-#if 00 +- struct r200_pixel_state pixel; +-#endif +- struct r200_scissor_state scissor; +- struct r200_stencilbuffer_state stencil; +- struct r200_stipple_state stipple; ++ struct radeon_stipple_state stipple; + struct r200_texture_state texture; + GLuint envneeded; + }; + +-/* Need refcounting on dma buffers: +- */ +-struct r200_dma_buffer { +- int refcount; /* the number of retained regions in buf */ +- drmBufPtr buf; +-}; +- +-#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset + \ +- (rvb)->address - 
rmesa->dma.buf0_address + \ +- (rvb)->start) +- +-/* A retained region, eg vertices for indexed vertices. +- */ +-struct r200_dma_region { +- struct r200_dma_buffer *buf; +- char *address; /* == buf->address */ +- int start, end, ptr; /* offsets from start of buf */ +- int aos_start; +- int aos_stride; +- int aos_size; +-}; +- +- +-struct r200_dma { +- /* Active dma region. Allocations for vertices and retained +- * regions come from here. Also used for emitting random vertices, +- * these may be flushed by calling flush_current(); +- */ +- struct r200_dma_region current; +- +- void (*flush)( r200ContextPtr ); +- +- char *buf0_address; /* start of buf[0], for index calcs */ +- GLuint nr_released_bufs; /* flush after so many buffers released */ +-}; +- +-struct r200_dri_mirror { +- __DRIcontextPrivate *context; /* DRI context */ +- __DRIscreenPrivate *screen; /* DRI screen */ +- __DRIdrawablePrivate *drawable; /* DRI drawable bound to this ctx */ +- __DRIdrawablePrivate *readable; /* DRI readable bound to this ctx */ +- +- drm_context_t hwContext; +- drm_hw_lock_t *hwLock; +- int fd; +- int drmMinor; +-}; +- +- + #define R200_CMD_BUF_SZ (16*1024) + +-struct r200_store { +- GLuint statenr; +- GLuint primnr; +- char cmd_buf[R200_CMD_BUF_SZ]; +- int cmd_used; +- int elts_start; +-}; +- +- ++#define R200_ELT_BUF_SZ (16*1024) + /* r200_tcl.c + */ + struct r200_tcl_info { + GLuint hw_primitive; + + /* hw can handle 12 components max */ +- struct r200_dma_region *aos_components[12]; ++ struct radeon_aos aos[12]; + GLuint nr_aos_components; + + GLuint *Elts; + +- struct r200_dma_region indexed_verts; +- struct r200_dma_region vertex_data[15]; ++ struct radeon_bo *elt_dma_bo; ++ int elt_dma_offset; /** Offset into this buffer object, in bytes */ ++ int elt_used; ++ + }; + + + /* r200_swtcl.c + */ + struct r200_swtcl_info { +- GLuint RenderIndex; +- +- /** +- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is +- * installed in the Mesa state vector. 
+- */ +- GLuint vertex_size; + +- /** +- * Attributes instructing the Mesa TCL pipeline where / how to put vertex +- * data in the hardware buffer. +- */ +- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + +- /** +- * Number of elements of \c ::vertex_attrs that are actually used. +- */ +- GLuint vertex_attr_count; +- +- /** +- * Cached pointer to the buffer where Mesa will store vertex data. +- */ +- GLubyte *verts; +- +- /* Fallback rasterization functions +- */ +- r200_point_func draw_point; +- r200_line_func draw_line; +- r200_tri_func draw_tri; +- +- GLuint hw_primitive; +- GLenum render_primitive; +- GLuint numverts; ++ radeon_point_func draw_point; ++ radeon_line_func draw_line; ++ radeon_tri_func draw_tri; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. +@@ -787,27 +562,10 @@ struct r200_swtcl_info { + * Should Mesa project vertex data or will the hardware do it? + */ + GLboolean needproj; +- +- struct r200_dma_region indexed_verts; +-}; +- +- +-struct r200_ioctl { +- GLuint vertex_offset; +- GLuint vertex_size; + }; + + + +-#define R200_MAX_PRIMS 64 +- +- +- +-struct r200_prim { +- GLuint start; +- GLuint end; +- GLuint prim; +-}; + + /* A maximum total of 29 elements per vertex: 3 floats for position, 3 + * floats for normal, 4 floats for color, 4 bytes for secondary color, +@@ -822,9 +580,8 @@ struct r200_prim { + + #define R200_MAX_VERTEX_SIZE ((3*6)+11) + +- + struct r200_context { +- GLcontext *glCtx; /* Mesa context */ ++ struct radeon_context radeon; + + /* Driver and hardware state management + */ +@@ -832,56 +589,15 @@ struct r200_context { + struct r200_state state; + struct r200_vertex_program *curr_vp_hw; + +- /* Texture object bookkeeping +- */ +- unsigned nr_heaps; +- driTexHeap * texture_heaps[ RADEON_NR_TEX_HEAPS ]; +- driTextureObject swapped; +- int texture_depth; +- float initialMaxAnisotropy; +- +- /* Rasterization and vertex state: +- */ +- GLuint TclFallback; +- GLuint Fallback; +- GLuint NewGLState; +- 
DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ +- + /* Vertex buffers + */ +- struct r200_ioctl ioctl; +- struct r200_dma dma; +- struct r200_store store; +- /* A full state emit as of the first state emit in the main store, in case +- * the context is lost. +- */ +- struct r200_store backup_store; +- +- /* Page flipping +- */ +- GLuint doPageFlip; +- +- /* Busy waiting +- */ +- GLuint do_usleeps; +- GLuint do_irqs; +- GLuint irqsEmitted; +- drm_radeon_irq_wait_t iw; ++ struct radeon_ioctl ioctl; ++ struct radeon_store store; + + /* Clientdata textures; + */ + GLuint prefer_gart_client_texturing; + +- /* Drawable, cliprect and scissor information +- */ +- GLuint numClipRects; /* Cliprects for the draw buffer */ +- drm_clip_rect_t *pClipRects; +- unsigned int lastStamp; +- GLboolean lost_context; +- GLboolean save_on_next_emit; +- radeonScreenPtr r200Screen; /* Screen private DRI data */ +- drm_radeon_sarea_t *sarea; /* Private SAREA data */ +- + /* TCL stuff + */ + GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS]; +@@ -893,15 +609,6 @@ struct r200_context { + GLuint TexGenCompSel; + GLmatrix tmpmat; + +- /* buffer swap +- */ +- int64_t swap_ust; +- int64_t swap_missed_ust; +- +- GLuint swap_count; +- GLuint swap_missed_count; +- +- + /* r200_tcl.c + */ + struct r200_tcl_info tcl; +@@ -910,14 +617,6 @@ struct r200_context { + */ + struct r200_swtcl_info swtcl; + +- /* Mirrors of some DRI state +- */ +- struct r200_dri_mirror dri; +- +- /* Configuration cache +- */ +- driOptionCache optionCache; +- + GLboolean using_hyperz; + GLboolean texmicrotile; + +@@ -927,28 +626,10 @@ struct r200_context { + #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) + + +-static INLINE GLuint r200PackColor( GLuint cpp, +- GLubyte r, GLubyte g, +- GLubyte b, GLubyte a ) +-{ +- switch ( cpp ) { +- case 2: +- return PACK_COLOR_565( r, g, b ); +- case 4: +- return PACK_COLOR_8888( a, r, g, b ); +- default: +- return 0; +- } +-} +- +- + extern 
void r200DestroyContext( __DRIcontextPrivate *driContextPriv ); + extern GLboolean r200CreateContext( const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); +-extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv ); +-extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv, +- int x, int y, int w, int h ); + extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv ); +@@ -957,28 +638,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv ); + /* ================================================================ + * Debugging: + */ +-#define DO_DEBUG 1 + +-#if DO_DEBUG +-extern int R200_DEBUG; +-#else +-#define R200_DEBUG 0 +-#endif ++#define R200_DEBUG RADEON_DEBUG ++ + +-#define DEBUG_TEXTURE 0x001 +-#define DEBUG_STATE 0x002 +-#define DEBUG_IOCTL 0x004 +-#define DEBUG_PRIMS 0x008 +-#define DEBUG_VERTS 0x010 +-#define DEBUG_FALLBACKS 0x020 +-#define DEBUG_VFMT 0x040 +-#define DEBUG_CODEGEN 0x080 +-#define DEBUG_VERBOSE 0x100 +-#define DEBUG_DRI 0x200 +-#define DEBUG_DMA 0x400 +-#define DEBUG_SANITY 0x800 +-#define DEBUG_SYNC 0x1000 +-#define DEBUG_PIXEL 0x2000 +-#define DEBUG_MEMORY 0x4000 + + #endif /* __R200_CONTEXT_H__ */ +diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c +index d514b28..85c1b7b 100644 +--- a/src/mesa/drivers/dri/r200/r200_fragshader.c ++++ b/src/mesa/drivers/dri/r200/r200_fragshader.c +@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx ) + CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]); + } +- rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor ( ++ rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor ( + 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] ); + } + } +diff --git 
a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c +index 0741e57..c08968f 100644 +--- a/src/mesa/drivers/dri/r200/r200_ioctl.c ++++ b/src/mesa/drivers/dri/r200/r200_ioctl.c +@@ -41,6 +41,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/context.h" + #include "swrast/swrast.h" + ++#include "radeon_common.h" ++#include "radeon_lock.h" + #include "r200_context.h" + #include "r200_state.h" + #include "r200_ioctl.h" +@@ -54,635 +56,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define R200_TIMEOUT 512 + #define R200_IDLE_RETRY 16 + +- +-static void r200WaitForIdle( r200ContextPtr rmesa ); +- +- +-/* At this point we were in FlushCmdBufLocked but we had lost our context, so +- * we need to unwire our current cmdbuf, hook the one with the saved state in +- * it, flush it, and then put the current one back. This is so commands at the +- * start of a cmdbuf can rely on the state being kept from the previous one. 
+- */ +-static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa ) +-{ +- GLuint nr_released_bufs; +- struct r200_store saved_store; +- +- if (rmesa->backup_store.cmd_used == 0) +- return; +- +- if (R200_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Emitting backup state on lost context\n"); +- +- rmesa->lost_context = GL_FALSE; +- +- nr_released_bufs = rmesa->dma.nr_released_bufs; +- saved_store = rmesa->store; +- rmesa->dma.nr_released_bufs = 0; +- rmesa->store = rmesa->backup_store; +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); +- rmesa->dma.nr_released_bufs = nr_released_bufs; +- rmesa->store = saved_store; +-} +- +-int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ) +-{ +- int ret, i; +- drm_radeon_cmd_buffer_t cmd; +- +- if (rmesa->lost_context) +- r200BackUpAndEmitLostStateLocked( rmesa ); +- +- if (R200_DEBUG & DEBUG_IOCTL) { +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (0 & R200_DEBUG & DEBUG_VERBOSE) +- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) +- fprintf(stderr, "%d: %x\n", i/4, +- *(int *)(&rmesa->store.cmd_buf[i])); +- } +- +- if (R200_DEBUG & DEBUG_DMA) +- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, +- rmesa->dma.nr_released_bufs); +- +- +- if (R200_DEBUG & DEBUG_SANITY) { +- if (rmesa->state.scissor.enabled) +- ret = r200SanityCmdBuffer( rmesa, +- rmesa->state.scissor.numClipRects, +- rmesa->state.scissor.pClipRects); +- else +- ret = r200SanityCmdBuffer( rmesa, +- rmesa->numClipRects, +- rmesa->pClipRects); +- if (ret) { +- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); +- goto out; +- } +- } +- +- +- if (R200_DEBUG & DEBUG_MEMORY) { +- if (! 
driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps, +- & rmesa->swapped ) ) { +- fprintf( stderr, "%s: texture memory is inconsistent - expect " +- "mangled textures\n", __FUNCTION__ ); +- } +- } +- +- +- cmd.bufsz = rmesa->store.cmd_used; +- cmd.buf = rmesa->store.cmd_buf; +- +- if (rmesa->state.scissor.enabled) { +- cmd.nbox = rmesa->state.scissor.numClipRects; +- cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects; +- } else { +- cmd.nbox = rmesa->numClipRects; +- cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects; +- } +- +- ret = drmCommandWrite( rmesa->dri.fd, +- DRM_RADEON_CMDBUF, +- &cmd, sizeof(cmd) ); +- +- if (ret) +- fprintf(stderr, "drmCommandWrite: %d\n", ret); +- +- if (R200_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); +- r200WaitForIdleLocked( rmesa ); +- } +- +- +- out: +- rmesa->store.primnr = 0; +- rmesa->store.statenr = 0; +- rmesa->store.cmd_used = 0; +- rmesa->dma.nr_released_bufs = 0; +- rmesa->save_on_next_emit = 1; +- +- return ret; +-} +- +- +-/* Note: does not emit any commands to avoid recursion on +- * r200AllocCmdBuf. 
+- */ +-void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller ) +-{ +- int ret; +- +- LOCK_HARDWARE( rmesa ); +- +- ret = r200FlushCmdBufLocked( rmesa, caller ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if (ret) { +- fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret); +- exit(ret); +- } +-} +- +- +-/* ============================================================= +- * Hardware vertex buffer handling +- */ +- +- +-void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ) +-{ +- struct r200_dma_buffer *dmabuf; +- int fd = rmesa->dri.fd; +- int index = 0; +- int size = 0; +- drmDMAReq dma; +- int ret; +- +- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->dma.flush) { +- rmesa->dma.flush( rmesa ); +- } +- +- if (rmesa->dma.current.buf) +- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); +- +- if (rmesa->dma.nr_released_bufs > 4) +- r200FlushCmdBuf( rmesa, __FUNCTION__ ); +- +- dma.context = rmesa->dri.hwContext; +- dma.send_count = 0; +- dma.send_list = NULL; +- dma.send_sizes = NULL; +- dma.flags = 0; +- dma.request_count = 1; +- dma.request_size = RADEON_BUFFER_SIZE; +- dma.request_list = &index; +- dma.request_sizes = &size; +- dma.granted_count = 0; +- +- LOCK_HARDWARE(rmesa); /* no need to validate */ +- +- while (1) { +- ret = drmDMA( fd, &dma ); +- if (ret == 0) +- break; +- +- if (rmesa->dma.nr_released_bufs) { +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); +- } +- +- if (rmesa->do_usleeps) { +- UNLOCK_HARDWARE( rmesa ); +- DO_USLEEP( 1 ); +- LOCK_HARDWARE( rmesa ); +- } +- } +- +- UNLOCK_HARDWARE(rmesa); +- +- if (R200_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Allocated buffer %d\n", index); +- +- dmabuf = CALLOC_STRUCT( r200_dma_buffer ); +- dmabuf->buf = &rmesa->r200Screen->buffers->list[index]; +- dmabuf->refcount = 1; +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = dmabuf->buf->address; +- rmesa->dma.current.end = dmabuf->buf->total; +- 
rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +-} +- +-void r200ReleaseDmaRegion( r200ContextPtr rmesa, +- struct r200_dma_region *region, +- const char *caller ) +-{ +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- if (--region->buf->refcount == 0) { +- drm_radeon_cmd_header_t *cmd; +- +- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) +- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, +- region->buf->buf->idx); +- +- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), +- __FUNCTION__ ); +- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; +- cmd->dma.buf_idx = region->buf->buf->idx; +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = NULL; +- region->start = 0; +-} +- +-/* Allocates a region from rmesa->dma.current. If there isn't enough +- * space in current, grab a new buffer (and discard what was left of current) +- */ +-void r200AllocDmaRegion( r200ContextPtr rmesa, +- struct r200_dma_region *region, +- int bytes, +- int alignment ) +-{ +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- if (region->buf) +- r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ ); +- +- alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) +- r200RefillCurrentDmaRegion( rmesa ); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; +- +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr 
= (rmesa->dma.current.ptr + 0x7) & ~0x7; +- +- assert( rmesa->dma.current.ptr <= rmesa->dma.current.end ); +-} +- +-/* ================================================================ +- * SwapBuffers with client-side throttling +- */ +- +-static uint32_t r200GetLastFrame(r200ContextPtr rmesa) +-{ +- drm_radeon_getparam_t gp; +- int ret; +- uint32_t frame; +- +- gp.param = RADEON_PARAM_LAST_FRAME; +- gp.value = (int *)&frame; +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, +- &gp, sizeof(gp) ); +- if ( ret ) { +- fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +- +- return frame; +-} +- +-static void r200EmitIrqLocked( r200ContextPtr rmesa ) +-{ +- drm_radeon_irq_emit_t ie; +- int ret; +- +- ie.irq_seq = &rmesa->iw.irq_seq; +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, +- &ie, sizeof(ie) ); +- if ( ret ) { +- fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +-} +- +- +-static void r200WaitIrq( r200ContextPtr rmesa ) +-{ +- int ret; +- +- do { +- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, +- &rmesa->iw, sizeof(rmesa->iw) ); +- } while (ret && (errno == EINTR || errno == EBUSY)); +- +- if ( ret ) { +- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +-} +- +- +-static void r200WaitForFrameCompletion( r200ContextPtr rmesa ) +-{ +- drm_radeon_sarea_t *sarea = rmesa->sarea; +- +- if (rmesa->do_irqs) { +- if (r200GetLastFrame(rmesa) < sarea->last_frame) { +- if (!rmesa->irqsEmitted) { +- while (r200GetLastFrame (rmesa) < sarea->last_frame) +- ; +- } +- else { +- UNLOCK_HARDWARE( rmesa ); +- r200WaitIrq( rmesa ); +- LOCK_HARDWARE( rmesa ); +- } +- rmesa->irqsEmitted = 10; +- } +- +- if (rmesa->irqsEmitted) { +- r200EmitIrqLocked( rmesa ); +- rmesa->irqsEmitted--; +- } +- } +- else { +- while (r200GetLastFrame (rmesa) < sarea->last_frame) { +- UNLOCK_HARDWARE( rmesa ); +- if (rmesa->do_usleeps) +- DO_USLEEP( 1 
); +- LOCK_HARDWARE( rmesa ); +- } +- } +-} +- +- +- +-/* Copy the back color buffer to the front color buffer. +- */ +-void r200CopyBuffer( __DRIdrawablePrivate *dPriv, +- const drm_clip_rect_t *rect) +-{ +- r200ContextPtr rmesa; +- GLint nbox, i, ret; +- GLboolean missed_target; +- int64_t ust; +- __DRIscreenPrivate *psp = dPriv->driScreenPriv; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); +- +- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; +- +- if ( R200_DEBUG & DEBUG_IOCTL ) { +- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx ); +- } +- +- R200_FIREVERTICES( rmesa ); +- +- LOCK_HARDWARE( rmesa ); +- +- +- /* Throttle the frame rate -- only allow one pending swap buffers +- * request at a time. +- */ +- r200WaitForFrameCompletion( rmesa ); +- if (!rect) +- { +- UNLOCK_HARDWARE( rmesa ); +- driWaitForVBlank( dPriv, & missed_target ); +- LOCK_HARDWARE( rmesa ); +- } +- +- nbox = dPriv->numClipRects; /* must be in locked region */ +- +- for ( i = 0 ; i < nbox ; ) { +- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; +- GLint n = 0; +- +- for ( ; i < nr ; i++ ) { +- +- *b = box[i]; +- +- if (rect) +- { +- if (rect->x1 > b->x1) +- b->x1 = rect->x1; +- if (rect->y1 > b->y1) +- b->y1 = rect->y1; +- if (rect->x2 < b->x2) +- b->x2 = rect->x2; +- if (rect->y2 < b->y2) +- b->y2 = rect->y2; +- +- if (b->x1 >= b->x2 || b->y1 >= b->y2) +- continue; +- } +- +- b++; +- n++; +- } +- rmesa->sarea->nbox = n; +- +- if (!n) +- continue; +- +- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret ); +- UNLOCK_HARDWARE( rmesa ); +- exit( 1 ); +- } +- } +- +- UNLOCK_HARDWARE( rmesa ); +- if (!rect) +- { +- rmesa->hw.all_dirty = GL_TRUE; +- +- rmesa->swap_count++; +- (*psp->systemTime->getUST)( & ust ); +- if 
( missed_target ) { +- rmesa->swap_missed_count++; +- rmesa->swap_missed_ust = ust - rmesa->swap_ust; +- } +- +- rmesa->swap_ust = ust; +- +- sched_yield(); +- } +-} +- +-void r200PageFlip( __DRIdrawablePrivate *dPriv ) ++static void r200UserClear(GLcontext *ctx, GLuint flags) + { +- r200ContextPtr rmesa; +- GLint ret; +- GLboolean missed_target; +- __DRIscreenPrivate *psp = dPriv->driScreenPriv; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); ++ if (flags & (RADEON_FRONT | RADEON_BACK)) { + +- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; + +- if ( R200_DEBUG & DEBUG_IOCTL ) { +- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, +- rmesa->sarea->pfCurrentPage); +- } +- +- R200_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); +- +- if (!dPriv->numClipRects) { +- UNLOCK_HARDWARE( rmesa ); +- usleep( 10000 ); /* throttle invisible client 10ms */ +- return; + } ++ ++ if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) ++ && (flags & RADEON_CLEAR_FASTZ)) { + +- /* Need to do this for the perf box placement: +- */ +- { +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; +- b[0] = box[0]; +- rmesa->sarea->nbox = 1; +- } +- +- /* Throttle the frame rate -- only allow a few pending swap buffers +- * request at a time. 
+- */ +- r200WaitForFrameCompletion( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- driWaitForVBlank( dPriv, & missed_target ); +- if ( missed_target ) { +- rmesa->swap_missed_count++; +- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); + } +- LOCK_HARDWARE( rmesa ); + +- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); +- exit( 1 ); +- } +- +- rmesa->swap_count++; +- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); +- +-#if 000 +- if ( rmesa->sarea->pfCurrentPage == 1 ) { +- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; +- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; +- } else { +- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; +- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; +- } +- +- R200_STATECHANGE( rmesa, ctx ); +- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset +- + rmesa->r200Screen->fbLocation; +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch; +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; +- } +-#else +- /* Get ready for drawing next frame. Update the renderbuffers' +- * flippedOffset/Pitch fields so we draw into the right place. 
+- */ +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); +- +- +- r200UpdateDrawBuffer(rmesa->glCtx); +-#endif + } + +- +-/* ================================================================ +- * Buffer clear +- */ +-static void r200Clear( GLcontext *ctx, GLbitfield mask ) ++static void r200KernelClear(GLcontext *ctx, GLuint flags) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- GLuint flags = 0; +- GLuint color_mask = 0; +- GLint ret, i; +- GLint cx, cy, cw, ch; ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; ++ GLint cx, cy, cw, ch, ret; ++ GLuint i; + +- if ( R200_DEBUG & DEBUG_IOCTL ) { +- fprintf( stderr, "r200Clear\n"); +- } +- +- { +- LOCK_HARDWARE( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- if ( dPriv->numClipRects == 0 ) +- return; +- } +- +- r200Flush( ctx ); +- +- if ( mask & BUFFER_BIT_FRONT_LEFT ) { +- flags |= RADEON_FRONT; +- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; +- mask &= ~BUFFER_BIT_FRONT_LEFT; +- } +- +- if ( mask & BUFFER_BIT_BACK_LEFT ) { +- flags |= RADEON_BACK; +- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; +- mask &= ~BUFFER_BIT_BACK_LEFT; +- } +- +- if ( mask & BUFFER_BIT_DEPTH ) { +- flags |= RADEON_DEPTH; +- mask &= ~BUFFER_BIT_DEPTH; +- } +- +- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { +- flags |= RADEON_STENCIL; +- mask &= ~BUFFER_BIT_STENCIL; +- } +- +- if ( mask ) { +- if (R200_DEBUG & DEBUG_FALLBACKS) +- fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); +- _swrast_Clear( ctx, mask ); +- } +- +- if ( !flags ) +- return; +- +- if (rmesa->using_hyperz) { +- flags |= RADEON_USE_COMP_ZBUF; +-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) +- flags |= RADEON_USE_HIERZ; */ +- if (!(rmesa->state.stencil.hwBuffer) || +- ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && +- ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) 
{ +- flags |= RADEON_CLEAR_FASTZ; +- } +- } +- +- LOCK_HARDWARE( rmesa ); +- +- /* compute region after locking: */ +- cx = ctx->DrawBuffer->_Xmin; +- cy = ctx->DrawBuffer->_Ymin; +- cw = ctx->DrawBuffer->_Xmax - cx; +- ch = ctx->DrawBuffer->_Ymax - cy; +- +- /* Flip top to bottom */ +- cx += dPriv->x; +- cy = dPriv->y + dPriv->h - cy - ch; ++ LOCK_HARDWARE( &rmesa->radeon ); + + /* Throttle the number of clear ioctls we do. + */ +@@ -693,7 +88,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&clear; +- ret = drmCommandWriteRead( rmesa->dri.fd, ++ ret = drmCommandWriteRead( rmesa->radeon.dri.fd, + DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); + + if ( ret ) { +@@ -703,24 +98,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) + + /* Clear throttling needs more thought. + */ +- if ( rmesa->sarea->last_clear - clear <= 25 ) { ++ if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) { + break; + } + +- if (rmesa->do_usleeps) { +- UNLOCK_HARDWARE( rmesa ); ++ if (rmesa->radeon.do_usleeps) { ++ UNLOCK_HARDWARE( &rmesa->radeon ); + DO_USLEEP( 1 ); +- LOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); + } + } + + /* Send current state to the hardware */ +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); ++ ++ ++ /* compute region after locking: */ ++ cx = ctx->DrawBuffer->_Xmin; ++ cy = ctx->DrawBuffer->_Ymin; ++ cw = ctx->DrawBuffer->_Xmax - cx; ++ ch = ctx->DrawBuffer->_Ymax - cy; + ++ /* Flip top to bottom */ ++ cx += dPriv->x; ++ cy = dPriv->y + dPriv->h - cy - ch; + for ( i = 0 ; i < dPriv->numClipRects ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); + drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; ++ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes; + drm_radeon_clear_t clear; + drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; + GLint n = 
0; +@@ -755,17 +160,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) + } + } + +- rmesa->sarea->nbox = n; ++ rmesa->radeon.sarea->nbox = n; + + clear.flags = flags; +- clear.clear_color = rmesa->state.color.clear; +- clear.clear_depth = rmesa->state.depth.clear; /* needed for hyperz */ ++ clear.clear_color = rmesa->radeon.state.color.clear; ++ clear.clear_depth = rmesa->radeon.state.depth.clear; /* needed for hyperz */ + clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; +- clear.depth_mask = rmesa->state.stencil.clear; ++ clear.depth_mask = rmesa->radeon.state.stencil.clear; + clear.depth_boxes = depth_boxes; + + n--; +- b = rmesa->sarea->boxes; ++ b = rmesa->radeon.sarea->boxes; + for ( ; n >= 0 ; n-- ) { + depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; + depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; +@@ -774,83 +179,91 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) + depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear; + } + +- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, ++ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR, + &clear, sizeof(clear)); + + + if ( ret ) { +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); + exit( 1 ); + } + } +- +- UNLOCK_HARDWARE( rmesa ); +- rmesa->hw.all_dirty = GL_TRUE; ++ UNLOCK_HARDWARE( &rmesa->radeon ); + } +- +- +-void r200WaitForIdleLocked( r200ContextPtr rmesa ) ++/* ================================================================ ++ * Buffer clear ++ */ ++static void r200Clear( GLcontext *ctx, GLbitfield mask ) + { +- int ret; +- int i = 0; +- +- do { +- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE); +- if (ret) +- DO_USLEEP( 1 ); +- } while (ret && ++i < 100); +- +- if ( ret < 0 ) { +- UNLOCK_HARDWARE( rmesa ); +- fprintf( stderr, "Error: R200 timed out... 
exiting\n" ); +- exit( -1 ); +- } +-} ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; ++ GLuint flags = 0; ++ GLuint color_mask = 0; ++ GLint ret; + ++ if ( R200_DEBUG & DEBUG_IOCTL ) { ++ fprintf( stderr, "r200Clear\n"); ++ } + +-static void r200WaitForIdle( r200ContextPtr rmesa ) +-{ +- LOCK_HARDWARE(rmesa); +- r200WaitForIdleLocked( rmesa ); +- UNLOCK_HARDWARE(rmesa); +-} ++ { ++ LOCK_HARDWARE( &rmesa->radeon ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); ++ if ( dPriv->numClipRects == 0 ) ++ return; ++ } + ++ radeonFlush( ctx ); + +-void r200Flush( GLcontext *ctx ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT( ctx ); ++ if ( mask & BUFFER_BIT_FRONT_LEFT ) { ++ flags |= RADEON_FRONT; ++ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; ++ mask &= ~BUFFER_BIT_FRONT_LEFT; ++ } + +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); ++ if ( mask & BUFFER_BIT_BACK_LEFT ) { ++ flags |= RADEON_BACK; ++ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; ++ mask &= ~BUFFER_BIT_BACK_LEFT; ++ } + +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); ++ if ( mask & BUFFER_BIT_DEPTH ) { ++ flags |= RADEON_DEPTH; ++ mask &= ~BUFFER_BIT_DEPTH; ++ } + +- r200EmitState( rmesa ); +- +- if (rmesa->store.cmd_used) +- r200FlushCmdBuf( rmesa, __FUNCTION__ ); +-} ++ if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) { ++ flags |= RADEON_STENCIL; ++ mask &= ~BUFFER_BIT_STENCIL; ++ } + +-/* Make sure all commands have been sent to the hardware and have +- * completed processing. 
+- */ +-void r200Finish( GLcontext *ctx ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- r200Flush( ctx ); ++ if ( mask ) { ++ if (R200_DEBUG & DEBUG_FALLBACKS) ++ fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); ++ _swrast_Clear( ctx, mask ); ++ } ++ ++ if ( !flags ) ++ return; + +- if (rmesa->do_irqs) { +- LOCK_HARDWARE( rmesa ); +- r200EmitIrqLocked( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- r200WaitIrq( rmesa ); ++ if (rmesa->using_hyperz) { ++ flags |= RADEON_USE_COMP_ZBUF; ++/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) ++ flags |= RADEON_USE_HIERZ; */ ++ if (!(rmesa->radeon.state.stencil.hwBuffer) || ++ ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && ++ ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { ++ flags |= RADEON_CLEAR_FASTZ; ++ } + } +- else +- r200WaitForIdle( rmesa ); +-} + ++ if (rmesa->radeon.radeonScreen->kernel_mm) ++ r200UserClear(ctx, flags); ++ else ++ r200KernelClear(ctx, flags); ++ ++ rmesa->radeon.hw.all_dirty = GL_TRUE; ++} + + /* This version of AllocateMemoryMESA allocates only GART memory, and + * only does so after the point at which the driver has been +@@ -875,7 +288,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, + fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, + writefreq, priority); + +- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) ++ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) + return NULL; + + if (getenv("R200_NO_ALLOC")) +@@ -886,7 +299,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, + alloc.size = size; + alloc.region_offset = ®ion_offset; + +- ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd, ++ ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd, + DRM_RADEON_ALLOC, + &alloc, sizeof(alloc)); + +@@ -896,7 +309,7 @@ void 
*r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, + } + + { +- char *region_start = (char *)rmesa->r200Screen->gartTextures.map; ++ char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map; + return (void *)(region_start + region_offset); + } + } +@@ -914,24 +327,24 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %p\n", __FUNCTION__, pointer); + +- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) { ++ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) { + fprintf(stderr, "%s: no context\n", __FUNCTION__); + return; + } + +- region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; ++ region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; + + if (region_offset < 0 || +- region_offset > rmesa->r200Screen->gartTextures.size) { ++ region_offset > rmesa->radeon.radeonScreen->gartTextures.size) { + fprintf(stderr, "offset %d outside range 0..%d\n", region_offset, +- rmesa->r200Screen->gartTextures.size); ++ rmesa->radeon.radeonScreen->gartTextures.size); + return; + } + + memfree.region = RADEON_MEM_REGION_GART; + memfree.region_offset = region_offset; + +- ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd, ++ ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd, + DRM_RADEON_FREE, + &memfree, sizeof(memfree)); + +@@ -956,16 +369,16 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer) + + card_offset = r200GartOffsetFromVirtual( rmesa, pointer ); + +- return card_offset - rmesa->r200Screen->gart_base; ++ return card_offset - rmesa->radeon.radeonScreen->gart_base; + } + + GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, + GLint size ) + { +- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; ++ ptrdiff_t offset = (char *)pointer - (char 
*)rmesa->radeon.radeonScreen->gartTextures.map; + int valid = (size >= 0 && + offset >= 0 && +- offset + size < rmesa->r200Screen->gartTextures.size); ++ offset + size < rmesa->radeon.radeonScreen->gartTextures.size); + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid ); +@@ -976,12 +389,12 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, + + GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ) + { +- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; ++ ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; + +- if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size) ++ if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size) + return ~0; + else +- return rmesa->r200Screen->gart_texture_offset + offset; ++ return rmesa->radeon.radeonScreen->gart_texture_offset + offset; + } + + +@@ -989,7 +402,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ) + void r200InitIoctlFuncs( struct dd_function_table *functions ) + { + functions->Clear = r200Clear; +- functions->Finish = r200Finish; +- functions->Flush = r200Flush; ++ functions->Finish = radeonFinish; ++ functions->Flush = radeonFlush; + } + +diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h +index f7458e4..2a4b8a1 100644 +--- a/src/mesa/drivers/dri/r200/r200_ioctl.h ++++ b/src/mesa/drivers/dri/r200/r200_ioctl.h +@@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "main/simple_list.h" + #include "radeon_dri.h" +-#include "r200_lock.h" ++ ++#include "radeon_bocs_wrapper.h" + + #include "xf86drm.h" + #include "drm.h" + #include "radeon_drm.h" + +-extern void r200EmitState( r200ContextPtr rmesa ); + extern void r200EmitVertexAOS( r200ContextPtr rmesa, +- GLuint vertex_size, +- GLuint offset ); ++ GLuint vertex_size, ++ struct radeon_bo *bo, ++ GLuint offset ); + + extern void r200EmitVbufPrim( r200ContextPtr rmesa, + GLuint primitive, + GLuint vertex_nr ); + +-extern void r200FlushElts( r200ContextPtr rmesa ); ++extern void r200FlushElts(GLcontext *ctx); + + extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + GLuint primitive, + GLuint min_nr ); + +-extern void r200EmitAOS( r200ContextPtr rmesa, +- struct r200_dma_region **regions, +- GLuint n, +- GLuint offset ); +- +-extern void r200EmitBlit( r200ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, +- GLuint w, GLuint h ); +- +-extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags ); +- +-extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * ); +-extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ); +- +-extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ); +- +-extern void r200AllocDmaRegion( r200ContextPtr rmesa, +- struct r200_dma_region *region, +- int bytes, +- int alignment ); +- +-extern void r200ReleaseDmaRegion( r200ContextPtr rmesa, +- struct r200_dma_region *region, +- const char *caller ); +- +-extern void r200CopyBuffer( __DRIdrawablePrivate *drawable, +- const drm_clip_rect_t *rect); +-extern void r200PageFlip( __DRIdrawablePrivate *drawable ); +-extern void r200Flush( GLcontext *ctx ); +-extern void r200Finish( GLcontext *ctx ); +-extern void r200WaitForIdleLocked( r200ContextPtr rmesa ); +-extern void r200WaitForVBlank( r200ContextPtr rmesa ); ++extern void 
r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset); ++ + extern void r200InitIoctlFuncs( struct dd_function_table *functions ); + + extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq, +@@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa ); + */ + #define R200_NEWPRIM( rmesa ) \ + do { \ +- if ( rmesa->dma.flush ) \ +- rmesa->dma.flush( rmesa ); \ ++ if ( rmesa->radeon.dma.flush ) \ ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ + } while (0) + + /* Can accomodate several state changes and primitive changes without +@@ -130,7 +95,7 @@ do { \ + do { \ + R200_NEWPRIM( rmesa ); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ +- rmesa->hw.is_dirty = GL_TRUE; \ ++ rmesa->radeon.hw.is_dirty = GL_TRUE; \ + } while (0) + + #define R200_DB_STATE( ATOM ) \ +@@ -139,13 +104,13 @@ do { \ + + static INLINE int R200_DB_STATECHANGE( + r200ContextPtr rmesa, +- struct r200_state_atom *atom ) ++ struct radeon_state_atom *atom ) + { + if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { +- int *tmp; ++ GLuint *tmp; + R200_NEWPRIM( rmesa ); + atom->dirty = GL_TRUE; +- rmesa->hw.is_dirty = GL_TRUE; ++ rmesa->radeon.hw.is_dirty = GL_TRUE; + tmp = atom->cmd; + atom->cmd = atom->lastcmd; + atom->lastcmd = tmp; +@@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE( + } + + +-/* Fire the buffered vertices no matter what. +- */ +-#define R200_FIREVERTICES( rmesa ) \ +-do { \ +- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ +- r200Flush( rmesa->glCtx ); \ +- } \ +-} while (0) +- + /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ + * are available, you will also be adding an rmesa->state.max_state_size because + * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. +@@ -174,36 +130,36 @@ do { \ + #define ELTS_BUFSZ(nr) (12 + nr * 2) + #define VBUF_BUFSZ (3 * sizeof(int)) + +-/* Ensure that a minimum amount of space is available in the command buffer. 
+- * This is used to ensure atomicity of state updates with the rendering requests +- * that rely on them. +- * +- * An alternative would be to implement a "soft lock" such that when the buffer +- * wraps at an inopportune time, we grab the lock, flush the current buffer, +- * and hang on to the lock until the critical section is finished and we flush +- * the buffer again and unlock. +- */ +-static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) ++static inline uint32_t cmdpacket3(int cmd_type) + { +- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) +- r200FlushCmdBuf( rmesa, __FUNCTION__ ); +- assert( bytes <= R200_CMD_BUF_SZ ); +-} ++ drm_radeon_cmd_header_t cmd; + +-/* Alloc space in the command buffer +- */ +-static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa, +- int bytes, const char *where ) +-{ +- char * head; ++ cmd.i = 0; ++ cmd.header.cmd_type = cmd_type; + +- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) +- r200FlushCmdBuf( rmesa, where ); ++ return (uint32_t)cmd.i; + +- head = rmesa->store.cmd_buf + rmesa->store.cmd_used; +- rmesa->store.cmd_used += bytes; +- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); +- return head; + } + ++#define OUT_BATCH_PACKET3(packet, num_extra) do { \ ++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \ ++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } else { \ ++ OUT_BATCH(CP_PACKET2); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } \ ++ } while(0) ++ ++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \ ++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \ ++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } else { \ ++ OUT_BATCH(CP_PACKET2); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } \ ++ } while(0) ++ ++ + #endif /* __R200_IOCTL_H__ */ +diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c +deleted file mode 
100644 +index 99661a4..0000000 +--- a/src/mesa/drivers/dri/r200/r200_lock.c ++++ /dev/null +@@ -1,116 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#include "r200_context.h" +-#include "r200_lock.h" +-#include "r200_tex.h" +-#include "r200_state.h" +-#include "r200_ioctl.h" +- +-#include "drirenderbuffer.h" +- +- +-#if DEBUG_LOCKING +-char *prevLockFile = NULL; +-int prevLockLine = 0; +-#endif +- +-/* Turn on/off page flipping according to the flags in the sarea: +- */ +-static void +-r200UpdatePageFlipping( r200ContextPtr rmesa ) +-{ +- rmesa->doPageFlip = rmesa->sarea->pfState; +- if (rmesa->glCtx->WinSysDrawBuffer) { +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); +- } +-} +- +- +- +-/* Update the hardware state. This is called if another main/context.has +- * grabbed the hardware lock, which includes the X server. This +- * function also updates the driver's window state after the X server +- * moves, resizes or restacks a window -- the change will be reflected +- * in the drawable position and clip rects. Since the X server grabs +- * the hardware lock when it changes the window state, this routine will +- * automatically be called after such a change. +- */ +-void r200GetLock( r200ContextPtr rmesa, GLuint flags ) +-{ +- __DRIdrawablePrivate *drawable = rmesa->dri.drawable; +- __DRIdrawablePrivate *readable = rmesa->dri.readable; +- __DRIscreenPrivate *sPriv = rmesa->dri.screen; +- drm_radeon_sarea_t *sarea = rmesa->sarea; +- int i; +- +- drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags ); +- +- /* The window might have moved, so we might need to get new clip +- * rects. +- * +- * NOTE: This releases and regrabs the hw lock to allow the X server +- * to respond to the DRI protocol request for new drawable info. +- * Since the hardware state depends on having the latest drawable +- * clip rects, all state checking must be done _after_ this call. 
+- */ +- DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable ); +- if (drawable != readable) { +- DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable ); +- } +- +- if ( rmesa->lastStamp != drawable->lastStamp ) { +- r200UpdatePageFlipping( rmesa ); +- r200SetCliprects( rmesa ); +- r200UpdateViewportOffset( rmesa->glCtx ); +- driUpdateFramebufferSize(rmesa->glCtx, drawable); +- } +- +- R200_STATECHANGE( rmesa, ctx ); +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; +- } +- else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE; +- +- if ( sarea->ctx_owner != rmesa->dri.hwContext ) { +- sarea->ctx_owner = rmesa->dri.hwContext; +- } +- +- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { +- DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] ); +- } +- +- rmesa->lost_context = GL_TRUE; +-} +diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h +deleted file mode 100644 +index 4ff9890..0000000 +--- a/src/mesa/drivers/dri/r200/r200_lock.h ++++ /dev/null +@@ -1,106 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. 
+- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#ifndef __R200_LOCK_H__ +-#define __R200_LOCK_H__ +- +-extern void r200GetLock( r200ContextPtr rmesa, GLuint flags ); +- +-/* Turn DEBUG_LOCKING on to find locking conflicts. +- */ +-#define DEBUG_LOCKING 0 +- +-#if DEBUG_LOCKING +-extern char *prevLockFile; +-extern int prevLockLine; +- +-#define DEBUG_LOCK() \ +- do { \ +- prevLockFile = (__FILE__); \ +- prevLockLine = (__LINE__); \ +- } while (0) +- +-#define DEBUG_RESET() \ +- do { \ +- prevLockFile = 0; \ +- prevLockLine = 0; \ +- } while (0) +- +-#define DEBUG_CHECK_LOCK() \ +- do { \ +- if ( prevLockFile ) { \ +- fprintf( stderr, \ +- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ +- prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ +- exit( 1 ); \ +- } \ +- } while (0) +- +-#else +- +-#define DEBUG_LOCK() +-#define DEBUG_RESET() +-#define DEBUG_CHECK_LOCK() +- +-#endif +- +-/* +- * !!! We may want to separate locks from locks with validation. This +- * could be used to improve performance for those things commands that +- * do not do any drawing !!! +- */ +- +- +-/* Lock the hardware and validate our state. 
+- */ +-#define LOCK_HARDWARE( rmesa ) \ +- do { \ +- char __ret = 0; \ +- DEBUG_CHECK_LOCK(); \ +- DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext, \ +- (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \ +- if ( __ret ) \ +- r200GetLock( rmesa, 0 ); \ +- DEBUG_LOCK(); \ +- } while (0) +- +-#define UNLOCK_HARDWARE( rmesa ) \ +- do { \ +- DRM_UNLOCK( rmesa->dri.fd, \ +- rmesa->dri.hwLock, \ +- rmesa->dri.hwContext ); \ +- DEBUG_RESET(); \ +- } while (0) +- +-#endif /* __R200_LOCK_H__ */ +diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c +index 8512b9a..5dbc202 100644 +--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c ++++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c +@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r200_maos.h" + #include "r200_tcl.h" + +- +-#if 0 +-/* Usage: +- * - from r200_tcl_render +- * - call r200EmitArrays to ensure uptodate arrays in dma +- * - emit primitives (new type?) 
which reference the data +- * -- need to use elts for lineloop, quads, quadstrip/flat +- * -- other primitives are all well-formed (need tristrip-1,fake-poly) +- * +- */ +-static void emit_ubyte_rgba3( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p\n", +- __FUNCTION__, count, stride, (void *)out); +- +- for (i = 0; i < count; i++) { +- out->red = *data; +- out->green = *(data+1); +- out->blue = *(data+2); +- out->alpha = 0xFF; +- out++; +- data += stride; +- } +-} +- +-static void emit_ubyte_rgba4( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 4) { +- for (i = 0; i < count; i++) +- ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]); +- } else { +- for (i = 0; i < count; i++) { +- *(int *)out++ = LE32_TO_CPU(*(int *)data); +- data += stride; +- } +- } +-} +- +- +-static void emit_ubyte_rgba( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int size, +- int stride, +- int count ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); +- +- assert (!rvb->buf); +- +- if (stride == 0) { +- r200AllocDmaRegion( rmesa, rvb, 4, 4 ); +- count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = 1; +- } +- else { +- r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? 
*/ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 1; +- rvb->aos_size = 1; +- } +- +- /* Emit the data +- */ +- switch (size) { +- case 3: +- emit_ubyte_rgba3( ctx, rvb, data, stride, count ); +- break; +- case 4: +- emit_ubyte_rgba4( ctx, rvb, data, stride, count ); +- break; +- default: +- assert(0); +- exit(1); +- break; +- } +-} +-#endif +- +- + #if defined(USE_X86_ASM) + #define COPY_DWORDS( dst, src, nr ) \ + do { \ +@@ -174,204 +70,34 @@ do { \ + } while (0) + #endif + +- +-static void emit_vecfog( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) ++static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, ++ GLvoid *data, int stride, int count) + { +- int i; +- GLfloat *out; +- +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- assert (!rvb->buf); +- +- if (stride == 0) { +- r200AllocDmaRegion( rmesa, rvb, 4, 4 ); +- count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = 1; +- } +- else { +- r200AllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? 
*/ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 1; +- rvb->aos_size = 1; +- } +- +- /* Emit the data +- */ +- out = (GLfloat *)(rvb->address + rvb->start); +- for (i = 0; i < count; i++) { +- out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); +- out++; +- data += stride; +- } +- ++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ uint32_t *out; ++ int i; ++ int size = 1; ++ ++ if (stride == 0) { ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); ++ count = 1; ++ aos->stride = 0; ++ } else { ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); ++ aos->stride = size; ++ } ++ ++ aos->components = size; ++ aos->count = count; ++ ++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); ++ for (i = 0; i < count; i++) { ++ out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); ++ out++; ++ data += stride; ++ } + } + +- +-static void emit_vec4( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 4) +- COPY_DWORDS( out, data, count ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out++; +- data += stride; +- } +-} +- +- +-static void emit_vec8( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 8) +- COPY_DWORDS( out, data, count*2 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out += 2; +- data += stride; +- } +-} +- +-static void emit_vec12( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out 
= (int *)(rvb->address + rvb->start); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 12) +- COPY_DWORDS( out, data, count*3 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out[2] = *(int *)(data+8); +- out += 3; +- data += stride; +- } +-} +- +-static void emit_vec16( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 16) +- COPY_DWORDS( out, data, count*4 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out[2] = *(int *)(data+8); +- out[3] = *(int *)(data+12); +- out += 4; +- data += stride; +- } +-} +- +- +-static void emit_vector( GLcontext *ctx, +- struct r200_dma_region *rvb, +- char *data, +- int size, +- int stride, +- int count ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if (R200_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d size %d stride %d\n", +- __FUNCTION__, count, size, stride); +- +- assert (!rvb->buf); +- +- if (stride == 0) { +- r200AllocDmaRegion( rmesa, rvb, size * 4, 4 ); +- count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = size; +- } +- else { +- r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? 
*/ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = size; +- rvb->aos_size = size; +- } +- +- /* Emit the data +- */ +- switch (size) { +- case 1: +- emit_vec4( ctx, rvb, data, stride, count ); +- break; +- case 2: +- emit_vec8( ctx, rvb, data, stride, count ); +- break; +- case 3: +- emit_vec12( ctx, rvb, data, stride, count ); +- break; +- case 4: +- emit_vec16( ctx, rvb, data, stride, count ); +- break; +- default: +- assert(0); +- exit(1); +- break; +- } +- +-} +- +- +- + /* Emit any changed arrays to new GART memory, re-emit a packet to + * update the arrays. + */ +@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) + { + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; +- struct r200_dma_region **component = rmesa->tcl.aos_components; + GLuint nr = 0; + GLuint vfmt0 = 0, vfmt1 = 0; + GLuint count = VB->Count; + GLuint i, emitsize; + ++ // fprintf(stderr,"emit arrays\n"); + for ( i = 0; i < 15; i++ ) { + GLubyte attrib = vimap_rev[i]; + if (attrib != 255) { +@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) + case 3: + /* special handling to fix up fog. 
Will get us into trouble with vbos...*/ + assert(attrib == VERT_ATTRIB_FOG); +- if (!rmesa->tcl.vertex_data[i].buf) { ++ if (!rmesa->tcl.aos[i].bo) { + if (ctx->VertexProgram._Enabled) +- emit_vector( ctx, +- &(rmesa->tcl.vertex_data[i]), +- (char *)VB->AttribPtr[attrib]->data, +- 1, +- VB->AttribPtr[attrib]->stride, +- count); ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->AttribPtr[attrib]->data, ++ 1, ++ VB->AttribPtr[attrib]->stride, ++ count); + else +- emit_vecfog( ctx, +- &(rmesa->tcl.vertex_data[i]), +- (char *)VB->AttribPtr[attrib]->data, +- VB->AttribPtr[attrib]->stride, +- count); ++ r200_emit_vecfog( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->AttribPtr[attrib]->data, ++ VB->AttribPtr[attrib]->stride, ++ count); + } + vfmt0 |= R200_VTX_DISCRETE_FOG; + goto after_emit; +@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) + default: + assert(0); + } +- if (!rmesa->tcl.vertex_data[i].buf) { +- emit_vector( ctx, +- &(rmesa->tcl.vertex_data[i]), +- (char *)VB->AttribPtr[attrib]->data, +- emitsize, +- VB->AttribPtr[attrib]->stride, +- count ); ++ if (!rmesa->tcl.aos[nr].bo) { ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->AttribPtr[attrib]->data, ++ emitsize, ++ VB->AttribPtr[attrib]->stride, ++ count ); + } + after_emit: + assert(nr < 12); +- component[nr++] = &rmesa->tcl.vertex_data[i]; ++ nr++; + } + } + +@@ -501,12 +227,11 @@ after_emit: + void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ) + { + r200ContextPtr rmesa = R200_CONTEXT( ctx ); +- +- /* only do it for changed inputs ? 
*/ + int i; +- for (i = 0; i < 15; i++) { +- if (newinputs & (1 << i)) +- r200ReleaseDmaRegion( rmesa, +- &rmesa->tcl.vertex_data[i], __FUNCTION__ ); ++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) { ++ if (rmesa->tcl.aos[i].bo) { ++ radeon_bo_unref(rmesa->tcl.aos[i].bo); ++ rmesa->tcl.aos[i].bo = NULL; ++ } + } + } +diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c +index be68821..a6c6558 100644 +--- a/src/mesa/drivers/dri/r200/r200_pixel.c ++++ b/src/mesa/drivers/dri/r200/r200_pixel.c +@@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, + const void *pixels, GLint sz, GLint pitch ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- GLuint cpp = rmesa->r200Screen->cpp; ++ GLuint cpp = rmesa->radeon.radeonScreen->cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); +@@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx, + if (*height <= 0) + return GL_FALSE; + +- *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch + +- (*x + *width - 1) * rmesa->r200Screen->cpp); ++ *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch + ++ (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp); + + return GL_TRUE; + } +@@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx, + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint pitch = pack->RowLength ? pack->RowLength : width; + GLint blit_format; +- GLuint cpp = rmesa->r200Screen->cpp; ++ GLuint cpp = rmesa->radeon.radeonScreen->cpp; + GLint size = width * height * cpp; + ++ return GL_FALSE; ++#if 0 + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + /* Only accelerate reading to GART buffers. 
+ */ + if ( !r200IsGartMemory(rmesa, pixels, +- pitch * height * rmesa->r200Screen->cpp ) ) { ++ pitch * height * rmesa->radeon.radeonScreen->cpp ) ) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: dest not GART\n", __FUNCTION__); +- return GL_FALSE; + } + + /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from +@@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx, + if (!check_color(ctx, type, format, pack, pixels, size, pitch)) + return GL_FALSE; + +- switch ( rmesa->r200Screen->cpp ) { ++ switch ( rmesa->radeon.radeonScreen->cpp ) { + case 4: + blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + break; +@@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx, + * a full command buffer expects to be called unlocked. As a + * workaround, immediately flush the buffer on aquiring the lock. + */ +- LOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); + + if (rmesa->store.cmd_used) +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + + if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height, + &size)) { +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s totally clipped -- nothing to do\n", + __FUNCTION__); +@@ -212,14 +213,14 @@ r200TryReadPixels( GLcontext *ctx, + } + + { +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer; + int nbox = dPriv->numClipRects; + int src_offset = drb->offset +- + rmesa->r200Screen->fbLocation; ++ + rmesa->radeon.radeonScreen->fbLocation; + int src_pitch = drb->pitch * drb->cpp; + int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels ); +- int dst_pitch = pitch * rmesa->r200Screen->cpp; ++ int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp; + drm_clip_rect_t *box = dPriv->pClipRects; + int i; + +@@ -257,12 +258,12 @@ 
r200TryReadPixels( GLcontext *ctx, + bw, bh ); + } + +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + } +- UNLOCK_HARDWARE( rmesa ); +- +- r200Finish( ctx ); /* required by GL */ ++ UNLOCK_HARDWARE( &rmesa->radeon ); + ++ radeonFinish( ctx ); /* required by GL */ +#endif -+ return bo; + return GL_TRUE; + } + +@@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx, + GLuint planemask) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + drm_clip_rect_t *box = dPriv->pClipRects; + struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0]; + driRenderbuffer *drb = (driRenderbuffer *) rb; +@@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx, + int blit_format; + int size; + int src_offset = r200GartOffsetFromVirtual( rmesa, pixels ); +- int src_pitch = pitch * rmesa->r200Screen->cpp; ++ int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); +- +- switch ( rmesa->r200Screen->cpp ) { ++#if 0 ++ switch ( rmesa->radeon.radeonScreen->cpp ) { + case 2: + blit_format = R200_CP_COLOR_FORMAT_RGB565; + break; +@@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx, + } + + +- LOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); + + if (rmesa->store.cmd_used) +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + + y -= height; /* cope with pixel zoom */ + + if (!clip_pixelrect(ctx, ctx->DrawBuffer, + &x, &y, &width, &height, + &size)) { +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + return; + } + +@@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx, + blit_format, + src_pitch, src_offset, + drb->pitch * drb->cpp, +- drb->offset + rmesa->r200Screen->fbLocation, ++ drb->offset + 
rmesa->radeon.radeonScreen->fbLocation, + bx - x, by - y, + bx, by, + bw, bh ); + } + +- r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); +- r200WaitForIdleLocked( rmesa ); /* required by GL */ +- UNLOCK_HARDWARE( rmesa ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); ++ radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */ ++ UNLOCK_HARDWARE( &rmesa->radeon ); ++#endif + } + + +@@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx, + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint pitch = unpack->RowLength ? unpack->RowLength : width; + GLuint planemask; +- GLuint cpp = rmesa->r200Screen->cpp; ++ GLuint cpp = rmesa->radeon.radeonScreen->cpp; + GLint size = height * pitch * cpp; + + if (R200_DEBUG & DEBUG_PIXEL) +@@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx, + case GL_RGB: + case GL_RGBA: + case GL_BGRA: +- planemask = r200PackColor(cpp, ++ planemask = radeonPackColor(cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], +@@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx, + return GL_FALSE; + } + +- if ( r200IsGartMemory(rmesa, pixels, size) ) ++ if (0)// r200IsGartMemory(rmesa, pixels, size) ) + { + do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask ); + return GL_TRUE; +@@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py, + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + +- if (rmesa->Fallback) ++ if (rmesa->radeon.Fallback) + _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap ); + else + r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap ); +diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h +index 5ce287f..526a624 100644 +--- a/src/mesa/drivers/dri/r200/r200_reg.h ++++ b/src/mesa/drivers/dri/r200/r200_reg.h +@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000 + /* gap */ + #define R200_SE_TCL_VECTOR_INDX_REG 0x2200 ++# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16 ++# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28 + #define R200_SE_TCL_VECTOR_DATA_REG 0x2204 + #define R200_SE_TCL_SCALAR_INDX_REG 0x2208 ++# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16 + #define R200_SE_TCL_SCALAR_DATA_REG 0x220c + /* gap */ + #define R200_SE_TCL_MATRIX_SEL_0 0x2230 +@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define R200_LOD_BIAS_MASK (0xfff80000) + #define R200_LOD_BIAS_SHIFT 19 + #define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */ ++#define R200_PP_TX_WIDTHMASK_SHIFT 0 ++#define R200_PP_TX_HEIGHTMASK_SHIFT 16 ++ + #define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */ + #define R200_PP_BORDER_COLOR_0 0x2c14 + #define R200_PP_CUBIC_FACES_0 0x2c18 +diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c +deleted file mode 100644 +index 9783678..0000000 +--- a/src/mesa/drivers/dri/r200/r200_span.c ++++ /dev/null +@@ -1,307 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. 
+- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/colormac.h" +-#include "swrast/swrast.h" +- +-#include "r200_context.h" +-#include "r200_ioctl.h" +-#include "r200_state.h" +-#include "r200_span.h" +-#include "r200_tex.h" +- +-#define DBG 0 +- +-/* +- * Note that all information needed to access pixels in a renderbuffer +- * should be obtained through the gl_renderbuffer parameter, not per-context +- * information. 
+- */ +-#define LOCAL_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ +- const GLuint bottom = dPriv->h - 1; \ +- GLubyte *buf = (GLubyte *) drb->flippedData \ +- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ +- GLuint p; \ +- (void) p; +- +-#define LOCAL_DEPTH_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ +- const GLuint bottom = dPriv->h - 1; \ +- GLuint xo = dPriv->x; \ +- GLuint yo = dPriv->y; \ +- GLubyte *buf = (GLubyte *) drb->Base.Data; +- +-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS +- +-#define Y_FLIP(Y) (bottom - (Y)) +- +-#define HW_LOCK() +- +-#define HW_UNLOCK() +- +- +- +-/* ================================================================ +- * Color buffer +- */ +- +-/* 16 bit, RGB565 color spanline and pixel functions +- */ +-#define SPANTMP_PIXEL_FMT GL_RGB +-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 +- +-#define TAG(x) r200##x##_RGB565 +-#define TAG2(x,y) r200##x##_RGB565##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +-#include "spantmp2.h" +- +-/* 32 bit, ARGB8888 color spanline and pixel functions +- */ +-#define SPANTMP_PIXEL_FMT GL_BGRA +-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +- +-#define TAG(x) r200##x##_ARGB8888 +-#define TAG2(x,y) r200##x##_ARGB8888##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +-#include "spantmp2.h" +- +- +-/* ================================================================ +- * Depth buffer +- */ +- +-/* The Radeon family has depth tiling on all the time, so we have to convert +- * the x,y coordinates into the memory bus address (mba) in the same +- * manner as the engine. In each case, the linear block address (ba) +- * is calculated, and then wired with x and y to produce the final +- * memory address. +- * The chip will do address translation on its own if the surface registers +- * are set up correctly. 
It is not quite enough to get it working with hyperz too... +- */ +- +-/* extract bit 'b' of x, result is zero or one */ +-#define BIT(x,b) ((x & (1<>b) +- +-static GLuint +-r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y ) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 4 * (x + y * pitch); +- } +- else { +- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5); +- GLuint a = +- (BIT(x,0) << 2) | +- (BIT(y,0) << 3) | +- (BIT(x,1) << 4) | +- (BIT(y,1) << 5) | +- (BIT(x,3) << 6) | +- (BIT(x,4) << 7) | +- (BIT(x,2) << 8) | +- (BIT(y,2) << 9) | +- (BIT(y,3) << 10) | +- (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | +- ((b >> 1) << 12); +- return a; +- } +-} +- +-static GLuint +-r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y ) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 2 * (x + y * pitch); +- } +- else { +- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6); +- GLuint a = +- (BIT(x,0) << 1) | +- (BIT(y,0) << 2) | +- (BIT(x,1) << 3) | +- (BIT(y,1) << 4) | +- (BIT(x,2) << 5) | +- (BIT(x,4) << 6) | +- (BIT(x,5) << 7) | +- (BIT(x,3) << 8) | +- (BIT(y,2) << 9) | +- (BIT(y,3) << 10) | +- (((pitch & 0x40) ? 
(b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | +- ((b >> 1) << 12); +- return a; +- } +-} +- +- +-/* 16-bit depth buffer functions +- */ +-#define VALUE_TYPE GLushort +- +-#define WRITE_DEPTH( _x, _y, d ) \ +- *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d; +- +-#define READ_DEPTH( d, _x, _y ) \ +- d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )); +- +-#define TAG(x) r200##x##_z16 +-#include "depthtmp.h" +- +- +-/* 24 bit depth, 8 bit stencil depthbuffer functions +- */ +-#define VALUE_TYPE GLuint +- +-#define WRITE_DEPTH( _x, _y, d ) \ +-do { \ +- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0xff000000; \ +- tmp |= ((d) & 0x00ffffff); \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +- +-#define READ_DEPTH( d, _x, _y ) \ +- d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \ +- _y + yo )) & 0x00ffffff; +- +-#define TAG(x) r200##x##_z24_s8 +-#include "depthtmp.h" +- +- +-/* ================================================================ +- * Stencil buffer +- */ +- +-/* 24 bit depth, 8 bit stencil depthbuffer functions +- */ +-#define WRITE_STENCIL( _x, _y, d ) \ +-do { \ +- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0x00ffffff; \ +- tmp |= (((d) & 0xff) << 24); \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +- +-#define READ_STENCIL( d, _x, _y ) \ +-do { \ +- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0xff000000; \ +- d = tmp >> 24; \ +-} while (0) +- +-#define TAG(x) r200##x##_z24_s8 +-#include "stenciltmp.h" +- +- +-/* Move locking out to get reasonable span performance (10x better +- * than doing this in HW_LOCK above). WaitForIdle() is the main +- * culprit. 
+- */ +- +-static void r200SpanRenderStart( GLcontext *ctx ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT( ctx ); +- +- R200_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); +- r200WaitForIdleLocked( rmesa ); +- +- /* Read & rewrite the first pixel in the frame buffer. This should +- * be a noop, right? In fact without this conform fails as reading +- * from the framebuffer sometimes produces old results -- the +- * on-card read cache gets mixed up and doesn't notice that the +- * framebuffer has been updated. +- * +- * In the worst case this is buggy too as p might get the wrong +- * value first time, so really need a hidden pixel somewhere for this. +- */ +- { +- int p; +- driRenderbuffer *drb = +- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; +- volatile int *buf = +- (volatile int *)(rmesa->dri.screen->pFB + drb->offset); +- p = *buf; +- *buf = p; +- } +-} +- +-static void r200SpanRenderFinish( GLcontext *ctx ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT( ctx ); +- _swrast_flush( ctx ); +- UNLOCK_HARDWARE( rmesa ); +-} +- +-void r200InitSpanFuncs( GLcontext *ctx ) +-{ +- struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); +- swdd->SpanRenderStart = r200SpanRenderStart; +- swdd->SpanRenderFinish = r200SpanRenderFinish; +-} +- +- +- +-/** +- * Plug in the Get/Put routines for the given driRenderbuffer. 
+- */ +-void +-radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) +-{ +- if (drb->Base.InternalFormat == GL_RGBA) { +- if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { +- r200InitPointers_RGB565(&drb->Base); +- } +- else { +- r200InitPointers_ARGB8888(&drb->Base); +- } +- } +- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { +- r200InitDepthPointers_z16(&drb->Base); +- } +- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { +- r200InitDepthPointers_z24_s8(&drb->Base); +- } +- else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { +- r200InitStencilPointers_z24_s8(&drb->Base); +- } +-} +diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h +deleted file mode 100644 +index bae5644..0000000 +--- a/src/mesa/drivers/dri/r200/r200_span.h ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#ifndef __R200_SPAN_H__ +-#define __R200_SPAN_H__ +- +-#include "drirenderbuffer.h" +- +-extern void r200InitSpanFuncs( GLcontext *ctx ); +- +-extern void +-radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); +- +-#endif +diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c +index 0eaaaf6..126f78b 100644 +--- a/src/mesa/drivers/dri/r200/r200_state.c ++++ b/src/mesa/drivers/dri/r200/r200_state.c +@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "tnl/t_pipeline.h" + #include "swrast_setup/swrast_setup.h" + ++#include "radeon_common.h" ++#include "radeon_mipmap_tree.h" + #include "r200_context.h" + #include "r200_ioctl.h" + #include "r200_state.h" +@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] ) + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); +- if (rmesa->r200Screen->drmSupportsBlendColor) +- rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] ); ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) ++ rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] ); + } + + /** +@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx ) + + R200_STATECHANGE( rmesa, ctx ); + +- if (rmesa->r200Screen->drmSupportsBlendColor) { ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { + if (ctx->Color.ColorLogicOpEnabled) { + 
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func; +@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx ) + return; + } + +- if (!rmesa->r200Screen->drmSupportsBlendColor) { ++ if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) { + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; + return; + } +@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d ) + + switch ( format ) { + case R200_DEPTH_FORMAT_16BIT_INT_Z: +- rmesa->state.depth.clear = d * 0x0000ffff; ++ rmesa->radeon.state.depth.clear = d * 0x0000ffff; + break; + case R200_DEPTH_FORMAT_24BIT_INT_Z: +- rmesa->state.depth.clear = d * 0x00ffffff; ++ rmesa->radeon.state.depth.clear = d * 0x00ffffff; + break; + } + } +@@ -480,7 +482,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) + case GL_FOG_COLOR: + R200_STATECHANGE( rmesa, ctx ); + UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); +- i = r200PackColor( 4, col[0], col[1], col[2], 0 ); ++ i = radeonPackColor( 4, col[0], col[1], col[2], 0 ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK; + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i; + break; +@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) + } + } + +- +-/* ============================================================= +- * Scissoring +- */ +- +- +-static GLboolean intersect_rect( drm_clip_rect_t *out, +- drm_clip_rect_t *a, +- drm_clip_rect_t *b ) +-{ +- *out = *a; +- if ( b->x1 > out->x1 ) out->x1 = b->x1; +- if ( b->y1 > out->y1 ) out->y1 = b->y1; +- if ( b->x2 < out->x2 ) out->x2 = b->x2; +- if ( b->y2 < out->y2 ) out->y2 = b->y2; +- if ( out->x1 >= out->x2 ) return GL_FALSE; +- if ( out->y1 >= out->y2 ) return GL_FALSE; +- return GL_TRUE; +-} +- +- +-void r200RecalcScissorRects( r200ContextPtr rmesa ) +-{ +- drm_clip_rect_t *out; +- int i; +- +- /* Grow cliprect store? 
+- */ +- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { +- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { +- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ +- rmesa->state.scissor.numAllocedClipRects *= 2; +- } +- +- if (rmesa->state.scissor.pClipRects) +- FREE(rmesa->state.scissor.pClipRects); +- +- rmesa->state.scissor.pClipRects = +- MALLOC( rmesa->state.scissor.numAllocedClipRects * +- sizeof(drm_clip_rect_t) ); +- +- if ( rmesa->state.scissor.pClipRects == NULL ) { +- rmesa->state.scissor.numAllocedClipRects = 0; +- return; +- } +- } +- +- out = rmesa->state.scissor.pClipRects; +- rmesa->state.scissor.numClipRects = 0; +- +- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { +- if ( intersect_rect( out, +- &rmesa->pClipRects[i], +- &rmesa->state.scissor.rect ) ) { +- rmesa->state.scissor.numClipRects++; +- out++; +- } +- } +-} +- +- +-static void r200UpdateScissor( GLcontext *ctx ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if ( rmesa->dri.drawable ) { +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- +- int x = ctx->Scissor.X; +- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; +- int w = ctx->Scissor.X + ctx->Scissor.Width - 1; +- int h = dPriv->h - ctx->Scissor.Y - 1; +- +- rmesa->state.scissor.rect.x1 = x + dPriv->x; +- rmesa->state.scissor.rect.y1 = y + dPriv->y; +- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; +- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; +- +- r200RecalcScissorRects( rmesa ); +- } +-} +- +- +-static void r200Scissor( GLcontext *ctx, +- GLint x, GLint y, GLsizei w, GLsizei h ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if ( ctx->Scissor.Enabled ) { +- R200_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */ +- r200UpdateScissor( ctx ); +- } +- +-} +- +- + /* ============================================================= + * Culling + */ +@@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx, + GLboolean b, 
GLboolean a ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- GLuint mask = r200PackColor( rmesa->r200Screen->cpp, ++ GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], +@@ -834,7 +740,7 @@ static void r200PolygonOffset( GLcontext *ctx, + GLfloat factor, GLfloat units ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- float_ui32_type constant = { units * rmesa->state.depth.scale }; ++ float_ui32_type constant = { units * rmesa->radeon.state.depth.scale }; + float_ui32_type factoru = { factor }; + + /* factor *= 2; */ +@@ -861,15 +767,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) + + /* TODO: push this into cmd mechanism + */ +- R200_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); ++ radeon_firevertices(&rmesa->radeon); ++ LOCK_HARDWARE( &rmesa->radeon ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; +- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, ++ drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(stipple) ); +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + } + + static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) +@@ -881,7 +787,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) + * cases work. 
+ */ + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag); +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } +@@ -958,7 +864,7 @@ static void r200UpdateSpecular( GLcontext *ctx ) + + /* Update vertex/render formats + */ +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } +@@ -1430,7 +1336,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname, + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE); +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } +@@ -1675,7 +1581,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + +- rmesa->state.stencil.clear = ++ rmesa->radeon.state.stencil.clear = + ((GLuint) (ctx->Stencil.Clear & 0xff) | + (0xff << R200_STENCIL_MASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT)); +@@ -1700,19 +1606,19 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) + void r200UpdateWindow( GLcontext *ctx ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- GLfloat xoffset = (GLfloat)dPriv->x; +- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; ++ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; ++ GLfloat yoffset = dPriv ? 
(GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type sx = { v[MAT_SX] }; + float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; + float_ui32_type sy = { - v[MAT_SY] }; + float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; +- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; +- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; ++ float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale }; ++ float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale }; + +- R200_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + R200_STATECHANGE( rmesa, vpt ); + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; +@@ -1744,7 +1650,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval, + void r200UpdateViewportOffset( GLcontext *ctx ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; +@@ -1774,8 +1680,8 @@ void r200UpdateViewportOffset( GLcontext *ctx ) + R200_STIPPLE_Y_OFFSET_MASK); + + /* add magic offsets, then invert */ +- stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK); +- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) ++ stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK); ++ sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1) + & R200_STIPPLE_COORD_MASK); + + m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) | +@@ -1788,7 +1694,7 @@ void r200UpdateViewportOffset( GLcontext *ctx ) + } + } + +- r200UpdateScissor( ctx ); ++ radeonUpdateScissor( ctx ); + } + + +@@ -1805,7 +1711,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] ) + CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]); + 
CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]); +- rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp, ++ rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp, + color[0], color[1], + color[2], color[3] ); + } +@@ -1849,56 +1755,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode ) + } + + +-/* +- * Set up the cliprects for either front or back-buffer drawing. +- */ +-void r200SetCliprects( r200ContextPtr rmesa ) +-{ +- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; +- __DRIdrawablePrivate *const readable = rmesa->dri.readable; +- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; +- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; +- +- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) { +- /* Can't ignore 2d windows if we are page flipping. +- */ +- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { +- rmesa->numClipRects = drawable->numClipRects; +- rmesa->pClipRects = drawable->pClipRects; +- } +- else { +- rmesa->numClipRects = drawable->numBackClipRects; +- rmesa->pClipRects = drawable->pBackClipRects; +- } +- } +- else { +- /* front buffer (or none, or multiple buffers) */ +- rmesa->numClipRects = drawable->numClipRects; +- rmesa->pClipRects = drawable->pClipRects; +- } +- +- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { +- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, +- drawable->w, drawable->h); +- draw_fb->Initialized = GL_TRUE; +- } +- +- if (drawable != readable) { +- if ((read_fb->Width != readable->w) || +- (read_fb->Height != readable->h)) { +- _mesa_resize_framebuffer(rmesa->glCtx, read_fb, +- readable->w, readable->h); +- read_fb->Initialized = GL_TRUE; +- } +- } +- +- if (rmesa->state.scissor.enabled) +- r200RecalcScissorRects( rmesa ); +- +- rmesa->lastStamp = drawable->lastStamp; +-} +- +- + static void r200DrawBuffer( GLcontext *ctx, GLenum 
mode ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +@@ -1907,7 +1763,7 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) + fprintf(stderr, "%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( mode )); + +- R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ ++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */ + + if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ +@@ -1925,7 +1781,8 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) + return; + } + +- r200SetCliprects( rmesa ); ++ radeonSetCliprects( &rmesa->radeon ); ++ radeonUpdatePageFlipping(&rmesa->radeon); + + /* We'll set the drawing engine's offset/pitch parameters later + * when we update other state. +@@ -2013,10 +1870,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) + R200_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE; +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; + } + break; + +@@ -2031,7 +1888,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK; + } + r200UpdateSpecular( ctx ); /* for PK_SPEC */ +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + r200ChooseVertexState( ctx ); + _mesa_allow_light_in_model( ctx, !state ); + break; +@@ -2068,7 +1925,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) + case GL_LIGHTING: + r200UpdateSpecular(ctx); + /* for reflection map fixup - might set recheck_texgen for all units too */ +- rmesa->NewGLState |= _NEW_TEXTURE; ++ 
rmesa->radeon.NewGLState |= _NEW_TEXTURE; + break; + + case GL_LINE_SMOOTH: +@@ -2181,13 +2038,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) + } + + case GL_SCISSOR_TEST: +- R200_FIREVERTICES( rmesa ); +- rmesa->state.scissor.enabled = state; +- r200UpdateScissor( ctx ); ++ radeon_firevertices(&rmesa->radeon); ++ rmesa->radeon.state.scissor.enabled = state; ++ radeonUpdateScissor( ctx ); + break; + + case GL_STENCIL_TEST: +- if ( rmesa->state.stencil.hwBuffer ) { ++ if ( rmesa->radeon.state.stencil.hwBuffer ) { + R200_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE; +@@ -2443,42 +2300,99 @@ r200UpdateDrawBuffer(GLcontext *ctx) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; +- driRenderbuffer *drb; ++ struct radeon_renderbuffer *rrb; + + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { +- /* draw to front */ +- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; +- } +- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { +- /* draw to back */ +- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; +- } +- else { +- /* drawing to multiple buffers, or none */ +- return; ++ /* draw to front */ ++ rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { ++ /* draw to back */ ++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ } else { ++ /* drawing to multiple buffers, or none */ ++ return; + } + +- assert(drb); +- assert(drb->flippedPitch); ++ assert(rrb); ++ assert(rrb->pitch); + + R200_STATECHANGE( rmesa, ctx ); + ++#if 0 + /* Note: we used the (possibly) page-flipped values */ + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] +- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) ++ = ((rrb->flippedOffset + rmesa->radeon.radeonScreen->fbLocation) + & R200_COLOROFFSET_MASK); + 
rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; +- if (rmesa->sarea->tiling_enabled) { ++ if (rmesa->radeon.sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } ++#endif + } + ++static GLboolean r200ValidateBuffers(GLcontext *ctx) ++{ ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); ++ struct radeon_cs_space_check bos[8]; ++ struct radeon_renderbuffer *rrb; ++ int num_bo = 0; ++ int i; ++ int flushed = 0, ret; ++again: ++ num_bo = 0; ++ ++ rrb = radeon_get_colorbuffer(&rmesa->radeon); ++ /* color buffer */ ++ if (rrb && rrb->bo) { ++ bos[num_bo].bo = rrb->bo; ++ bos[num_bo].read_domains = 0; ++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; ++ } ++ ++ /* depth buffer */ ++ rrb = radeon_get_depthbuffer(&rmesa->radeon); ++ /* color buffer */ ++ if (rrb && rrb->bo) { ++ bos[num_bo].bo = rrb->bo; ++ bos[num_bo].read_domains = 0; ++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; ++ } ++ ++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { ++ radeonTexObj *t; ++ ++ if (!ctx->Texture.Unit[i]._ReallyEnabled) ++ continue; ++ ++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); ++ bos[num_bo].bo = t->mt->bo; ++ bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].write_domain = 0; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; ++ } ++ ++ ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo); ++ if (ret == RADEON_CS_SPACE_OP_TO_BIG) ++ return GL_FALSE; ++ if (ret == RADEON_CS_SPACE_FLUSH) { ++ radeonFlush(ctx); ++ if (flushed) ++ return GL_FALSE; ++ flushed = 1; ++ goto again; ++ } ++ return GL_TRUE; +} + +- +-void r200ValidateState( GLcontext *ctx ) ++GLboolean r200ValidateState( GLcontext *ctx ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- GLuint new_state = rmesa->NewGLState; ++ GLuint new_state = rmesa->radeon.NewGLState; + + if (new_state & (_NEW_BUFFERS | 
_NEW_COLOR | _NEW_PIXEL)) { + r200UpdateDrawBuffer(ctx); +@@ -2486,10 +2400,14 @@ void r200ValidateState( GLcontext *ctx ) + + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + r200UpdateTextureState( ctx ); +- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ ++ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */ + r200UpdateLocalViewer( ctx ); + } + ++ /* we need to do a space check here */ ++ if (!r200ValidateBuffers(ctx)) ++ return GL_FALSE; ++ + /* FIXME: don't really need most of these when vertex progs are enabled */ + + /* Need an event driven matrix update? +@@ -2533,7 +2451,8 @@ void r200ValidateState( GLcontext *ctx ) + else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0); + } + +- rmesa->NewGLState = 0; ++ rmesa->radeon.NewGLState = 0; ++ return GL_TRUE; + } + + +@@ -2544,7 +2463,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state ) + _vbo_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); + _ae_invalidate_state( ctx, new_state ); +- R200_CONTEXT(ctx)->NewGLState |= new_state; ++ R200_CONTEXT(ctx)->radeon.NewGLState |= new_state; + } + + /* A hack. 
The r200 can actually cope just fine with materials +@@ -2573,12 +2492,13 @@ static void r200WrapRunPipeline( GLcontext *ctx ) + GLboolean has_material; + + if (0) +- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); ++ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); + + /* Validate state: + */ +- if (rmesa->NewGLState) +- r200ValidateState( ctx ); ++ if (rmesa->radeon.NewGLState) ++ if (!r200ValidateState( ctx )) ++ FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE); + + has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx ); + +@@ -2636,7 +2556,7 @@ void r200InitStateFuncs( struct dd_function_table *functions ) + functions->PointParameterfv = r200PointParameter; + functions->PointSize = r200PointSize; + functions->RenderMode = r200RenderMode; +- functions->Scissor = r200Scissor; ++ functions->Scissor = radeonScissor; + functions->ShadeModel = r200ShadeModel; + functions->StencilFuncSeparate = r200StencilFuncSeparate; + functions->StencilMaskSeparate = r200StencilMaskSeparate; +diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h +index a917163..1dddbfd 100644 +--- a/src/mesa/drivers/dri/r200/r200_state.h ++++ b/src/mesa/drivers/dri/r200/r200_state.h +@@ -43,13 +43,11 @@ extern void r200InitTnlFuncs( GLcontext *ctx ); + + extern void r200UpdateMaterial( GLcontext *ctx ); + +-extern void r200SetCliprects( r200ContextPtr rmesa ); +-extern void r200RecalcScissorRects( r200ContextPtr rmesa ); + extern void r200UpdateViewportOffset( GLcontext *ctx ); + extern void r200UpdateWindow( GLcontext *ctx ); + extern void r200UpdateDrawBuffer(GLcontext *ctx); + +-extern void r200ValidateState( GLcontext *ctx ); ++extern GLboolean r200ValidateState( GLcontext *ctx ); + + extern void r200PrintDirty( r200ContextPtr rmesa, + const char *msg ); +@@ -59,7 +57,7 @@ extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ); + #define FALLBACK( 
rmesa, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ +- r200Fallback( rmesa->glCtx, bit, mode ); \ ++ r200Fallback( rmesa->radeon.glCtx, bit, mode ); \ + } while (0) + + extern void r200LightingSpaceChange( GLcontext *ctx ); +diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c +index 9e4677e..b40690e 100644 +--- a/src/mesa/drivers/dri/r200/r200_state_init.c ++++ b/src/mesa/drivers/dri/r200/r200_state_init.c +@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "tnl/t_pipeline.h" + #include "swrast_setup/swrast_setup.h" + ++#include "radeon_common.h" ++#include "radeon_mipmap_tree.h" + #include "r200_context.h" + #include "r200_ioctl.h" + #include "r200_state.h" +@@ -52,31 +54,145 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #include "xmlpool.h" + ++/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in ++ * 1.3 cmdbuffers allow all previous state to be updated as well as ++ * the tcl scalar and vector areas. 
++ */ ++static struct { ++ int start; ++ int len; ++ const char *name; ++} packet[RADEON_MAX_STATE_PACKETS] = { ++ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, ++ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, ++ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, ++ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, ++ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, ++ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, ++ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, ++ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, ++ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, ++ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, ++ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, ++ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, ++ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, ++ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, ++ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, ++ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, ++ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, ++ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, ++ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, ++ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, ++ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, ++ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, ++ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, ++ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, ++ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, ++ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, ++ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, ++ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, ++ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, ++ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, ++ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"}, ++ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, ++ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, ++ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, ++ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, ++ 
{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, ++ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, ++ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, ++ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, ++ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, ++ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, ++ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, ++ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, ++ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, ++ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, ++ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, ++ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, ++ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, ++ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, ++ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, ++ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, ++ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, ++ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, ++ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, ++ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, ++ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, ++ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, ++ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, ++ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, ++ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, ++ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, ++ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, ++ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, ++ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, ++ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ ++ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ ++ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, ++ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, ++ {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, ++ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, ++ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, ++ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, ++ 
{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, ++ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, ++ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, ++ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, ++ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, ++ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, ++ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, ++ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, ++ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, ++ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, ++ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, ++ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, ++ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, ++ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, ++ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, ++ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, ++ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ ++ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, ++ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, ++ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, ++ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, ++ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, ++ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, ++ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, ++ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, ++ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, ++}; + -+static inline void _radeon_bo_ref(struct radeon_bo *bo, -+ const char *file, -+ const char *func, -+ int line) + /* ============================================================= + * State initialization + */ + + void r200PrintDirty( r200ContextPtr rmesa, const char *msg ) + { +- struct r200_state_atom *l; ++ struct radeon_state_atom *l; + + fprintf(stderr, msg); + fprintf(stderr, ": "); + +- foreach(l, &rmesa->hw.atomlist) { +- if (l->dirty || rmesa->hw.all_dirty) ++ foreach(l, &rmesa->radeon.hw.atomlist) { ++ if (l->dirty || 
rmesa->radeon.hw.all_dirty) + fprintf(stderr, "%s, ", l->name); + } + + fprintf(stderr, "\n"); + } + +-static int cmdpkt( int id ) ++static int cmdpkt( r200ContextPtr rmesa, int id ) + { + drm_radeon_cmd_header_t h; +- h.i = 0; +- h.packet.cmd_type = RADEON_CMD_PACKET; +- h.packet.packet_id = id; ++ ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ return CP_PACKET0(packet[id].start, packet[id].len - 1); ++ } else { ++ h.i = 0; ++ h.packet.cmd_type = RADEON_CMD_PACKET; ++ h.packet.packet_id = id; ++ } + return h.i; + } + +@@ -127,96 +243,388 @@ static int cmdscl2( int offset, int stride, int count ) + } + + #define CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx, int idx ) \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ + { \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- (void) idx; \ + (void) rmesa; \ +- return FLAG; \ ++ return (FLAG) ? atom->cmd_size : 0; \ + } + + #define TCL_CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +-{ \ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- (void) idx; \ +- return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ ++{ \ ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ ++ return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ + } + + #define TCL_OR_VP_CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx, int idx ) \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ + { \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- (void) idx; \ +- return !rmesa->TclFallback && (FLAG); \ ++ return (!rmesa->radeon.TclFallback && (FLAG)) ? 
atom->cmd_size : 0; \ + } + + #define VP_CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +-{ \ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- (void) idx; \ +- return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ ++{ \ ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ ++ (void) atom; \ ++ return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ + } + +- + CHECK( always, GL_TRUE ) + CHECK( never, GL_FALSE ) + CHECK( tex_any, ctx->Texture._EnabledUnits ) + CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); +-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) ) +-CHECK( tex, rmesa->state.texture.unit[idx].unitneeded ) ++CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) ) ++CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded ) + CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) +-CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) ) ++ CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) ) + CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) + CHECK( afs, ctx->ATIFragmentShader._Enabled ) +-CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT ) ++CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT ) + TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) + TCL_CHECK( tcl, GL_TRUE ) +-TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded ) ++TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded ) + TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) +-TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled ) +-TCL_OR_VP_CHECK( 
tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) ) ++TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled ) ++TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) ) + TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) + VP_CHECK( tcl_vp, GL_TRUE ) + VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) + VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) + ++#define OUT_VEC(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ ++ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \ ++ OUT_BATCH_TABLE((data), h.vectors.count); \ ++ } while(0) ++ ++#define OUT_VECLINEAR(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ uint32_t _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \ ++ uint32_t _sz = h.veclinear.count * 4; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ ++ OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \ ++ OUT_BATCH_TABLE((data), _sz); \ ++ } while(0) ++ ++#define OUT_SCL(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ ++ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ ++ OUT_BATCH_TABLE((data), h.scalars.count); \ ++ } while(0) ++ ++#define OUT_SCL2(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); 
\ ++ OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ ++ OUT_BATCH_TABLE((data), h.scalars.count); \ ++ } while(0) ++ ++static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ bo->cref++; -+#ifdef RADEON_BO_TRACK -+ radeon_track_add_event(bo->track, file, func, "ref", line); -+#endif -+ bo->bom->funcs->bo_ref(bo); ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 6; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1)); ++ OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18)); ++ END_BATCH(); +} + -+static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo, -+ const char *file, -+ const char *func, -+ int line) ++static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ bo->cref--; -+#ifdef RADEON_BO_TRACK -+ radeon_track_add_event(bo->track, file, func, "unref", line); -+ if (bo->cref <= 0) { -+ radeon_tracker_remove_track(&bo->bom->tracker, bo->track); -+ bo->track = NULL; -+ } -+#endif -+ return bo->bom->funcs->bo_unref(bo); ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 8; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); ++ OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); ++ END_BATCH(); +} + -+static inline int _radeon_bo_map(struct radeon_bo *bo, -+ int write, -+ const char *file, -+ const char *func, -+ int line) ++static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ return bo->bom->funcs->bo_map(bo, write); ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 8; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1); ++ 
OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1); ++ END_BATCH(); +} + -+static inline int _radeon_bo_unmap(struct radeon_bo *bo, -+ const char *file, -+ const char *func, -+ int line) ++static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ return bo->bom->funcs->bo_unmap(bo); ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 4; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VECLINEAR(atom->cmd[0], atom->cmd+1); ++ END_BATCH(); +} + -+static inline int _radeon_bo_wait(struct radeon_bo *bo, -+ const char *file, -+ const char *func, -+ int line) ++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ return bo->bom->funcs->bo_wait(bo); ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 2; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_SCL(atom->cmd[0], atom->cmd+1); ++ END_BATCH(); +} + -+#define radeon_bo_open(bom, h, s, a, d, f)\ -+ _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_ref(bo)\ -+ _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_unref(bo)\ -+ _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_map(bo, w)\ -+ _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_unmap(bo)\ -+ _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_debug(bo, opcode)\ -+ _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) -+#define radeon_bo_wait(bo) \ -+ _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) + -+#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c -new file mode 100644 -index 0000000..03a6299 ---- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c -@@ -0,0 +1,825 @@ -+/* -+ * Copyright © 2008 Nicolai Haehnle -+ * Copyright © 2008 Dave Airlie -+ * 
Copyright © 2008 Jérôme Glisse -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sub license, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, -+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -+ * USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. 
-+ */ -+/* -+ * Authors: -+ * Aapo Tahkola -+ * Nicolai Haehnle -+ * Dave Airlie -+ * Jérôme Glisse -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "xf86drm.h" -+#include "texmem.h" -+#include "main/simple_list.h" ++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 4; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[0], atom->cmd+1); ++ END_BATCH(); ++} + -+#include "drm.h" -+#include "radeon_drm.h" -+#include "radeon_common.h" -+#include "radeon_bocs_wrapper.h" ++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ struct radeon_renderbuffer *rrb; ++ uint32_t cbpitch; ++ uint32_t zbpitch, depth_fmt; ++ uint32_t dwords = atom->cmd_size; ++ ++ /* output the first 7 bytes of context */ ++ BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2); ++ OUT_BATCH_TABLE(atom->cmd, 5); ++ ++ rrb = radeon_get_depthbuffer(&r200->radeon); ++ if (!rrb) { ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ } else { ++ zbpitch = (rrb->pitch / rrb->cpp); ++ if (r200->using_hyperz) ++ zbpitch |= RADEON_DEPTH_HYPERZ; ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH(zbpitch); ++ if (rrb->cpp == 4) ++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; ++ else ++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; ++ } ++ ++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); ++ OUT_BATCH(atom->cmd[CTX_CMD_1]); ++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]); ++ ++ rrb = radeon_get_colorbuffer(&r200->radeon); ++ if (!rrb || !rrb->bo) { ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); ++ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]); ++ } else { ++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); ++ if 
(rrb->cpp == 4) ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; ++ else ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; ++ ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ } + -+/* no seriously texmem.c is this screwed up */ -+struct bo_legacy_texture_object { -+ driTextureObject base; -+ struct bo_legacy *parent; -+}; ++ OUT_BATCH(atom->cmd[CTX_CMD_2]); + -+struct bo_legacy { -+ struct radeon_bo base; -+ int map_count; -+ uint32_t pending; -+ int is_pending; -+ int static_bo; -+ uint32_t offset; -+ struct bo_legacy_texture_object *tobj; -+ int validated; -+ int dirty; -+ void *ptr; -+ struct bo_legacy *next, *prev; -+ struct bo_legacy *pnext, *pprev; -+}; ++ if (!rrb || !rrb->bo) { ++ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]); ++ } else { ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) ++ cbpitch |= R200_COLOR_TILE_ENABLE; ++ OUT_BATCH(cbpitch); ++ } + -+struct bo_manager_legacy { -+ struct radeon_bo_manager base; -+ unsigned nhandle; -+ unsigned nfree_handles; -+ unsigned cfree_handles; -+ uint32_t current_age; -+ struct bo_legacy bos; -+ struct bo_legacy pending_bos; -+ uint32_t fb_location; -+ uint32_t texture_offset; -+ unsigned dma_alloc_size; -+ uint32_t dma_buf_count; -+ unsigned cpendings; -+ driTextureObject texture_swapped; -+ driTexHeap *texture_heap; -+ struct radeon_screen *screen; -+ unsigned *free_handles; -+}; ++ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) ++ OUT_BATCH_TABLE((atom->cmd + 14), 4); + -+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t) -+{ -+ struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t; -+ -+ if (tobj->parent) { -+ tobj->parent->tobj = NULL; -+ tobj->parent->validated = 0; -+ } ++ END_BATCH(); +} + -+static void inline clean_handles(struct bo_manager_legacy *bom) ++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ while 
(bom->cfree_handles > 0 && -+ !bom->free_handles[bom->cfree_handles - 1]) -+ bom->cfree_handles--; ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ struct radeon_renderbuffer *rrb, *drb; ++ uint32_t cbpitch = 0; ++ uint32_t zbpitch = 0; ++ uint32_t dwords = atom->cmd_size; ++ uint32_t depth_fmt; ++ ++ rrb = radeon_get_colorbuffer(&r200->radeon); ++ if (!rrb || !rrb->bo) { ++ return; ++ } + -+} -+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle) -+{ -+ uint32_t tmp; ++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); ++ if (rrb->cpp == 4) ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; ++ else ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; ++ ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) ++ cbpitch |= R200_COLOR_TILE_ENABLE; ++ ++ drb = radeon_get_depthbuffer(&r200->radeon); ++ if (drb) { ++ zbpitch = (drb->pitch / drb->cpp); ++ if (drb->cpp == 4) ++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; ++ else ++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; ++ } + -+ *handle = 0; -+ if (bom->nhandle == 0xFFFFFFFF) { -+ return -EINVAL; -+ } -+ if (bom->cfree_handles > 0) { -+ tmp = bom->free_handles[--bom->cfree_handles]; -+ clean_handles(bom); -+ } else { -+ bom->cfree_handles = 0; -+ tmp = bom->nhandle++; -+ } -+ assert(tmp); -+ *handle = tmp; -+ return 0; -+} ++ if (drb) ++ dwords += 4; ++ if (rrb) ++ dwords += 4; + -+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle) -+{ -+ uint32_t *handles; ++ /* output the first 7 bytes of context */ ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); + -+ if (!handle) { -+ return 0; -+ } -+ if (handle == (bom->nhandle - 1)) { -+ int i; ++ /* In the CS case we need to split this up */ ++ OUT_BATCH(CP_PACKET0(packet[0].start, 3)); ++ OUT_BATCH_TABLE((atom->cmd + 1), 4); + -+ bom->nhandle--; 
-+ for (i = bom->cfree_handles - 1; i >= 0; i--) { -+ if (bom->free_handles[i] == (bom->nhandle - 1)) { -+ bom->nhandle--; -+ bom->free_handles[i] = 0; -+ } -+ } -+ clean_handles(bom); -+ return 0; -+ } -+ if (bom->cfree_handles < bom->nfree_handles) { -+ bom->free_handles[bom->cfree_handles++] = handle; -+ return 0; -+ } -+ bom->nfree_handles += 0x100; -+ handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4); -+ if (handles == NULL) { -+ bom->nfree_handles -= 0x100; -+ return -ENOMEM; -+ } -+ bom->free_handles = handles; -+ bom->free_handles[bom->cfree_handles++] = handle; -+ return 0; -+} ++ if (drb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + -+static void legacy_get_current_age(struct bo_manager_legacy *boml) -+{ -+ drm_radeon_getparam_t gp; -+ int r; ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); ++ OUT_BATCH(zbpitch); ++ } + -+ if (IS_R300_CLASS(boml->screen)) { -+ gp.param = RADEON_PARAM_LAST_CLEAR; -+ gp.value = (int *)&boml->current_age; -+ r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM, -+ &gp, sizeof(gp)); -+ if (r) { -+ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); -+ exit(1); -+ } -+ } else -+ boml->current_age = boml->screen->scratch[3]; -+} ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); ++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); ++ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); ++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]); ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + -+static int legacy_is_pending(struct radeon_bo *bo) -+{ -+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; -+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + -+ if (bo_legacy->is_pending <= 0) { -+ bo_legacy->is_pending = 0; -+ return 0; -+ } -+ if (boml->current_age >= bo_legacy->pending) { -+ if (boml->pending_bos.pprev == bo_legacy) { -+ boml->pending_bos.pprev = bo_legacy->pprev; -+ } -+ bo_legacy->pprev->pnext = 
bo_legacy->pnext; -+ if (bo_legacy->pnext) { -+ bo_legacy->pnext->pprev = bo_legacy->pprev; -+ } -+ assert(bo_legacy->is_pending <= bo->cref); -+ while (bo_legacy->is_pending--) { -+ bo = radeon_bo_unref(bo); -+ if (!bo) -+ break; -+ } -+ if (bo) -+ bo_legacy->is_pending = 0; -+ boml->cpendings--; -+ return 0; -+ } -+ return 1; -+} ++ if (rrb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ } + -+static int legacy_wait_pending(struct radeon_bo *bo) -+{ -+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; -+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ if (rrb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); ++ OUT_BATCH(cbpitch); ++ } + -+ if (!bo_legacy->is_pending) { -+ return 0; -+ } -+ /* FIXME: lockup and userspace busy looping that's all the folks */ -+ legacy_get_current_age(boml); -+ while (legacy_is_pending(bo)) { -+ usleep(10); -+ legacy_get_current_age(boml); -+ } -+ return 0; ++ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { ++ OUT_BATCH_TABLE((atom->cmd + 14), 4); ++ } ++ ++ END_BATCH(); +} + -+static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) ++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ struct bo_legacy *bo_legacy; -+ struct bo_legacy *next; -+ -+ legacy_get_current_age(boml); -+ bo_legacy = boml->pending_bos.pnext; -+ while (bo_legacy) { -+ if (debug) -+ fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, -+ boml->current_age, bo_legacy->pending); -+ next = bo_legacy->pnext; -+ if (legacy_is_pending(&(bo_legacy->base))) { -+ } -+ bo_legacy = next; -+ } ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ int i = atom->idx; ++ radeonTexObj *t = r200->state.texture.unit[i].texobj; ++ ++ if (t && t->mt && !t->image_override) ++ dwords += 2; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ 
OUT_BATCH_TABLE(atom->cmd, 10); ++ if (t && !t->image_override) { ++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, ++ RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } else if (!t) { ++ /* workaround for old CS mechanism */ ++ OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]); ++ } else if (t->image_override) ++ OUT_BATCH(t->override_offset); ++ ++ END_BATCH(); +} + -+static int legacy_wait_any_pending(struct bo_manager_legacy *boml) ++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ struct bo_legacy *bo_legacy; -+ -+ legacy_get_current_age(boml); -+ bo_legacy = boml->pending_bos.pnext; -+ if (!bo_legacy) -+ return -1; -+ legacy_wait_pending(&bo_legacy->base); -+ return 0; ++ r200ContextPtr r200 = R200_CONTEXT(ctx); ++ BATCH_LOCALS(&r200->radeon); ++ uint32_t dwords = atom->cmd_size; ++ int i = atom->idx; ++ radeonTexObj *t = r200->state.texture.unit[i].texobj; ++ GLuint size; ++ ++ BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5)); ++ OUT_BATCH_TABLE(atom->cmd, 3); ++ ++ if (t && !t->image_override) { ++ size = t->mt->totalsize / 6; ++ OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } ++ END_BATCH(); +} + + /* Initialize the context's hardware state. + */ + void r200InitState( r200ContextPtr rmesa ) + { +- GLcontext *ctx = rmesa->glCtx; +- GLuint color_fmt, depth_fmt, i; +- GLint drawPitch, drawOffset; +- +- switch ( rmesa->r200Screen->cpp ) { +- case 2: +- color_fmt = R200_COLOR_FORMAT_RGB565; +- break; +- case 4: +- color_fmt = R200_COLOR_FORMAT_ARGB8888; +- break; +- default: +- fprintf( stderr, "Error: Unsupported pixel depth... 
exiting\n" ); +- exit( -1 ); +- } ++ GLcontext *ctx = rmesa->radeon.glCtx; ++ GLuint i; + +- rmesa->state.color.clear = 0x00000000; ++ rmesa->radeon.state.color.clear = 0x00000000; + + switch ( ctx->Visual.depthBits ) { + case 16: +- rmesa->state.depth.clear = 0x0000ffff; +- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; +- depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z; +- rmesa->state.stencil.clear = 0x00000000; ++ rmesa->radeon.state.depth.clear = 0x0000ffff; ++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff; ++ rmesa->radeon.state.stencil.clear = 0x00000000; + break; + case 24: +- rmesa->state.depth.clear = 0x00ffffff; +- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; +- depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z; +- rmesa->state.stencil.clear = 0xffff0000; ++ rmesa->radeon.state.depth.clear = 0x00ffffff; ++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff; ++ rmesa->radeon.state.stencil.clear = 0xffff0000; + break; + default: + fprintf( stderr, "Error: Unsupported depth %d... 
exiting\n", +@@ -225,52 +633,37 @@ void r200InitState( r200ContextPtr rmesa ) + } + + /* Only have hw stencil when depth buffer is 24 bits deep */ +- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && ++ rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && + ctx->Visual.depthBits == 24 ); + +- rmesa->Fallback = 0; ++ rmesa->radeon.Fallback = 0; + +- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { +- drawOffset = rmesa->r200Screen->backOffset; +- drawPitch = rmesa->r200Screen->backPitch; +- } else { +- drawOffset = rmesa->r200Screen->frontOffset; +- drawPitch = rmesa->r200Screen->frontPitch; +- } +-#if 000 +- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { +- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; +- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; +- } else { +- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; +- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; +- } +- +- rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset; +- rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch; +-#endif +- +- rmesa->hw.max_state_size = 0; ++ rmesa->radeon.hw.max_state_size = 0; + + #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \ + do { \ + rmesa->hw.ATOM.cmd_size = SZ; \ +- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ +- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ ++ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ ++ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.name = NM; \ + rmesa->hw.ATOM.idx = IDX; \ + rmesa->hw.ATOM.check = check_##CHK; \ + rmesa->hw.ATOM.dirty = GL_FALSE; \ +- rmesa->hw.max_state_size += SZ * sizeof(int); \ ++ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ + } while (0) + + + /* Allocate state buffers: + */ +- if (rmesa->r200Screen->drmSupportsBlendColor) ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) + 
ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); + else + ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); ++ ++ if (rmesa->radeon.radeonScreen->kernel_mm) ++ rmesa->hw.ctx.emit = ctx_emit_cs; ++ else ++ rmesa->hw.ctx.emit = ctx_emit; + ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); + ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); + ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); +@@ -282,8 +675,8 @@ void r200InitState( r200ContextPtr rmesa ) + ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); + ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); + ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); +- if (rmesa->r200Screen->drmSupportsFragShader) { +- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { ++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { ++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); +@@ -303,7 +696,7 @@ void r200InitState( r200ContextPtr rmesa ) + ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } + else { +- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { ++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); +@@ -321,13 +714,18 @@ void r200InitState( r200ContextPtr rmesa ) + ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } +- if (rmesa->r200Screen->drmSupportsCubeMapsR200) { ++ ++ for (i = 0; i < 5; i++) ++ rmesa->hw.tex[i].emit = tex_emit; ++ if 
(rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) { + ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); + ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 ); + ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 ); + ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); + ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); + ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); ++ for (i = 0; i < 5; i++) ++ rmesa->hw.cube[i].emit = cube_emit; + } + else { + ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); +@@ -337,7 +735,8 @@ void r200InitState( r200ContextPtr rmesa ) + ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); + ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); + } +- if (rmesa->r200Screen->drmSupportsVertexProgram) { ++ ++ if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) { + ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); + ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); +@@ -390,13 +789,13 @@ void r200InitState( r200ContextPtr rmesa ) + ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 ); + ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 ); + ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 ); +- if (rmesa->r200Screen->drmSupportsTriPerf) { ++ if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) { + ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); + } + else { + ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); + } +- if (rmesa->r200Screen->drmSupportsPointSprites) { ++ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) { + ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); + ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + } +@@ -409,87 +808,115 @@ void r200InitState( 
r200ContextPtr rmesa ) + + /* Fill in the packet headers: + */ +- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); +- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); +- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); +- if (rmesa->r200Screen->drmSupportsBlendColor) +- rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR); +- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); +- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); +- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); +- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); +- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); +- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); +- rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X); +- rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET); +- rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL); +- rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0); +- rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS); +- rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE); +- rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); +- rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3); +- rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0); +- if (rmesa->r200Screen->drmSupportsFragShader) { +- rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR); +- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0); +- rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); +- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1); +- rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); +- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2); +- rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); +- 
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3); +- rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); +- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4); +- rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); +- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5); +- rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); ++ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC); ++ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL); ++ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH); ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) ++ rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR); ++ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN); ++ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH); ++ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK); ++ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE); ++ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL); ++ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC); ++ rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X); ++ rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET); ++ rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL); ++ rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0); ++ rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS); ++ rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE); ++ rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); ++ rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3); ++ rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0); ++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { ++ 
rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR); ++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0); ++ rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0); ++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1); ++ rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1); ++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2); ++ rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2); ++ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3); ++ rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3); ++ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4); ++ rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4); ++ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5); ++ rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5); + } else { +- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0); +- rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); +- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1); +- rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); +- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2); +- rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); +- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3); +- rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); +- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4); +- rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); +- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5); +- rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); +- } +- rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0); 
+- rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1); +- rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL); +- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0); +- rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0); +- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1); +- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1); +- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2); +- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2); +- rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3); +- rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3); +- rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4); +- rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4); +- rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5); +- rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5); +- rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0); +- rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1); +- rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2); +- rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3); +- rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4); +- rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5); +- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); +- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0); +- rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL); +- rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2); +- rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0); +- rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL); +- rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0); +- rmesa->hw.vtx.cmd[VTX_CMD_1] = 
cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL); +- rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL); +- rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL); +- rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL); +- rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL); ++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0); ++ rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0); ++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1); ++ rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1); ++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2); ++ rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2); ++ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3); ++ rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3); ++ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4); ++ rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4); ++ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5); ++ rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5); ++ } ++ rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0); ++ rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1); ++ rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL); ++ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0); ++ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0); ++ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1); ++ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1); ++ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2); ++ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, 
R200_EMIT_PP_CUBIC_OFFSETS_2); ++ rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3); ++ rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3); ++ rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4); ++ rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4); ++ rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5); ++ rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5); ++ rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0); ++ rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1); ++ rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2); ++ rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3); ++ rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4); ++ rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5); ++ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR); ++ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0); ++ rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL); ++ rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2); ++ rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0); ++ rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL); ++ rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0); ++ rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL); ++ rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL); ++ rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL); ++ rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL); ++ rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL); ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ rmesa->hw.mtl[0].emit = mtl_emit; ++ 
rmesa->hw.mtl[1].emit = mtl_emit; ++ ++ rmesa->hw.vpi[0].emit = veclinear_emit; ++ rmesa->hw.vpi[1].emit = veclinear_emit; ++ rmesa->hw.vpp[0].emit = veclinear_emit; ++ rmesa->hw.vpp[1].emit = veclinear_emit; ++ ++ rmesa->hw.grd.emit = scl_emit; ++ rmesa->hw.fog.emit = vec_emit; ++ rmesa->hw.glt.emit = vec_emit; ++ rmesa->hw.eye.emit = vec_emit; ++ ++ for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) ++ rmesa->hw.mat[i].emit = vec_emit; ++ ++ for (i = 0; i < 8; i++) ++ rmesa->hw.lit[i].emit = lit_emit; ++ ++ for (i = 0; i < 6; i++) ++ rmesa->hw.ucp[i].emit = vec_emit; ++ ++ rmesa->hw.ptp.emit = ptp_emit; ++ } + -+static void legacy_kick_all_buffers(struct bo_manager_legacy *boml) -+{ -+ struct bo_legacy *legacy; + -+ legacy = boml->bos.next; -+ while (legacy != &boml->bos) { -+ if (legacy->tobj) { -+ if (legacy->validated) { -+ driDestroyTextureObject(&legacy->tobj->base); -+ legacy->tobj = 0; -+ legacy->validated = 0; -+ } -+ } -+ legacy = legacy->next; -+ } -+} ++ + rmesa->hw.mtl[0].cmd[MTL_CMD_0] = + cmdvec( R200_VS_MAT_0_EMISS, 1, 16 ); + rmesa->hw.mtl[0].cmd[MTL_CMD_1] = +@@ -567,7 +994,7 @@ void r200InitState( r200ContextPtr rmesa ) + (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT)); + +- if (rmesa->r200Screen->drmSupportsBlendColor) { ++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000; + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP | + (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | +@@ -578,18 +1005,17 @@ void r200InitState( r200ContextPtr rmesa ) + } + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = +- rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation; ++ rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation; + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = +- ((rmesa->r200Screen->depthPitch & ++ ((rmesa->radeon.radeonScreen->depthPitch & + R200_DEPTHPITCH_MASK) | + R200_DEPTH_ENDIAN_NO_SWAP); + + if 
(rmesa->using_hyperz) + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ; + +- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | +- R200_Z_TEST_LESS | ++ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS | + R200_STENCIL_TEST_ALWAYS | + R200_STENCIL_FAIL_KEEP | + R200_STENCIL_ZPASS_KEEP | +@@ -599,15 +1025,14 @@ void r200InitState( r200ContextPtr rmesa ) + if (rmesa->using_hyperz) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE | + R200_Z_DECOMPRESSION_ENABLE; +-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) ++/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ + } + + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE + | R200_TEX_BLEND_0_ENABLE); + +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt; +- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { ++ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) { + case DRI_CONF_DITHER_XERRORDIFFRESET: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT; + break; +@@ -615,41 +1040,19 @@ void r200InitState( r200ContextPtr rmesa ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE; + break; + } +- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == ++ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) == + DRI_CONF_ROUND_ROUND ) +- rmesa->state.color.roundEnable = R200_ROUND_ENABLE; ++ rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE; + else +- rmesa->state.color.roundEnable = 0; +- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == ++ rmesa->radeon.state.color.roundEnable = 0; ++ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) == + DRI_CONF_COLOR_REDUCTION_DITHER ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; + else +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; +- +-#if 000 +- 
rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset + +- rmesa->r200Screen->fbLocation) +- & R200_COLOROFFSET_MASK); +- +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch & +- R200_COLORPITCH_MASK) | +- R200_COLOR_ENDIAN_NO_SWAP); +-#else +- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + +- rmesa->r200Screen->fbLocation) +- & R200_COLOROFFSET_MASK); +- +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & +- R200_COLORPITCH_MASK) | +- R200_COLOR_ENDIAN_NO_SWAP); +-#endif +- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */ +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; +- } ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; + + rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * +- driQueryOptionf (&rmesa->optionCache,"texture_blend_quality"); ++ driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality"); + rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0; + + rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW | +@@ -704,7 +1107,7 @@ void r200InitState( r200ContextPtr rmesa ) + R200_VC_NO_SWAP; + #endif + +- if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { ++ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + /* Bypass TCL */ + rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8); + } +@@ -743,28 +1146,28 @@ void r200InitState( r200ContextPtr rmesa ) + rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] = + (/* R200_TEXCOORD_PROJ | */ + 0x100000); /* Small default bias */ +- if (rmesa->r200Screen->drmSupportsFragShader) { ++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0; + 
rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + else { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + } + + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] = +- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + + rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] = + (R200_TXC_ARG_A_ZERO | +@@ -967,5 +1370,7 @@ void r200InitState( r200ContextPtr rmesa ) + + r200LightingSpaceChange( ctx ); + +- rmesa->hw.all_dirty = GL_TRUE; ++ rmesa->radeon.hw.all_dirty = GL_TRUE; ++ ++ rcommonInitCmdBuf(&rmesa->radeon); + } +diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c +index b25f028..b006409 100644 +--- a/src/mesa/drivers/dri/r200/r200_swtcl.c ++++ b/src/mesa/drivers/dri/r200/r200_swtcl.c +@@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "r200_tcl.h" + + +-static void flush_last_swtcl_prim( r200ContextPtr rmesa ); +- +- + /*********************************************************************** + * Initialization + ***********************************************************************/ + + #define EMIT_ATTR( ATTR, STYLE, F0 ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + fmt_0 |= F0; \ + } while (0) + + #define EMIT_PAD( N ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + } while (0) + + static void r200SetVertexFormat( GLcontext *ctx ) +@@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); +- rmesa->swtcl.vertex_attr_count = 0; ++ rmesa->radeon.swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. 
+@@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA; + } + +- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || ++ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) || + (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) || + (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { + R200_NEWPRIM(rmesa); +@@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx ) + rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0; + rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1; + +- rmesa->swtcl.vertex_size = ++ rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, +- rmesa->swtcl.vertex_attrs, +- rmesa->swtcl.vertex_attr_count, ++ rmesa->radeon.swtcl.vertex_attrs, ++ rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); +- rmesa->swtcl.vertex_size /= 4; +- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); ++ rmesa->radeon.swtcl.vertex_size /= 4; ++ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset ); + } + } + + + static void r200RenderStart( GLcontext *ctx ) + { +- r200ContextPtr rmesa = R200_CONTEXT( ctx ); +- + r200SetVertexFormat( ctx ); +- +- if (rmesa->dma.flush != 0 && +- rmesa->dma.flush != flush_last_swtcl_prim) +- rmesa->dma.flush( rmesa ); + } + + +@@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx ) + * rasterization fallback. As this function will be called again when we + * leave a rasterization fallback, we can just skip it for now. + */ +- if (rmesa->Fallback != 0) ++ if (rmesa->radeon.Fallback != 0) + return; + + vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]; +@@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx ) + } + } + +- +-/* Flush vertices in the current dma region. 
+- */ +-static void flush_last_swtcl_prim( r200ContextPtr rmesa ) +-{ +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- rmesa->dma.flush = NULL; +- +- if (rmesa->dma.current.buf) { +- struct r200_dma_region *current = &rmesa->dma.current; +- GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset + +- current->buf->buf->idx * RADEON_BUFFER_SIZE + +- current->start); +- +- assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND)); +- +- assert (current->start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- current->ptr); +- +- if (rmesa->dma.current.start != rmesa->dma.current.ptr) { +- r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + +- rmesa->hw.max_state_size + VBUF_BUFSZ ); +- r200EmitVertexAOS( rmesa, +- rmesa->swtcl.vertex_size, +- current_offset); +- +- r200EmitVbufPrim( rmesa, +- rmesa->swtcl.hw_primitive, +- rmesa->swtcl.numverts); +- } +- +- rmesa->swtcl.numverts = 0; +- current->start = current->ptr; +- } +-} +- +- +-/* Alloc space in the current dma region. 
+- */ +-static INLINE void * +-r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize ) ++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) + { +- GLuint bytes = vsize * nverts; +- +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) +- r200RefillCurrentDmaRegion( rmesa ); +- +- if (!rmesa->dma.flush) { +- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +- rmesa->dma.flush = flush_last_swtcl_prim; +- } ++ r200ContextPtr rmesa = R200_CONTEXT(ctx); ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, ++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)), ++ __FUNCTION__); + +- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); +- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); +- ASSERT( rmesa->dma.current.start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- rmesa->dma.current.ptr ); + ++ radeonEmitState(&rmesa->radeon); ++ r200EmitVertexAOS( rmesa, ++ rmesa->radeon.swtcl.vertex_size, ++ rmesa->radeon.dma.current, ++ current_offset); + +- { +- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); +- rmesa->dma.current.ptr += bytes; +- rmesa->swtcl.numverts += nverts; +- return head; +- } ++ ++ r200EmitVbufPrim( rmesa, ++ rmesa->radeon.swtcl.hw_primitive, ++ rmesa->radeon.swtcl.numverts); + + } + +- + /**************************************************************************/ + + +@@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx ); + #undef LOCAL_VARS + #undef ALLOC_VERTS + #define CTX_ARG r200ContextPtr rmesa +-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +-#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 ) ++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size ++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) + #define LOCAL_VARS \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- const char *r200verts = (char *)rmesa->swtcl.verts; +-#define VERT(x) (r200Vertex *)(r200verts + ((x) * 
vertsize * sizeof(int))) +-#define VERTEX r200Vertex ++ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; ++#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int))) ++#define VERTEX radeonVertex + #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS)) + + #undef TAG +@@ -456,11 +396,11 @@ static struct { + #define VERT_Y(_v) _v->v.y + #define VERT_Z(_v) _v->v.z + #define AREA_IS_CCW( a ) (a < 0) +-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) ++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) + + #define VERT_SET_RGBA( v, c ) \ + do { \ +- r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]); \ ++ radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ +@@ -472,7 +412,7 @@ do { \ + #define VERT_SET_SPEC( v, c ) \ + do { \ + if (specoffset) { \ +- r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]); \ ++ radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \ +@@ -481,8 +421,8 @@ do { \ + #define VERT_COPY_SPEC( v0, v1 ) \ + do { \ + if (specoffset) { \ +- r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]); \ +- r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]); \ ++ radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \ ++ radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \ + spec0->red = spec1->red; \ + spec0->green = spec1->green; \ + spec0->blue = spec1->blue; \ +@@ -513,7 +453,7 @@ do { \ + ***********************************************************************/ + + #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) ) +-#define 
RENDER_PRIMITIVE rmesa->swtcl.render_primitive ++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive + #undef TAG + #define TAG(x) x + #include "tnl_dd/t_dd_unfilled.h" +@@ -569,8 +509,8 @@ static void init_rast_tab( void ) + #undef LOCAL_VARS + #define LOCAL_VARS \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ +- const GLuint vertsize = rmesa->swtcl.vertex_size; \ +- const char *r200verts = (char *)rmesa->swtcl.verts; \ ++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ ++ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +@@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx ) + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + +- if (!rmesa->TclFallback || rmesa->Fallback) ++ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) + return; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R200_UNFILLED_BIT; + +- if (index != rmesa->swtcl.RenderIndex) { ++ if (index != rmesa->radeon.swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; +@@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx ) + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + +- rmesa->swtcl.RenderIndex = index; ++ rmesa->radeon.swtcl.RenderIndex = index; + } + } + +@@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + +- if (rmesa->swtcl.hw_primitive != hwprim) { ++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + /* need to disable perspective-correct texturing for point sprites */ + if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { + if (rmesa->hw.set.cmd[SET_RE_CNTL] & 
R200_PERSPECTIVE_ENABLE) { +@@ -649,14 +589,14 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) + rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE; + } + R200_NEWPRIM( rmesa ); +- rmesa->swtcl.hw_primitive = hwprim; ++ rmesa->radeon.swtcl.hw_primitive = hwprim; + } + } + + static void r200RenderPrimitive( GLcontext *ctx, GLenum prim ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- rmesa->swtcl.render_primitive = prim; ++ rmesa->radeon.swtcl.render_primitive = prim; + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) ); + } +@@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); +- GLuint oldfallback = rmesa->Fallback; ++ GLuint oldfallback = rmesa->radeon.Fallback; + + if (mode) { +- rmesa->Fallback |= bit; ++ rmesa->radeon.Fallback |= bit; + if (oldfallback == 0) { +- R200_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE ); + _swsetup_Wakeup( ctx ); +- rmesa->swtcl.RenderIndex = ~0; ++ rmesa->radeon.swtcl.RenderIndex = ~0; + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); +@@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + } + } + else { +- rmesa->Fallback &= ~bit; ++ rmesa->radeon.Fallback &= ~bit; + if (oldfallback == bit) { + + _swrast_flush( ctx ); +@@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + + tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple; + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE ); +- if (rmesa->TclFallback) { +- /* These are already done if rmesa->TclFallback goes to ++ if (rmesa->radeon.TclFallback) { ++ /* These are already done if rmesa->radeon.TclFallback goes to + * zero above. 
But not if it doesn't (R200_NO_TCL for + * example?) + */ + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); +- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); ++ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset ); + r200ChooseVertexState( ctx ); + r200ChooseRenderState( ctx ); + } +@@ -772,7 +712,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const GLfloat *rc = ctx->Current.RasterColor; + GLint row, col; +- r200Vertex vert; ++ radeonVertex vert; + GLuint orig_vte; + GLuint h; + +@@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + vte |= R200_VTX_W0_FMT; + vap &= ~R200_VAP_FORCE_W_TO_ONE; + +- rmesa->swtcl.vertex_size = 5; ++ rmesa->radeon.swtcl.vertex_size = 5; + + if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) + || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { +@@ -871,10 +811,10 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + + /* Update window height + */ +- LOCK_HARDWARE( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- h = rmesa->dri.drawable->h + rmesa->dri.drawable->y; +- px += rmesa->dri.drawable->x; ++ LOCK_HARDWARE( &rmesa->radeon ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); ++ h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y; ++ px += rmesa->radeon.dri.drawable->x; + + /* Clipping handled by existing mechansims in r200_ioctl.c? + */ +@@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + + /* Need to restore vertexformat? 
+ */ +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + r200ChooseVertexState( ctx ); + } + +@@ -962,17 +902,13 @@ void r200InitSwtcl( GLcontext *ctx ) + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 36 * sizeof(GLfloat) ); + +- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; +- rmesa->swtcl.RenderIndex = ~0; +- rmesa->swtcl.render_primitive = GL_TRIANGLES; +- rmesa->swtcl.hw_primitive = 0; ++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; ++ rmesa->radeon.swtcl.RenderIndex = ~0; ++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; ++ rmesa->radeon.swtcl.hw_primitive = 0; + } + + + void r200DestroySwtcl( GLcontext *ctx ) + { +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if (rmesa->swtcl.indexed_verts.buf) +- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ ); + } +diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h +index 8c29fd0..a4051a4 100644 +--- a/src/mesa/drivers/dri/r200/r200_swtcl.h ++++ b/src/mesa/drivers/dri/r200/r200_swtcl.h +@@ -52,15 +52,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count, + extern void r200PrintSetupFlags(char *msg, GLuint flags ); + + +-extern void r200_emit_indexed_verts( GLcontext *ctx, +- GLuint start, +- GLuint count ); +- + extern void r200_translate_vertex( GLcontext *ctx, +- const r200Vertex *src, ++ const radeonVertex *src, + SWvertex *dst ); + +-extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v ); ++extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v ); + + extern void r200_import_float_colors( GLcontext *ctx ); + extern void r200_import_float_spec_colors( GLcontext *ctx ); +@@ -70,5 +66,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ); + +- ++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + #endif +diff --git 
a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c +index 99aecfe..8e0fb14 100644 +--- a/src/mesa/drivers/dri/r200/r200_tcl.c ++++ b/src/mesa/drivers/dri/r200/r200_tcl.c +@@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = { + + #define RESET_STIPPLE() do { \ + R200_STATECHANGE( rmesa, lin ); \ +- r200EmitState( rmesa ); \ ++ radeonEmitState(&rmesa->radeon); \ + } while (0) + + #define AUTO_STIPPLE( mode ) do { \ +@@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = { + else \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ + ~R200_LINE_PATTERN_AUTO_RESET; \ +- r200EmitState( rmesa ); \ ++ radeonEmitState(&rmesa->radeon); \ + } while (0) + + +@@ -142,25 +142,23 @@ static GLboolean discrete_prim[0x10] = { + + static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) + { +- if (rmesa->dma.flush == r200FlushElts && +- rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) { ++ if (rmesa->radeon.dma.flush == r200FlushElts && ++ rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { + +- GLushort *dest = (GLushort *)(rmesa->store.cmd_buf + +- rmesa->store.cmd_used); ++ GLushort *dest = (GLushort *)(rmesa->tcl.elt_dma_bo->ptr + ++ rmesa->tcl.elt_used); + +- rmesa->store.cmd_used += nr*2; ++ rmesa->tcl.elt_used += nr*2; + + return dest; + } + else { +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); ++ if (rmesa->radeon.dma.flush) ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + +- r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + +- rmesa->hw.max_state_size + ELTS_BUFSZ(nr) ); ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__); + + r200EmitAOS( rmesa, +- rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, 0 ); + + return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr ); +@@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx, + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + r200TclPrimitive( ctx, prim, hwprim ); + +- 
r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + +- rmesa->hw.max_state_size + VBUF_BUFSZ ); ++ // fprintf(stderr,"Emit prim %d\n", rmesa->tcl.nr_aos_components); ++ rcommonEnsureCmdBufSpace( &rmesa->radeon, ++ AOS_BUFSZ(rmesa->tcl.nr_aos_components) + ++ rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); + + r200EmitAOS( rmesa, +- rmesa->tcl.aos_components, +- rmesa->tcl.nr_aos_components, +- start ); ++ rmesa->tcl.nr_aos_components, ++ start ); + + /* Why couldn't this packet have taken an offset param? + */ +@@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, + + /* TODO: separate this from the swtnl pipeline + */ +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + return GL_TRUE; /* fallback to software t&l */ + + if (R200_DEBUG & DEBUG_PRIMS) +@@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, + + /* Validate state: + */ +- if (rmesa->NewGLState) +- r200ValidateState( ctx ); ++ if (rmesa->radeon.NewGLState) ++ if (!r200ValidateState( ctx )) ++ return GL_TRUE; /* fallback to sw t&l */ + + if (!ctx->VertexProgram._Enabled) { + /* NOTE: inputs != tnl->render_inputs - these are the untransformed +@@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx ) + + tnl->Driver.NotifyMaterialChange = r200UpdateMaterial; + +- if ( rmesa->dma.flush ) +- rmesa->dma.flush( rmesa ); ++ if ( rmesa->radeon.dma.flush ) ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + +- rmesa->dma.flush = NULL; ++ rmesa->radeon.dma.flush = NULL; + +- if (rmesa->swtcl.indexed_verts.buf) +- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, +- __FUNCTION__ ); +- + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE; + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE; +@@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit) + void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + { + r200ContextPtr rmesa = 
R200_CONTEXT(ctx); +- GLuint oldfallback = rmesa->TclFallback; ++ GLuint oldfallback = rmesa->radeon.TclFallback; + + if (mode) { +- rmesa->TclFallback |= bit; ++ rmesa->radeon.TclFallback |= bit; + if (oldfallback == 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "R200 begin tcl fallback %s\n", +@@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + } + } + else { +- rmesa->TclFallback &= ~bit; ++ rmesa->radeon.TclFallback &= ~bit; + if (oldfallback == bit) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "R200 end tcl fallback %s\n", +diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c +index 5a4db33..19a6cad 100644 +--- a/src/mesa/drivers/dri/r200/r200_tex.c ++++ b/src/mesa/drivers/dri/r200/r200_tex.c +@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/teximage.h" + #include "main/texobj.h" + +-#include "texmem.h" +- ++#include "radeon_mipmap_tree.h" + #include "r200_context.h" + #include "r200_state.h" + #include "r200_ioctl.h" +@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * \param twrap Wrap mode for the \a t texture coordinate + */ + +-static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap ) ++static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap ) + { + GLboolean is_clamp = GL_FALSE; + GLboolean is_clamp_to_border = GL_FALSE; ++ struct gl_texture_object *tObj = &t->base; + + t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D); + +@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum + _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + } + +- if (t->base.tObj->Target != GL_TEXTURE_1D) { ++ if (tObj->Target != GL_TEXTURE_1D) { + switch ( twrap ) { + case GL_REPEAT: + t->pp_txfilter |= R200_CLAMP_T_WRAP; +@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum + t->border_fallback = (is_clamp && is_clamp_to_border); + } + +-static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) ++static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max ) + { + t->pp_txfilter &= ~R200_MAX_ANISO_MASK; + +@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) + * \param magf Texture magnification mode + */ + +-static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) ++static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) + { + GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK); + ++ /* Force revalidation to account for switches from/to mipmapping. 
*/ ++ t->validated = GL_FALSE; ++ + t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK); + t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK; + +@@ -267,693 +270,12 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) + } + } + +-static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] ) +-{ +- t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] ); +-} +- +- +-/** +- * Allocate space for and load the mesa images into the texture memory block. +- * This will happen before drawing with a new texture, or drawing with a +- * texture after it was swapped out or teximaged again. +- */ +- +-static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj ) +-{ +- r200TexObjPtr t; +- +- t = CALLOC_STRUCT( r200_tex_obj ); +- texObj->DriverData = t; +- if ( t != NULL ) { +- if ( R200_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, +- (void *)t ); +- } +- +- /* Initialize non-image-dependent parts of the state: +- */ +- t->base.tObj = texObj; +- t->border_fallback = GL_FALSE; +- +- make_empty_list( & t->base ); +- +- r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); +- r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); +- r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); +- r200SetTexBorderColor( t, texObj->_BorderChan ); +- } +- +- return t; +-} +- +-/* try to find a format which will only need a memcopy */ +-static const struct gl_texture_format * +-r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType ) +-{ +- const GLuint ui = 1; +- const GLubyte littleEndian = *((const GLubyte *) &ui); +- +- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || +- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { +- return &_mesa_texformat_rgba8888; +- } +- else if 
((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || +- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { +- return &_mesa_texformat_rgba8888_rev; +- } +- else return _dri_texformat_argb8888; +-} +- +-static const struct gl_texture_format * +-r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat, +- GLenum format, GLenum type ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- const GLboolean do32bpt = +- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); +- const GLboolean force16bpt = +- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); +- (void) format; +- +- switch ( internalFormat ) { +- case 4: +- case GL_RGBA: +- case GL_COMPRESSED_RGBA: +- switch ( type ) { +- case GL_UNSIGNED_INT_10_10_10_2: +- case GL_UNSIGNED_INT_2_10_10_10_REV: +- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- default: +- return do32bpt ? +- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; +- } +- +- case 3: +- case GL_RGB: +- case GL_COMPRESSED_RGB: +- switch ( type ) { +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_5_6_5: +- case GL_UNSIGNED_SHORT_5_6_5_REV: +- return _dri_texformat_rgb565; +- default: +- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; +- } +- +- case GL_RGBA8: +- case GL_RGB10_A2: +- case GL_RGBA12: +- case GL_RGBA16: +- return !force16bpt ? 
+- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; +- +- case GL_RGBA4: +- case GL_RGBA2: +- return _dri_texformat_argb4444; +- +- case GL_RGB5_A1: +- return _dri_texformat_argb1555; +- +- case GL_RGB8: +- case GL_RGB10: +- case GL_RGB12: +- case GL_RGB16: +- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; +- +- case GL_RGB5: +- case GL_RGB4: +- case GL_R3_G3_B2: +- return _dri_texformat_rgb565; +- +- case GL_ALPHA: +- case GL_ALPHA4: +- case GL_ALPHA8: +- case GL_ALPHA12: +- case GL_ALPHA16: +- case GL_COMPRESSED_ALPHA: +- /* can't use a8 format since interpreting hw I8 as a8 would result +- in wrong rgb values (same as alpha value instead of 0). */ +- return _dri_texformat_al88; +- +- case 1: +- case GL_LUMINANCE: +- case GL_LUMINANCE4: +- case GL_LUMINANCE8: +- case GL_LUMINANCE12: +- case GL_LUMINANCE16: +- case GL_COMPRESSED_LUMINANCE: +- return _dri_texformat_l8; +- +- case 2: +- case GL_LUMINANCE_ALPHA: +- case GL_LUMINANCE4_ALPHA4: +- case GL_LUMINANCE6_ALPHA2: +- case GL_LUMINANCE8_ALPHA8: +- case GL_LUMINANCE12_ALPHA4: +- case GL_LUMINANCE12_ALPHA12: +- case GL_LUMINANCE16_ALPHA16: +- case GL_COMPRESSED_LUMINANCE_ALPHA: +- return _dri_texformat_al88; +- +- case GL_INTENSITY: +- case GL_INTENSITY4: +- case GL_INTENSITY8: +- case GL_INTENSITY12: +- case GL_INTENSITY16: +- case GL_COMPRESSED_INTENSITY: +- return _dri_texformat_i8; +- +- case GL_YCBCR_MESA: +- if (type == GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE) +- return &_mesa_texformat_ycbcr; +- else +- return &_mesa_texformat_ycbcr_rev; +- +- case GL_RGB_S3TC: +- case GL_RGB4_S3TC: +- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgb_dxt1; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgba_dxt1; +- +- case GL_RGBA_S3TC: +- case GL_RGBA4_S3TC: +- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: +- return &_mesa_texformat_rgba_dxt3; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: +- return &_mesa_texformat_rgba_dxt5; 
+- +- default: +- _mesa_problem(ctx, +- "unexpected internalFormat 0x%x in r200ChooseTextureFormat", +- (int) internalFormat); +- return NULL; +- } +- +- return NULL; /* never get here */ +-} +- +- +-static GLboolean +-r200ValidateClientStorage( GLcontext *ctx, GLenum target, +- GLint internalFormat, +- GLint srcWidth, GLint srcHeight, +- GLenum format, GLenum type, const void *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +- +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf(stderr, "intformat %s format %s type %s\n", +- _mesa_lookup_enum_by_nr( internalFormat ), +- _mesa_lookup_enum_by_nr( format ), +- _mesa_lookup_enum_by_nr( type )); +- +- if (!ctx->Unpack.ClientStorage) +- return 0; +- +- if (ctx->_ImageTransferState || +- texImage->IsCompressed || +- texObj->GenerateMipmap) +- return 0; +- +- +- /* This list is incomplete, may be different on ppc??? +- */ +- switch ( internalFormat ) { +- case GL_RGBA: +- if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) { +- texImage->TexFormat = _dri_texformat_argb8888; +- } +- else +- return 0; +- break; +- +- case GL_RGB: +- if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) { +- texImage->TexFormat = _dri_texformat_rgb565; +- } +- else +- return 0; +- break; +- +- case GL_YCBCR_MESA: +- if ( format == GL_YCBCR_MESA && +- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) { +- texImage->TexFormat = &_mesa_texformat_ycbcr_rev; +- } +- else if ( format == GL_YCBCR_MESA && +- (type == GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE)) { +- texImage->TexFormat = &_mesa_texformat_ycbcr; +- } +- else +- return 0; +- break; +- +- default: +- return 0; +- } +- +- /* Could deal with these packing issues, but currently don't: +- */ +- if (packing->SkipPixels || +- packing->SkipRows || +- packing->SwapBytes || +- packing->LsbFirst) { +- return 0; +- } +- +- { +- GLint 
srcRowStride = _mesa_image_row_stride(packing, srcWidth, +- format, type); +- +- +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf(stderr, "%s: srcRowStride %d/%x\n", +- __FUNCTION__, srcRowStride, srcRowStride); +- +- /* Could check this later in upload, pitch restrictions could be +- * relaxed, but would need to store the image pitch somewhere, +- * as packing details might change before image is uploaded: +- */ +- if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) || +- (srcRowStride & 63)) +- return 0; +- +- +- /* Have validated that _mesa_transfer_teximage would be a straight +- * memcpy at this point. NOTE: future calls to TexSubImage will +- * overwrite the client data. This is explicitly mentioned in the +- * extension spec. +- */ +- texImage->Data = (void *)pixels; +- texImage->IsClientData = GL_TRUE; +- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; +- +- return 1; +- } +-} +- +- +-static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint border, +- GLenum format, GLenum type, const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); +- return; +- } +- } +- +- /* Note, this will call ChooseTextureFormat */ +- _mesa_store_teximage1d(ctx, target, level, internalFormat, +- width, border, format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +- +-static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, +- GLsizei width, +- GLenum format, GLenum type, +- const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct 
gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); +- return; +- } +- } +- +- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, +- format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +- +-static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLenum format, GLenum type, const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if ( t != NULL ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +- +- if (r200ValidateClientStorage( ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); +- } +- else { +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using 
normal storage\n", __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r200ChooseTextureFormat. +- */ +- _mesa_store_teximage2d(ctx, target, level, internalFormat, +- width, height, border, format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +- } +-} +- +- +-static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, +- GLsizei width, GLsizei height, +- GLenum format, GLenum type, +- const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); +- return; +- } +- } +- +- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, +- height, format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +- +-static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLsizei imageSize, const GLvoid *data, +- 
struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) ++static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) + { +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if ( t != NULL ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +-/* can't call this, different parameters. Would never evaluate to true anyway currently +- if (r200ValidateClientStorage( ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); +- } +- else */{ +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r200ChooseTextureFormat. 
+- */ +- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, +- height, border, imageSize, data, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +- } +-} +- +- +-static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, +- GLsizei width, GLsizei height, +- GLenum format, +- GLsizei imageSize, const GLvoid *data, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); +- return; +- } +- } +- +- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, +- height, format, imageSize, data, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +- +-#if ENABLE_HW_3D_TEXTURE +-static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint depth, +- GLint border, +- GLenum format, GLenum type, const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = 
(driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +- +-#if 0 +- if (r200ValidateClientStorage( ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); +- } +- else +-#endif +- { +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r200ChooseTextureFormat. +- */ +- _mesa_store_teximage3d(ctx, target, level, internalFormat, +- width, height, depth, border, +- format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +- } ++ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); + } +-#endif +- + +-#if ENABLE_HW_3D_TEXTURE +-static void +-r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, GLint zoffset, +- GLsizei width, GLsizei height, GLsizei depth, +- GLenum format, GLenum type, +- const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +-/* fprintf(stderr, "%s\n", __FUNCTION__); */ +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) r200AllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); +- return; +- } +- texObj->DriverData = t; +- } + +- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, +- width, height, depth, +- 
format, type, pixels, packing, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +-#endif + + + +@@ -978,7 +300,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, + GLubyte c[4]; + GLuint envColor; + UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor ); +- envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] ); ++ envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); + if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) { + R200_STATECHANGE( rmesa, tf ); + rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor; +@@ -997,7 +319,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, + * NOTE: Add a small bias to the bias for conform mipsel.c test. + */ + bias = *param + .01; +- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? ++ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? + 0.0 : -16.0; + bias = CLAMP( bias, min, 16.0 ); + b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK; +@@ -1034,7 +356,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat *params ) + { +- r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData; ++ radeonTexObj* t = radeon_tex_obj(texObj); + + if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %s )\n", __FUNCTION__, +@@ -1068,59 +390,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. 
+ */ +- driSwapOutTextureObject( (driTextureObject *) t ); ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; ++ t->validated = GL_FALSE; ++ } + break; + + default: + return; + } +- +- /* Mark this texobj as dirty (one bit per tex unit) +- */ +- t->dirty_state = TEX_ALL; + } + + +- +-static void r200BindTexture( GLcontext *ctx, GLenum target, +- struct gl_texture_object *texObj ) +-{ +- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { +- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, +- ctx->Texture.CurrentUnit ); +- } +- +- if ( (target == GL_TEXTURE_1D) +- || (target == GL_TEXTURE_2D) +-#if ENABLE_HW_3D_TEXTURE +- || (target == GL_TEXTURE_3D) +-#endif +- || (target == GL_TEXTURE_CUBE_MAP) +- || (target == GL_TEXTURE_RECTANGLE_NV) ) { +- assert( texObj->DriverData != NULL ); +- } +-} +- +- +-static void r200DeleteTexture( GLcontext *ctx, +- struct gl_texture_object *texObj ) ++static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { +- fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, +- _mesa_lookup_enum_by_nr( texObj->Target ) ); ++ radeonTexObj* t = radeon_tex_obj(texObj); ++ ++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { ++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, ++ (void *)texObj, ++ _mesa_lookup_enum_by_nr(texObj->Target)); ++ } ++ ++ if (rmesa) { ++ int i; ++ radeon_firevertices(&rmesa->radeon); ++ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) { ++ if ( t == rmesa->state.texture.unit[i].texobj ) { ++ rmesa->state.texture.unit[i].texobj = NULL; ++ rmesa->hw.tex[i].dirty = GL_FALSE; ++ rmesa->hw.cube[i].dirty = GL_FALSE; ++ } ++ } + } +- +- if ( t != NULL ) { +- if ( rmesa ) { +- R200_FIREVERTICES( rmesa ); +- } +- +- driDestroyTextureObject( t ); ++ ++ if 
(t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; + } +- /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); + } + +@@ -1150,46 +459,59 @@ static void r200TexGen( GLcontext *ctx, + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. +- * Note: we could use containment here to 'derive' the driver-specific +- * texture object from the core mesa gl_texture_object. Not done at this time. + * Fixup MaxAnisotropy according to user preference. + */ +-static struct gl_texture_object * +-r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) ++static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, ++ GLuint name, ++ GLenum target) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_object *obj; +- obj = _mesa_new_texture_object(ctx, name, target); +- if (!obj) +- return NULL; +- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; +- r200AllocTexObj( obj ); +- return obj; ++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); ++ ++ ++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { ++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, ++ t, _mesa_lookup_enum_by_nr(target)); ++ } + -+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, -+ uint32_t size, -+ uint32_t alignment, -+ uint32_t domains, -+ uint32_t flags) ++ _mesa_initialize_texture_object(&t->base, name, target); ++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; ++ ++ /* Initialize hardware state */ ++ r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR ); ++ r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); ++ r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter); ++ r200SetTexBorderColor(t, t->base._BorderChan); ++ ++ return &t->base; + } + + ++ + void r200InitTextureFuncs( struct dd_function_table *functions ) + { + /* Note: we only plug in the functions we 
implement in the driver + * since _mesa_init_driver_functions() was already called. + */ +- functions->ChooseTextureFormat = r200ChooseTextureFormat; +- functions->TexImage1D = r200TexImage1D; +- functions->TexImage2D = r200TexImage2D; ++ functions->ChooseTextureFormat = radeonChooseTextureFormat; ++ functions->TexImage1D = radeonTexImage1D; ++ functions->TexImage2D = radeonTexImage2D; + #if ENABLE_HW_3D_TEXTURE +- functions->TexImage3D = r200TexImage3D; ++ functions->TexImage3D = radeonTexImage3D; + #else + functions->TexImage3D = _mesa_store_teximage3d; + #endif +- functions->TexSubImage1D = r200TexSubImage1D; +- functions->TexSubImage2D = r200TexSubImage2D; ++ functions->TexSubImage1D = radeonTexSubImage1D; ++ functions->TexSubImage2D = radeonTexSubImage2D; + #if ENABLE_HW_3D_TEXTURE +- functions->TexSubImage3D = r200TexSubImage3D; ++ functions->TexSubImage3D = radeonTexSubImage3D; + #else + functions->TexSubImage3D = _mesa_store_texsubimage3d; + #endif ++ functions->GetTexImage = radeonGetTexImage; ++ functions->GetCompressedTexImage = radeonGetCompressedTexImage; + functions->NewTextureObject = r200NewTextureObject; +- functions->BindTexture = r200BindTexture; ++ // functions->BindTexture = r200BindTexture; + functions->DeleteTexture = r200DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + +@@ -1197,22 +519,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) + functions->TexParameter = r200TexParameter; + functions->TexGen = r200TexGen; + +- functions->CompressedTexImage2D = r200CompressedTexImage2D; +- functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D; ++ functions->CompressedTexImage2D = radeonCompressedTexImage2D; ++ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; ++ ++ functions->GenerateMipmap = radeonGenerateMipmap; ++ ++ functions->NewTextureImage = radeonNewTextureImage; ++ functions->FreeTexImageData = radeonFreeTexImageData; ++ functions->MapTexture = radeonMapTexture; ++ 
functions->UnmapTexture = radeonUnmapTexture; + + driInitTextureFormats(); + +-#if 000 +- /* moved or obsolete code */ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- driInitTextureObjects( ctx, & rmesa->swapped, +- DRI_TEXMGR_DO_TEXTURE_1D +- | DRI_TEXMGR_DO_TEXTURE_2D ); +- +- /* Hack: r200NewTextureObject is not yet installed when the +- * default textures are created. Therefore set MaxAnisotropy of the +- * default 2D texture now. */ +- ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache, +- "def_max_anisotropy"); +-#endif + } +diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h +index 10ff8e8..55592ed 100644 +--- a/src/mesa/drivers/dri/r200/r200_tex.h ++++ b/src/mesa/drivers/dri/r200/r200_tex.h +@@ -41,9 +41,9 @@ extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + + extern void r200UpdateTextureState( GLcontext *ctx ); + +-extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ); ++extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face ); + +-extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ); ++extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t ); + + extern void r200InitTextureFuncs( struct dd_function_table *functions ); + +diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c +deleted file mode 100644 +index 3b81ac0..0000000 +--- a/src/mesa/drivers/dri/r200/r200_texmem.c ++++ /dev/null +@@ -1,530 +0,0 @@ +-/************************************************************************** +- +-Copyright (C) Tungsten Graphics 2002. All Rights Reserved. +-The Weather Channel, Inc. funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 +-license. This notice must be preserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation on the rights to use, copy, modify, merge, publish, +-distribute, sub license, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +-SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Kevin E. Martin +- * Gareth Hughes +- * +- */ +- +-#include <errno.h> +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/context.h" +-#include "main/colormac.h" +-#include "main/macros.h" +-#include "r200_context.h" +-#include "r200_ioctl.h" +-#include "r200_tex.h" +-#include "radeon_reg.h" +- +-#include <unistd.h> /* for usleep() */ +- +- +-/** +- * Destroy any device-dependent state associated with the texture. This may +- * include NULLing out hardware state that points to the texture. 
+- */ +-void +-r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ) +-{ +- if ( R200_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, +- (void *)t, (void *)t->base.tObj ); +- } +- +- if ( rmesa != NULL ) { +- unsigned i; +- +- +- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { +- if ( t == rmesa->state.texture.unit[i].texobj ) { +- rmesa->state.texture.unit[i].texobj = NULL; +- rmesa->hw.tex[i].dirty = GL_FALSE; +- rmesa->hw.cube[i].dirty = GL_FALSE; +- } +- } +- } +-} +- +- +-/* ------------------------------------------------------------ +- * Texture image conversions +- */ +- +- +-static void r200UploadGARTClientSubImage( r200ContextPtr rmesa, +- r200TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint hwlevel, +- GLint x, GLint y, +- GLint width, GLint height ) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- GLuint srcPitch, dstPitch; +- int blit_format; +- int srcOffset; +- +- /* +- * XXX it appears that we always upload the full image, not a subimage. +- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever +- * changed, the src pitch will have to change. 
+- */ +- switch ( texFormat->TexelBytes ) { +- case 1: +- blit_format = R200_CP_COLOR_FORMAT_CI8; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 2: +- blit_format = R200_CP_COLOR_FORMAT_RGB565; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 4: +- blit_format = R200_CP_COLOR_FORMAT_ARGB8888; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- default: +- return; +- } +- +- t->image[0][hwlevel].data = texImage->Data; +- srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); +- +- assert( srcOffset != ~0 ); +- +- /* Don't currently need to cope with small pitches? +- */ +- width = texImage->Width; +- height = texImage->Height; +- +- r200EmitWait( rmesa, RADEON_WAIT_3D ); +- +- r200EmitBlit( rmesa, blit_format, +- srcPitch, +- srcOffset, +- dstPitch, +- t->bufAddr, +- x, +- y, +- t->image[0][hwlevel].x + x, +- t->image[0][hwlevel].y + y, +- width, +- height ); +- +- r200EmitWait( rmesa, RADEON_WAIT_2D ); +-} +- +-static void r200UploadRectSubImage( r200ContextPtr rmesa, +- r200TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint x, GLint y, +- GLint width, GLint height ) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- int blit_format, dstPitch, done; +- +- switch ( texFormat->TexelBytes ) { +- case 1: +- blit_format = R200_CP_COLOR_FORMAT_CI8; +- break; +- case 2: +- blit_format = R200_CP_COLOR_FORMAT_RGB565; +- break; +- case 4: +- blit_format = R200_CP_COLOR_FORMAT_ARGB8888; +- break; +- default: +- return; +- } +- +- t->image[0][0].data = texImage->Data; +- +- /* Currently don't need to cope with small pitches. 
+- */ +- width = texImage->Width; +- height = texImage->Height; +- dstPitch = t->pp_txpitch + 32; +- +- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { +- /* In this case, could also use GART texturing. This is +- * currently disabled, but has been tested & works. +- */ +- if ( !t->image_override ) +- t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); +- t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32; +- +- if (R200_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "Using GART texturing for rectangular client texture\n"); +- +- /* Release FB memory allocated for this image: +- */ +- /* FIXME This may not be correct as driSwapOutTextureObject sets +- * FIXME dirty_images. It may be fine, though. +- */ +- if ( t->base.memBlock ) { +- driSwapOutTextureObject( (driTextureObject *) t ); +- } +- } +- else if (texImage->IsClientData) { +- /* Data already in GART memory, with usable pitch. +- */ +- GLuint srcPitch; +- srcPitch = texImage->RowStride * texFormat->TexelBytes; +- r200EmitBlit( rmesa, +- blit_format, +- srcPitch, +- r200GartOffsetFromVirtual( rmesa, texImage->Data ), +- dstPitch, t->bufAddr, +- 0, 0, +- 0, 0, +- width, height ); +- } +- else { +- /* Data not in GART memory, or bad pitch. 
+- */ +- for (done = 0; done < height ; ) { +- struct r200_dma_region region; +- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); +- int src_pitch; +- char *tex; +- +- src_pitch = texImage->RowStride * texFormat->TexelBytes; +- +- tex = (char *)texImage->Data + done * src_pitch; +- +- memset(&region, 0, sizeof(region)); +- r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 ); +- +- /* Copy texdata to dma: +- */ +- if (0) +- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", +- __FUNCTION__, src_pitch, dstPitch); +- +- if (src_pitch == dstPitch) { +- memcpy( region.address + region.start, tex, lines * src_pitch ); +- } +- else { +- char *buf = region.address + region.start; +- int i; +- for (i = 0 ; i < lines ; i++) { +- memcpy( buf, tex, src_pitch ); +- buf += dstPitch; +- tex += src_pitch; +- } +- } +- +- r200EmitWait( rmesa, RADEON_WAIT_3D ); +- +- /* Blit to framebuffer +- */ +- r200EmitBlit( rmesa, +- blit_format, +- dstPitch, GET_START( &region ), +- dstPitch | (t->tile_bits >> 16), +- t->bufAddr, +- 0, 0, +- 0, done, +- width, lines ); +- +- r200EmitWait( rmesa, RADEON_WAIT_2D ); +- +- r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ ); +- done += lines; +- } +- } +-} +- +- +-/** +- * Upload the texture image associated with texture \a t at the specified +- * level at the address relative to \a start. 
+- */ +-static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, +- GLint hwlevel, +- GLint x, GLint y, GLint width, GLint height, +- GLuint face ) +-{ +- struct gl_texture_image *texImage = NULL; +- GLuint offset; +- GLint imageWidth, imageHeight; +- GLint ret; +- drm_radeon_texture_t tex; +- drm_radeon_tex_image_t tmp; +- const int level = hwlevel + t->base.firstLevel; +- +- if ( R200_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", +- __FUNCTION__, (void *)t, (void *)t->base.tObj, +- level, width, height, face ); +- } +- +- ASSERT(face < 6); +- +- /* Ensure we have a valid texture to upload */ +- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { +- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); +- return; +- } +- +- texImage = t->base.tObj->Image[face][level]; +- +- if ( !texImage ) { +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); +- return; +- } +- if ( !texImage->Data ) { +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); +- return; +- } +- +- +- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- assert(level == 0); +- assert(hwlevel == 0); +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); +- r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height ); +- return; +- } +- else if (texImage->IsClientData) { +- if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is in GART client storage\n", +- __FUNCTION__); +- r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel, +- x, y, width, height ); +- return; +- } +- else if ( R200_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is in normal memory\n", +- __FUNCTION__); +- +- +- imageWidth = texImage->Width; +- imageHeight = texImage->Height; +- +- offset = t->bufAddr + t->base.totalSize / 6 * face; +- +- if ( 
R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { +- GLint imageX = 0; +- GLint imageY = 0; +- GLint blitX = t->image[face][hwlevel].x; +- GLint blitY = t->image[face][hwlevel].y; +- GLint blitWidth = t->image[face][hwlevel].width; +- GLint blitHeight = t->image[face][hwlevel].height; +- fprintf( stderr, " upload image: %d,%d at %d,%d\n", +- imageWidth, imageHeight, imageX, imageY ); +- fprintf( stderr, " upload blit: %d,%d at %d,%d\n", +- blitWidth, blitHeight, blitX, blitY ); +- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", +- (GLuint)offset, hwlevel, level ); +- } +- +- t->image[face][hwlevel].data = texImage->Data; +- +- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. +- * NOTE: we're always use a 1KB-wide blit and I8 texture format. +- * We used to use 1, 2 and 4-byte texels and used to use the texture +- * width to dictate the blit width - but that won't work for compressed +- * textures. (Brian) +- * NOTE: can't do that with texture tiling. (sroland) +- */ +- tex.offset = offset; +- tex.image = &tmp; +- /* copy (x,y,width,height,data) */ +- memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); +- +- if (texImage->TexFormat->TexelBytes) { +- /* use multi-byte upload scheme */ +- tex.height = imageHeight; +- tex.width = imageWidth; +- tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK; +- if (tex.format == R200_TXFORMAT_ABGR8888) { +- /* drm will refuse abgr8888 textures. */ +- tex.format = R200_TXFORMAT_ARGB8888; +- } +- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); +- tex.offset += tmp.x & ~1023; +- tmp.x = tmp.x % 1024; +- if (t->tile_bits & R200_TXO_MICRO_TILE) { +- /* need something like "tiled coordinates" ? 
*/ +- tmp.y = tmp.x / (tex.pitch * 128) * 2; +- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; +- tex.pitch |= RADEON_DST_TILE_MICRO >> 22; +- } +- else { +- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); +- } +- if ((t->tile_bits & R200_TXO_MACRO_TILE) && +- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) && +- ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) || +- (texImage->Height >= 16))) { +- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, +- OR if height is smaller than 8 automatically, but if micro tiling is active +- the limit is height 16 instead ? */ +- tex.pitch |= RADEON_DST_TILE_MACRO >> 22; +- } +- } +- else { +- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is +- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ +- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed +- so the kernel module reads the right amount of data. 
*/ +- tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = (BLIT_WIDTH_BYTES / 64); +- tex.height = (imageHeight + 3) / 4; +- tex.width = (imageWidth + 3) / 4; +- switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) { +- case R200_TXFORMAT_DXT1: +- tex.width *= 8; +- break; +- case R200_TXFORMAT_DXT23: +- case R200_TXFORMAT_DXT45: +- tex.width *= 16; +- break; +- default: +- fprintf(stderr, "unknown compressed tex format in uploadSubImage\n"); +- } +- } +- +- LOCK_HARDWARE( rmesa ); +- do { +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, +- &tex, sizeof(drm_radeon_texture_t) ); +- if (ret) { +- if (R200_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); +- usleep(1); +- } +- } while ( ret == -EAGAIN ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); +- fprintf( stderr, " offset=0x%08x\n", +- offset ); +- fprintf( stderr, " image width=%d height=%d\n", +- imageWidth, imageHeight ); +- fprintf( stderr, " blit width=%d height=%d data=%p\n", +- t->image[face][hwlevel].width, t->image[face][hwlevel].height, +- t->image[face][hwlevel].data ); +- exit( 1 ); +- } +-} +- +- +-/** +- * Upload the texture images associated with texture \a t. This might +- * require the allocation of texture memory. +- * +- * \param rmesa Context pointer +- * \param t Texture to be uploaded +- * \param face Cube map face to be uploaded. Zero for non-cube maps. 
+- */ +- +-int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ) +-{ +- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { +- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, +- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, +- t->base.firstLevel, t->base.lastLevel ); +- } +- +- if ( !t || t->base.totalSize == 0 || t->image_override ) +- return 0; +- +- if (R200_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); +- r200Finish( rmesa->glCtx ); +- } +- +- LOCK_HARDWARE( rmesa ); +- +- if ( t->base.memBlock == NULL ) { +- int heap; +- +- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, +- (driTextureObject *) t ); +- if ( heap == -1 ) { +- UNLOCK_HARDWARE( rmesa ); +- return -1; +- } +- +- /* Set the base offset of the texture image */ +- t->bufAddr = rmesa->r200Screen->texOffset[heap] +- + t->base.memBlock->ofs; +- t->pp_txoffset = t->bufAddr; +- +- if (!(t->base.tObj->Image[0][0]->IsClientData)) { +- /* hope it's safe to add that here... */ +- t->pp_txoffset |= t->tile_bits; +- } +- +- /* Mark this texobj as dirty on all units: +- */ +- t->dirty_state = TEX_ALL; +- } +- +- /* Let the world know we've used this memory recently. 
+- */ +- driUpdateTextureLRU( (driTextureObject *) t ); +- UNLOCK_HARDWARE( rmesa ); +- +- /* Upload any images that are new */ +- if (t->base.dirty_images[face]) { +- int i; +- for ( i = 0 ; i < numLevels ; i++ ) { +- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { +- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, +- t->image[face][i].height, face ); +- } +- } +- t->base.dirty_images[face] = 0; +- } +- +- +- if (R200_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); +- r200Finish( rmesa->glCtx ); +- } +- +- return 0; +-} +diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c +index 3f9a2f4..6432068 100644 +--- a/src/mesa/drivers/dri/r200/r200_texstate.c ++++ b/src/mesa/drivers/dri/r200/r200_texstate.c +@@ -40,6 +40,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/texobj.h" + #include "main/enums.h" + ++#include "radeon_common.h" ++#include "radeon_mipmap_tree.h" + #include "r200_context.h" + #include "r200_state.h" + #include "r200_ioctl.h" +@@ -139,257 +141,6 @@ static const struct tx_table tx_table_le[] = + #undef _ALPHA + #undef _INVALID + +-/** +- * This function computes the number of bytes of storage needed for +- * the given texture object (all mipmap levels, all cube faces). +- * The \c image[face][level].x/y/width/height parameters for upload/blitting +- * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here +- * too. +- * +- * \param rmesa Context pointer +- * \param tObj GL texture object whose images are to be posted to +- * hardware state. 
+- */ +-static void r200SetTexImages( r200ContextPtr rmesa, +- struct gl_texture_object *tObj ) +-{ +- r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData; +- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; +- GLint curOffset, blitWidth; +- GLint i, texelBytes; +- GLint numLevels; +- GLint log2Width, log2Height, log2Depth; +- +- /* Set the hardware texture format +- */ +- if ( !t->image_override ) { +- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { +- const struct tx_table *table = _mesa_little_endian() ? tx_table_le : +- tx_table_be; +- +- t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK | +- R200_TXFORMAT_ALPHA_IN_MAP); +- t->pp_txfilter &= ~R200_YUV_TO_RGB; +- +- t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format; +- t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter; +- } +- else { +- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); +- return; +- } +- } +- +- texelBytes = baseImage->TexFormat->TexelBytes; +- +- /* Compute which mipmap levels we really want to send to the hardware. +- */ +- +- driCalculateTextureFirstLastLevel( (driTextureObject *) t ); +- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; +- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; +- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; +- +- numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); +- +- /* Calculate mipmap offsets and dimensions for blitting (uploading) +- * The idea is that we lay out the mipmap levels within a block of +- * memory organized as a rectangle of width BLIT_WIDTH_BYTES. +- */ +- curOffset = 0; +- blitWidth = BLIT_WIDTH_BYTES; +- t->tile_bits = 0; +- +- /* figure out if this texture is suitable for tiling. */ +- if (texelBytes) { +- if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) && +- /* texrect might be able to use micro tiling too in theory? 
*/ +- (baseImage->Height > 1)) { +- /* allow 32 (bytes) x 1 mip (which will use two times the space +- the non-tiled version would use) max if base texture is large enough */ +- if ((numLevels == 1) || +- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && +- (baseImage->Width * texelBytes > 64)) || +- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { +- t->tile_bits |= R200_TXO_MICRO_TILE; +- } +- } +- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { +- /* we can set macro tiling even for small textures, they will be untiled anyway */ +- t->tile_bits |= R200_TXO_MACRO_TILE; +- } +- } +- +- for (i = 0; i < numLevels; i++) { +- const struct gl_texture_image *texImage; +- GLuint size; +- +- texImage = tObj->Image[0][i + t->base.firstLevel]; +- if ( !texImage ) +- break; +- +- /* find image size in bytes */ +- if (texImage->IsCompressed) { +- /* need to calculate the size AFTER padding even though the texture is +- submitted without padding. +- Only handle pot textures currently - don't know if npot is even possible, +- size calculation would certainly need (trivial) adjustments. +- Align (and later pad) to 32byte, not sure what that 64byte blit width is +- good for? */ +- if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) { +- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ +- if ((texImage->Width + 3) < 8) /* width one block */ +- size = texImage->CompressedSize * 4; +- else if ((texImage->Width + 3) < 16) +- size = texImage->CompressedSize * 2; +- else size = texImage->CompressedSize; +- } +- else /* DXT3/5, 16 bytes per block */ +- if ((texImage->Width + 3) < 8) +- size = texImage->CompressedSize * 2; +- else size = texImage->CompressedSize; +- } +- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; +- } +- else if (t->tile_bits & R200_TXO_MICRO_TILE) { +- /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, +- though the actual offset may be different (if texture is less than +- 32 bytes width) to the untiled case */ +- int w = (texImage->Width * texelBytes * 2 + 31) & ~31; +- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- else { +- int w = (texImage->Width * texelBytes + 31) & ~31; +- size = w * texImage->Height * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- assert(size > 0); +- +- /* Align to 32-byte offset. It is faster to do this unconditionally +- * (no branch penalty). +- */ +- +- curOffset = (curOffset + 0x1f) & ~0x1f; +- +- if (texelBytes) { +- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ +- t->image[0][i].y = 0; +- t->image[0][i].width = MIN2(size / texelBytes, blitWidth); +- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; +- } +- else { +- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; +- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; +- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); +- t->image[0][i].height = size / t->image[0][i].width; +- } +- +-#if 0 +- /* for debugging only and only applicable to non-rectangle targets */ +- assert(size % t->image[0][i].width == 0); +- assert(t->image[0][i].x == 0 +- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); +-#endif +- +- if (0) +- fprintf(stderr, +- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", +- i, texImage->Width, texImage->Height, +- t->image[0][i].x, t->image[0][i].y, +- t->image[0][i].width, t->image[0][i].height, size, curOffset); +- +- curOffset += size; +- +- } +- +- /* Align the total size of texture memory block. 
+- */ +- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +- +- /* Setup remaining cube face blits, if needed */ +- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- const GLuint faceSize = t->base.totalSize; +- GLuint face; +- /* reuse face 0 x/y/width/height - just update the offset when uploading */ +- for (face = 1; face < 6; face++) { +- for (i = 0; i < numLevels; i++) { +- t->image[face][i].x = t->image[0][i].x; +- t->image[face][i].y = t->image[0][i].y; +- t->image[face][i].width = t->image[0][i].width; +- t->image[face][i].height = t->image[0][i].height; +- } +- } +- t->base.totalSize = 6 * faceSize; /* total texmem needed */ +- } +- +- +- /* Hardware state: +- */ +- t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; +- t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT; +- +- t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | +- R200_TXFORMAT_HEIGHT_MASK | +- R200_TXFORMAT_CUBIC_MAP_ENABLE | +- R200_TXFORMAT_F5_WIDTH_MASK | +- R200_TXFORMAT_F5_HEIGHT_MASK); +- t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | +- (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); +- +- t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); +- if (tObj->Target == GL_TEXTURE_3D) { +- t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); +- t->pp_txformat_x |= R200_TEXCOORD_VOLUME; +- } +- else if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- ASSERT(log2Width == log2Height); +- t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | +- (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | +-/* don't think we need this bit, if it exists at all - fglrx does not set it */ +- (R200_TXFORMAT_CUBIC_MAP_ENABLE)); +- t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; +- t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | +- (log2Height << R200_FACE_HEIGHT_1_SHIFT) | +- (log2Width << R200_FACE_WIDTH_2_SHIFT) | +- (log2Height << R200_FACE_HEIGHT_2_SHIFT) | +- (log2Width << R200_FACE_WIDTH_3_SHIFT) | +- (log2Height << 
R200_FACE_HEIGHT_3_SHIFT) | +- (log2Width << R200_FACE_WIDTH_4_SHIFT) | +- (log2Height << R200_FACE_HEIGHT_4_SHIFT)); +- } +- else { +- /* If we don't in fact send enough texture coordinates, q will be 1, +- * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?) +- */ +- t->pp_txformat_x |= R200_TEXCOORD_PROJ; +- } +- +- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | +- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); +- +- /* Only need to round to nearest 32 for textures, but the blitter +- * requires 64-byte aligned pitches, and we may/may not need the +- * blitter. NPOT only! +- */ +- if ( !t->image_override ) { +- if (baseImage->IsCompressed) +- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); +- else +- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); +- t->pp_txpitch -= 32; +- } +- +- t->dirty_state = TEX_ALL; +- +- /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */ +-} +- +- +- + /* ================================================================ + * Texture combine functions + */ +@@ -981,20 +732,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + { + r200ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = +- _mesa_lookup_texture(rmesa->glCtx, texname); +- r200TexObjPtr t; ++ _mesa_lookup_texture(rmesa->radeon.glCtx, texname); ++ radeonTexObjPtr t = radeon_tex_obj(tObj); + + if (!tObj) + return; + +- t = (r200TexObjPtr) tObj->DriverData; +- + t->image_override = GL_TRUE; + + if (!offset) + return; + +- t->pp_txoffset = offset; ++ t->bo = NULL; ++ t->override_offset = offset; + t->pp_txpitch = pitch - 32; + + switch (depth) { +@@ -1207,12 +957,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) + R200_VOLUME_FILTER_MASK) + + ++static void disable_tex_obj_state( r200ContextPtr rmesa, ++ int unit ) +{ -+ struct bo_legacy *bo_legacy; -+ static int pgsize; ++ ++ R200_STATECHANGE( rmesa, vtx ); 
++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + -+ if (pgsize == 0) -+ pgsize = getpagesize() - 1; ++ if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<2 texunits. ++ */ + -+ bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy)); -+ if (bo_legacy == NULL) { -+ return NULL; -+ } -+ bo_legacy->base.bom = (struct radeon_bo_manager*)boml; -+ bo_legacy->base.handle = 0; -+ bo_legacy->base.size = size; -+ bo_legacy->base.alignment = alignment; -+ bo_legacy->base.domains = domains; -+ bo_legacy->base.flags = flags; -+ bo_legacy->base.ptr = NULL; -+ bo_legacy->map_count = 0; -+ bo_legacy->next = NULL; -+ bo_legacy->prev = NULL; -+ bo_legacy->pnext = NULL; -+ bo_legacy->pprev = NULL; -+ bo_legacy->next = boml->bos.next; -+ bo_legacy->prev = &boml->bos; -+ boml->bos.next = bo_legacy; -+ if (bo_legacy->next) { -+ bo_legacy->next->prev = bo_legacy; -+ } -+ return bo_legacy; -+} ++ { ++ GLuint tmp = rmesa->TexGenEnabled; + -+static int bo_dma_alloc(struct radeon_bo *bo) -+{ -+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; -+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; -+ drm_radeon_mem_alloc_t alloc; -+ unsigned size; -+ int base_offset; -+ int r; ++ rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<TexGenNeedNormals[unit] = GL_FALSE; ++ rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); + -+ /* align size on 4Kb */ -+ size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1); -+ alloc.region = RADEON_MEM_REGION_GART; -+ alloc.alignment = bo_legacy->base.alignment; -+ alloc.size = size; -+ alloc.region_offset = &base_offset; -+ r = drmCommandWriteRead(bo->bom->fd, -+ DRM_RADEON_ALLOC, -+ &alloc, -+ sizeof(alloc)); -+ if (r) { -+ /* ptr is set to NULL if dma allocation failed */ -+ bo_legacy->ptr = NULL; -+ return r; ++ if (tmp != rmesa->TexGenEnabled) { ++ rmesa->recheck_texgen[unit] = GL_TRUE; ++ 
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; ++ } ++ } ++} + static void import_tex_obj_state( r200ContextPtr rmesa, + int unit, +- r200TexObjPtr texobj ) ++ radeonTexObjPtr texobj ) + { + /* do not use RADEON_DB_STATE to avoid stale texture caches */ +- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; ++ GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + + R200_STATECHANGE( rmesa, tex[unit] ); + +@@ -1225,36 +1004,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa, + cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */ + cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */ + cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; +- if (rmesa->r200Screen->drmSupportsFragShader) { +- cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset; +- } +- else { +- cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset; +- } + +- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { +- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; +- GLuint bytesPerFace = texobj->base.totalSize / 6; +- ASSERT(texobj->base.totalSize % 6 == 0); ++ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) { ++ GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + + R200_STATECHANGE( rmesa, cube[unit] ); + cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; +- if (rmesa->r200Screen->drmSupportsFragShader) { ++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + /* that value is submitted twice. 
could change cube atom + to not include that command when new drm is used */ + cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + } +- cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace; +- cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace; +- cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace; +- cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace; +- cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace; + } + +- texobj->dirty_state &= ~(1<TexGenEnabled & R_BIT) { + if (texUnit->GenModeR != mode) + mixed_fallback = GL_TRUE; +@@ -1513,52 +1276,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) + return GL_TRUE; + } + +- +-static void disable_tex( GLcontext *ctx, int unit ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- +- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<state.texture.unit[unit].texobj != NULL ) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); +- rmesa->state.texture.unit[unit].texobj = NULL; +- } +- +- R200_STATECHANGE( rmesa, ctx ); +- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit); +- +- R200_STATECHANGE( rmesa, vtx ); +- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); +- +- if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<2 texunits. 
+- */ +- +- { +- GLuint tmp = rmesa->TexGenEnabled; +- +- rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<TexGenNeedNormals[unit] = GL_FALSE; +- rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); +- +- if (tmp != rmesa->TexGenEnabled) { +- rmesa->recheck_texgen[unit] = GL_TRUE; +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; +- } +- } +- } +-} +- + void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +@@ -1575,237 +1292,165 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) + } + } + +-static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; +- +- /* Need to load the 2d images associated with this unit. +- */ +- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { +- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; +- t->base.dirty_images[0] = ~0; +- } +- +- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); +- +- if ( t->base.dirty_images[0] ) { +- R200_FIREVERTICES( rmesa ); +- r200SetTexImages( rmesa, tObj ); +- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); +- if ( !t->base.memBlock && !t->image_override ) +- return GL_FALSE; +- } +- +- set_re_cntl_d3d( ctx, unit, GL_FALSE ); +- +- return GL_TRUE; +-} +- +-#if ENABLE_HW_3D_TEXTURE +-static GLboolean enable_tex_3d( GLcontext *ctx, int unit ) ++/** ++ * Compute the cached hardware register values for the given texture object. 
++ * ++ * \param rmesa Context pointer ++ * \param t the r300 texture object ++ */ ++static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) + { +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; +- +- /* Need to load the 3d images associated with this unit. +- */ +- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { +- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; +- t->base.dirty_images[0] = ~0; ++ const struct gl_texture_image *firstImage = ++ t->base.Image[0][t->mt->firstLevel]; ++ GLint log2Width, log2Height, log2Depth, texelBytes; ++ ++ log2Width = firstImage->WidthLog2; ++ log2Height = firstImage->HeightLog2; ++ log2Depth = firstImage->DepthLog2; ++ texelBytes = firstImage->TexFormat->TexelBytes; ++ ++ ++ if (!t->image_override) { ++ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { ++ const struct tx_table *table = _mesa_little_endian() ? 
tx_table_le : ++ tx_table_be; ++ ++ t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK | ++ R200_TXFORMAT_ALPHA_IN_MAP); ++ t->pp_txfilter &= ~R200_YUV_TO_RGB; ++ ++ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; ++ t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter; ++ } else { ++ _mesa_problem(NULL, "unexpected texture format in %s", ++ __FUNCTION__); ++ return; ++ } + } ++ ++ t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; ++ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT; ++ ++ t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | ++ R200_TXFORMAT_HEIGHT_MASK | ++ R200_TXFORMAT_CUBIC_MAP_ENABLE | ++ R200_TXFORMAT_F5_WIDTH_MASK | ++ R200_TXFORMAT_F5_HEIGHT_MASK); ++ t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | ++ (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); ++ ++ t->tile_bits = 0; ++ ++ t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); ++ if (t->base.Target == GL_TEXTURE_3D) { ++ t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); ++ t->pp_txformat_x |= R200_TEXCOORD_VOLUME; + +- ASSERT(tObj->Target == GL_TEXTURE_3D); +- +- /* R100 & R200 do not support mipmaps for 3D textures. 
+- */ +- if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) { +- return GL_FALSE; + } +- +- if ( t->base.dirty_images[0] ) { +- R200_FIREVERTICES( rmesa ); +- r200SetTexImages( rmesa, tObj ); +- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); +- if ( !t->base.memBlock ) +- return GL_FALSE; ++ else if (t->base.Target == GL_TEXTURE_CUBE_MAP) { ++ ASSERT(log2Width == log2Height); ++ t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | ++ (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | ++ /* don't think we need this bit, if it exists at all - fglrx does not set it */ ++ (R200_TXFORMAT_CUBIC_MAP_ENABLE)); ++ t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; ++ t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | ++ (log2Height << R200_FACE_HEIGHT_1_SHIFT) | ++ (log2Width << R200_FACE_WIDTH_2_SHIFT) | ++ (log2Height << R200_FACE_HEIGHT_2_SHIFT) | ++ (log2Width << R200_FACE_WIDTH_3_SHIFT) | ++ (log2Height << R200_FACE_HEIGHT_3_SHIFT) | ++ (log2Width << R200_FACE_WIDTH_4_SHIFT) | ++ (log2Height << R200_FACE_HEIGHT_4_SHIFT)); + } +- +- set_re_cntl_d3d( ctx, unit, GL_TRUE ); +- +- return GL_TRUE; +-} +-#endif +- +-static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; +- GLuint face; +- +- /* Need to load the 2d images associated with this unit. +- */ +- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { +- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; +- for (face = 0; face < 6; face++) +- t->base.dirty_images[face] = ~0; ++ else { ++ /* If we don't in fact send enough texture coordinates, q will be 1, ++ * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?) 
++ */ ++ t->pp_txformat_x |= R200_TEXCOORD_PROJ; + } + +- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); +- +- if ( t->base.dirty_images[0] || t->base.dirty_images[1] || +- t->base.dirty_images[2] || t->base.dirty_images[3] || +- t->base.dirty_images[4] || t->base.dirty_images[5] ) { +- /* flush */ +- R200_FIREVERTICES( rmesa ); +- /* layout memory space, once for all faces */ +- r200SetTexImages( rmesa, tObj ); +- } ++ t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT) ++ | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT)); + +- /* upload (per face) */ +- for (face = 0; face < 6; face++) { +- if (t->base.dirty_images[face]) { +- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face ); +- } +- } +- +- if ( !t->base.memBlock ) { +- /* texmem alloc failed, use s/w fallback */ +- return GL_FALSE; ++ if ( !t->image_override ) { ++ if (firstImage->IsCompressed) ++ t->pp_txpitch = (firstImage->Width + 63) & ~(63); ++ else ++ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); ++ t->pp_txpitch -= 32; + } + +- set_re_cntl_d3d( ctx, unit, GL_TRUE ); +- +- return GL_TRUE; +-} +- +-static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) +-{ +- r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; +- +- if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) { ++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { + t->pp_txformat |= R200_TXFORMAT_NON_POWER2; +- t->base.dirty_images[0] = ~0; + } + +- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); +- +- if ( t->base.dirty_images[0] ) { +- R200_FIREVERTICES( rmesa ); +- r200SetTexImages( rmesa, tObj ); +- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); +- if ( !t->base.memBlock && +- !t->image_override && +- !rmesa->prefer_gart_client_texturing ) +- return GL_FALSE; +- } +- +- set_re_cntl_d3d( ctx, 
unit, GL_FALSE ); +- +- return GL_TRUE; + } + +- +-static GLboolean update_tex_common( GLcontext *ctx, int unit ) ++static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; +- +- /* Fallback if there's a texture border */ +- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) +- return GL_FALSE; +- +- /* Update state if this is a different texture object to last +- * time. +- */ +- if ( rmesa->state.texture.unit[unit].texobj != t ) { +- if ( rmesa->state.texture.unit[unit].texobj != NULL ) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= +- ~(1UL << unit); +- } ++ radeonTexObj *t = radeon_tex_obj(texObj); + +- rmesa->state.texture.unit[unit].texobj = t; +- t->base.bound |= (1UL << unit); +- t->dirty_state |= 1<hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit; ++ if (!radeon_validate_texture_miptree(ctx, texObj)) ++ return GL_FALSE; + +- R200_STATECHANGE( rmesa, vtx ); +- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); +- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3); ++ r200_validate_texgen(ctx, unit); ++ /* Configure the hardware registers (more precisely, the cached version ++ * of the hardware registers). 
*/ ++ setup_hardware_state(rmesa, t); ++ ++ if (texObj->Target == GL_TEXTURE_RECTANGLE_NV || ++ texObj->Target == GL_TEXTURE_2D || ++ texObj->Target == GL_TEXTURE_1D) ++ set_re_cntl_d3d( ctx, unit, GL_FALSE ); ++ else ++ set_re_cntl_d3d( ctx, unit, GL_TRUE ); ++ R200_STATECHANGE( rmesa, ctx ); ++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit; ++ ++ R200_STATECHANGE( rmesa, vtx ); ++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); ++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3); + +- rmesa->recheck_texgen[unit] = GL_TRUE; +- } +- +- if (t->dirty_state & (1<recheck_texgen[unit] = GL_TRUE; ++ import_tex_obj_state( rmesa, unit, t ); + + if (rmesa->recheck_texgen[unit]) { + GLboolean fallback = !r200_validate_texgen( ctx, unit ); + TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<recheck_texgen[unit] = 0; +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; ++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + +- FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback ); +- return !t->border_fallback; +-} ++ t->validated = GL_TRUE; + ++ FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback ); + ++ return !t->border_fallback; ++} + +-static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit ) ++static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit) + { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded; + +- if ( unitneeded & (TEXTURE_RECT_BIT) ) { +- return (enable_tex_rect( ctx, unit ) && +- update_tex_common( ctx, unit )); +- } +- else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { +- return (enable_tex_2d( ctx, unit ) && +- update_tex_common( ctx, unit )); +- } +-#if ENABLE_HW_3D_TEXTURE +- else if ( unitneeded & (TEXTURE_3D_BIT) ) { +- return (enable_tex_3d( ctx, unit ) && +- update_tex_common( ctx, unit )); +- } +-#endif +- else if ( unitneeded & (TEXTURE_CUBE_BIT) ) { +- return (enable_tex_cube( ctx, unit ) && +- 
update_tex_common( ctx, unit )); +- } +- else if ( unitneeded ) { +- return GL_FALSE; +- } +- else { +- disable_tex( ctx, unit ); +- return GL_TRUE; ++ if (!unitneeded) { ++ /* disable the unit */ ++ disable_tex_obj_state(rmesa, unit); ++ return GL_TRUE; + } ++ ++ if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) { ++ _mesa_warning(ctx, ++ "failed to validate texture for unit %d.\n", ++ unit); ++ rmesa->state.texture.unit[unit].texobj = NULL; ++ return GL_FALSE; ++ } ++ ++ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); ++ return GL_TRUE; + } + + +@@ -1846,11 +1491,11 @@ void r200UpdateTextureState( GLcontext *ctx ) + + FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok ); + +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + r200ChooseVertexState( ctx ); + + +- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { ++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + + /* + * T0 hang workaround ------------- +@@ -1863,7 +1508,7 @@ void r200UpdateTextureState( GLcontext *ctx ) + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; + if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE)) +- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; ++ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE; + } + else if (!ctx->ATIFragmentShader._Enabled) { +diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c +index 562992f..888f91d 100644 +--- a/src/mesa/drivers/dri/r200/r200_vertprog.c ++++ b/src/mesa/drivers/dri/r200/r200_vertprog.c +@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) { + } + /* could optimize setting up vertex progs away for non-tcl hw */ + fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) && +- rmesa->r200Screen->drmSupportsVertexProgram); ++ 
rmesa->radeon.radeonScreen->drmSupportsVertexProgram); + TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback); +- if (rmesa->TclFallback) return; ++ if (rmesa->radeon.TclFallback) return; + + R200_STATECHANGE( rmesa, vap ); + /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? +diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile +index 6ca9342..497b1ec 100644 +--- a/src/mesa/drivers/dri/r300/Makefile ++++ b/src/mesa/drivers/dri/r300/Makefile +@@ -3,6 +3,8 @@ + TOP = ../../../../.. + include $(TOP)/configs/current + ++CFLAGS += $(RADEON_CFLAGS) ++ + LIBNAME = r300_dri.so + + MINIGLX_SOURCES = server/radeon_dri.c +@@ -20,20 +22,24 @@ COMMON_SOURCES = \ + ../common/xmlconfig.c \ + ../common/dri_util.c + ++RADEON_COMMON_SOURCES = \ ++ radeon_texture.c \ ++ radeon_common_context.c \ ++ radeon_common.c \ ++ radeon_dma.c \ ++ radeon_lock.c \ ++ radeon_bo_legacy.c \ ++ radeon_cs_legacy.c \ ++ radeon_mipmap_tree.c \ ++ radeon_span.c ++ + DRIVER_SOURCES = \ + radeon_screen.c \ +- radeon_context.c \ +- radeon_ioctl.c \ +- radeon_lock.c \ +- radeon_span.c \ +- radeon_state.c \ +- r300_mem.c \ + r300_context.c \ + r300_ioctl.c \ + r300_cmdbuf.c \ + r300_state.c \ + r300_render.c \ +- r300_texmem.c \ + r300_tex.c \ + r300_texstate.c \ + radeon_program.c \ +@@ -49,12 +55,15 @@ DRIVER_SOURCES = \ + r300_shader.c \ + r300_emit.c \ + r300_swtcl.c \ ++ $(RADEON_COMMON_SOURCES) \ + $(EGL_SOURCES) + + C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) + + DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ +- -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 ++ -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \ ++# -DRADEON_BO_TRACK \ ++ -Wall + + SYMLINKS = \ + server/radeon_dri.c \ +@@ -68,7 +77,28 @@ COMMON_SYMLINKS = \ + radeon_chipset.h \ + radeon_screen.c \ + radeon_screen.h \ +- radeon_span.h ++ radeon_span.h \ ++ radeon_span.c \ ++ radeon_bo_legacy.c \ ++ radeon_cs_legacy.c \ ++ radeon_bo_legacy.h \ ++ radeon_cs_legacy.h \ ++ 
radeon_bocs_wrapper.h \ ++ radeon_lock.c \ ++ radeon_lock.h \ ++ radeon_common.c \ ++ radeon_common.h \ ++ radeon_common_context.c \ ++ radeon_common_context.h \ ++ radeon_cmdbuf.h \ ++ radeon_dma.c \ ++ radeon_dma.h \ ++ radeon_mipmap_tree.c \ ++ radeon_mipmap_tree.h \ ++ radeon_texture.c \ ++ radeon_texture.h ++ ++DRI_LIB_DEPS += $(RADEON_LDFLAGS) + + ##### TARGETS ##### + +diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c +index c9e1dfe..cfc9785 100644 +--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c ++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c +@@ -44,235 +44,288 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "drm.h" + #include "radeon_drm.h" + +-#include "radeon_ioctl.h" + #include "r300_context.h" + #include "r300_ioctl.h" + #include "radeon_reg.h" + #include "r300_reg.h" + #include "r300_cmdbuf.h" + #include "r300_emit.h" ++#include "radeon_bocs_wrapper.h" ++#include "radeon_mipmap_tree.h" + #include "r300_state.h" ++#include "radeon_reg.h" + +-// Set this to 1 for extremely verbose debugging of command buffers +-#define DEBUG_CMDBUF 0 ++#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 ++# define RADEON_ONE_REG_WR (1 << 15) + +-/** +- * Send the current command buffer via ioctl to the hardware. ++/** # of dwords reserved for additional instructions that may need to be written ++ * during flushing. 
+ */ +-int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) ++#define SPACE_FOR_FLUSHING 4 ++ ++static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) + { +- int ret; +- int i; +- drm_radeon_cmd_buffer_t cmd; +- int start; +- +- if (r300->radeon.lost_context) { +- start = 0; +- r300->radeon.lost_context = GL_FALSE; +- } else +- start = r300->cmdbuf.count_reemit; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) { +- fprintf(stderr, "%s from %s - %i cliprects\n", +- __FUNCTION__, caller, r300->radeon.numClipRects); +- +- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) +- for (i = start; i < r300->cmdbuf.count_used; ++i) +- fprintf(stderr, "%d: %08x\n", i, +- r300->cmdbuf.cmd_buf[i]); +- } ++ if (r300->radeon.radeonScreen->kernel_mm) { ++ return ((((*pkt) >> 16) & 0x3FFF) + 1); ++ } else { ++ drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt; ++ return t->packet0.count; + } -+ bo_legacy->ptr = boml->screen->gartTextures.map + base_offset; -+ bo_legacy->offset = boml->screen->gart_texture_offset + base_offset; -+ bo->size = size; -+ boml->dma_alloc_size += size; -+ boml->dma_buf_count++; + return 0; +} -+ -+static int bo_dma_free(struct radeon_bo *bo) + +- cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start); +- cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; ++#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) ++#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) + +- if (r300->radeon.state.scissor.enabled) { +- cmd.nbox = r300->radeon.state.scissor.numClipRects; +- cmd.boxes = +- (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects; +- } else { +- cmd.nbox = r300->radeon.numClipRects; +- cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects; ++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) +{ -+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; -+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; -+ drm_radeon_mem_free_t memfree; -+ int r; ++ r300ContextPtr r300 = 
R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ drm_r300_cmd_header_t cmd; ++ uint32_t addr, ndw, i; ++ ++ if (!r300->radeon.radeonScreen->kernel_mm) { ++ uint32_t dwords; ++ dwords = (*atom->check) (ctx, atom); ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_BATCH_TABLE(atom->cmd, dwords); ++ END_BATCH(); ++ return; + } +- +- ret = drmCommandWrite(r300->radeon.dri.fd, +- DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); +- +- if (RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "Syncing in %s (from %s)\n\n", +- __FUNCTION__, caller); +- radeonWaitForIdleLocked(&r300->radeon); ++ ++ cmd.u = atom->cmd[0]; ++ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; ++ ndw = cmd.vpu.count * 4; ++ if (ndw) { ++ ++ if (r300->vap_flush_needed) { ++ BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); ++ ++ /* flush processing vertices */ ++ OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); ++ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); ++ OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); ++ OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); ++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ r300->vap_flush_needed = GL_FALSE; ++ } else { ++ BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); ++ } ++ OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr); ++ OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); ++ for (i = 0; i < ndw; i++) { ++ OUT_BATCH(atom->cmd[i+1]); ++ } ++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ END_BATCH(); + } +- +- r300->dma.nr_released_bufs = 0; +- r300->cmdbuf.count_used = 0; +- r300->cmdbuf.count_reemit = 0; +- +- return ret; + } + +-int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) ++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) + { +- int ret; ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ drm_r300_cmd_header_t cmd; ++ uint32_t addr, ndw, i, sz; ++ int type, clamp, stride; ++ ++ if (!r300->radeon.radeonScreen->kernel_mm) { ++ uint32_t dwords; 
++ dwords = (*atom->check) (ctx, atom); ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_BATCH_TABLE(atom->cmd, dwords); ++ END_BATCH(); ++ return; ++ } + +- LOCK_HARDWARE(&r300->radeon); ++ cmd.u = atom->cmd[0]; ++ sz = cmd.r500fp.count; ++ addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; ++ type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); ++ clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + +- ret = r300FlushCmdBufLocked(r300, caller); ++ addr |= (type << 16); ++ addr |= (clamp << 17); + +- UNLOCK_HARDWARE(&r300->radeon); ++ stride = type ? 4 : 6; + +- if (ret) { +- fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); +- _mesa_exit(ret); +- } ++ ndw = sz * stride; ++ if (ndw) { + +- return ret; ++ BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); ++ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); ++ OUT_BATCH(addr); ++ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); ++ for (i = 0; i < ndw; i++) { ++ OUT_BATCH(atom->cmd[i+1]); ++ } ++ END_BATCH(); ++ } + } + +-static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) ++static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) + { +- int i; +- int dwords = (*state->check) (r300, state); +- +- fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, +- state->cmd_size); ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); ++ int notexture = 0; ++ ++ if (numtmus) { ++ int i; ++ ++ for(i = 0; i < numtmus; ++i) { ++ radeonTexObj *t = r300->hw.textures[i]; ++ ++ if (!t) ++ notexture = 1; ++ } + +- if (RADEON_DEBUG & DEBUG_VERBOSE) { +- for (i = 0; i < dwords; i++) { +- fprintf(stderr, " %s[%d]: %08x\n", +- state->name, i, state->cmd[i]); ++ if (r300->radeon.radeonScreen->kernel_mm && notexture) { ++ return; + } ++ BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus); ++ for(i = 0; i < numtmus; ++i) { ++ radeonTexObj *t = r300->hw.textures[i]; ++ 
OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); ++ if (t && !t->image_override) { ++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, ++ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } else if (!t) { ++ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); ++ } else { /* override cases */ ++ if (t->bo) { ++ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, ++ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } else if (!r300->radeon.radeonScreen->kernel_mm) { ++ OUT_BATCH(t->override_offset); ++ } ++ else ++ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); ++ } ++ } ++ END_BATCH(); + } + } + +-/** +- * Emit all atoms with a dirty field equal to dirty. +- * +- * The caller must have ensured that there is enough space in the command +- * buffer. +- */ +-static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) ++static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) + { +- struct r300_state_atom *atom; +- uint32_t *dest; +- int dwords; +- +- dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; +- +- /* Emit WAIT */ +- *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); +- dest++; +- r300->cmdbuf.count_used++; +- +- /* Emit cache flush */ +- *dest = cmdpacket0(R300_TX_INVALTAGS, 1); +- dest++; +- r300->cmdbuf.count_used++; +- +- *dest = R300_TX_FLUSH; +- dest++; +- r300->cmdbuf.count_used++; +- +- /* Emit END3D */ +- *dest = cmdpacify(); +- dest++; +- r300->cmdbuf.count_used++; +- +- /* Emit actual atoms */ +- +- foreach(atom, &r300->hw.atomlist) { +- if ((atom->dirty || r300->hw.all_dirty) == dirty) { +- dwords = (*atom->check) (r300, atom); +- if (dwords) { +- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { +- r300PrintStateAtom(r300, atom); +- } +- memcpy(dest, atom->cmd, dwords * 4); +- dest += dwords; +- r300->cmdbuf.count_used += dwords; +- atom->dirty = GL_FALSE; +- } else { +- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { +- fprintf(stderr, " skip state %s\n", +- atom->name); +- } +- } +- } ++ r300ContextPtr r300 = 
R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ struct radeon_renderbuffer *rrb; ++ uint32_t cbpitch; + -+ if (bo_legacy->ptr == NULL) { -+ /* ptr is set to NULL if dma allocation failed */ -+ return 0; -+ } -+ legacy_get_current_age(boml); -+ memfree.region = RADEON_MEM_REGION_GART; -+ memfree.region_offset = bo_legacy->offset; -+ memfree.region_offset -= boml->screen->gart_texture_offset; -+ r = drmCommandWrite(boml->base.fd, -+ DRM_RADEON_FREE, -+ &memfree, -+ sizeof(memfree)); -+ if (r) { -+ fprintf(stderr, "Failed to free bo[%p] at %08x\n", -+ &bo_legacy->base, memfree.region_offset); -+ fprintf(stderr, "ret = %s\n", strerror(-r)); -+ return r; -+ } -+ boml->dma_alloc_size -= bo_legacy->base.size; -+ boml->dma_buf_count--; -+ return 0; ++ rrb = radeon_get_colorbuffer(&r300->radeon); ++ if (!rrb || !rrb->bo) { ++ fprintf(stderr, "no rrb\n"); ++ return; + } ++ ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->cpp == 4) ++ cbpitch |= R300_COLOR_FORMAT_ARGB8888; ++ else ++ cbpitch |= R300_COLOR_FORMAT_RGB565; ++ ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) ++ cbpitch |= R300_COLOR_TILE_ENABLE; ++ ++ BEGIN_BATCH_NO_AUTOSTATE(6); ++ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); ++ OUT_BATCH(cbpitch); ++ END_BATCH(); + } + +-/** +- * Copy dirty hardware state atoms into the command buffer. +- * +- * We also copy out clean state if we're at the start of a buffer. That makes +- * it easy to recover from lost contexts. 
+- */ +-void r300EmitState(r300ContextPtr r300) ++static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) + { +- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) +- fprintf(stderr, "%s\n", __FUNCTION__); ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ struct radeon_renderbuffer *rrb; ++ uint32_t zbpitch; + +- if (r300->cmdbuf.count_used && !r300->hw.is_dirty +- && !r300->hw.all_dirty) ++ rrb = radeon_get_depthbuffer(&r300->radeon); ++ if (!rrb) + return; + +- /* To avoid going across the entire set of states multiple times, just check +- * for enough space for the case of emitting all state, and inline the +- * r300AllocCmdBuf code here without all the checks. +- */ +- r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); +- +- if (!r300->cmdbuf.count_used) { +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Begin reemit state\n"); +- +- r300EmitAtoms(r300, GL_FALSE); +- r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; ++ zbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { ++ zbpitch |= R300_DEPTHMACROTILE_ENABLE; + } ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ ++ zbpitch |= R300_DEPTHMICROTILE_TILED; ++ } ++ ++ BEGIN_BATCH_NO_AUTOSTATE(6); ++ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch); ++ END_BATCH(); +} + +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Begin dirty state\n"); +- +- r300EmitAtoms(r300, GL_TRUE); +- +- assert(r300->cmdbuf.count_used < r300->cmdbuf.size); ++static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) ++{ ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ BATCH_LOCALS(&r300->radeon); ++ struct radeon_renderbuffer *rrb; ++ uint32_t zbpitch; ++ uint32_t format; + -+static void bo_free(struct bo_legacy *bo_legacy) ++ rrb = radeon_get_depthbuffer(&r300->radeon); ++ if (!rrb) ++ format = 
0; ++ else { ++ if (rrb->cpp == 2) ++ format = R300_DEPTHFORMAT_16BIT_INT_Z; ++ else if (rrb->cpp == 4) ++ format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; ++ } + +- r300->hw.is_dirty = GL_FALSE; +- r300->hw.all_dirty = GL_FALSE; ++ OUT_BATCH(atom->cmd[0]); ++ atom->cmd[1] &= ~(3 << 0); ++ atom->cmd[1] |= format; ++ OUT_BATCH(atom->cmd[1]); ++ OUT_BATCH(atom->cmd[2]); ++ OUT_BATCH(atom->cmd[3]); ++ OUT_BATCH(atom->cmd[4]); + } + +-#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) +-#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +-#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +- +-static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) ++static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) + { + return atom->cmd_size; + } + +-static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom) ++static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) + { ++ r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; +- cnt = packet0_count(atom->cmd); ++ if (atom->cmd[0] == CP_PACKET2) { ++ return 0; ++ } ++ cnt = packet0_count(r300, atom->cmd); + return cnt ? cnt + 1 : 0; + } + +-static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) ++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) + { + int cnt; ++ + cnt = vpu_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; + } + +-static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) ++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) + { + int cnt; ++ + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; + } + +-static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) ++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) + { + int cnt; ++ + cnt = r500fp_count(atom->cmd); + return cnt ? 
(cnt * 4) + 1 : 0; + } +@@ -285,8 +338,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) + r300->hw.ATOM.idx = (IDX); \ + r300->hw.ATOM.check = check_##CHK; \ + r300->hw.ATOM.dirty = GL_FALSE; \ +- r300->hw.max_state_size += (SZ); \ +- insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \ ++ r300->radeon.hw.max_state_size += (SZ); \ ++ insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \ + } while (0) + /** + * Allocate memory for the command buffer and initialize the state atom +@@ -294,7 +347,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) + */ + void r300InitCmdBuf(r300ContextPtr r300) + { +- int size, mtu; ++ int mtu; + int has_tcl = 1; + int is_r500 = 0; + int i; +@@ -305,7 +358,7 @@ void r300InitCmdBuf(r300ContextPtr r300) + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + +- r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ ++ r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & DEBUG_TEXTURE) { +@@ -313,97 +366,97 @@ void r300InitCmdBuf(r300ContextPtr r300) + } + + /* Setup the atom linked list */ +- make_empty_list(&r300->hw.atomlist); +- r300->hw.atomlist.name = "atom-list"; ++ make_empty_list(&r300->radeon.hw.atomlist); ++ r300->radeon.hw.atomlist.name = "atom-list"; + + /* Initialize state atoms */ + ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); +- r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); ++ r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); +- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1); ++ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 
1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; +- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1); ++ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1); + if (is_r500) { + ALLOC_STATE(vap_index_offset, always, 2, 0); +- r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); ++ r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } + ALLOC_STATE(vte, always, 3, 0); +- r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); ++ r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2); + ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); +- r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2); ++ r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2); + ALLOC_STATE(vap_cntl_status, always, 2, 0); +- r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ++ r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); + r300->hw.vir[0].cmd[R300_VIR_CMD_0] = +- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1); + ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); + r300->hw.vir[1].cmd[R300_VIR_CMD_0] = +- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); + ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); +- r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); ++ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2); + ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); +- r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); ++ 
r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); + + if (has_tcl) { + ALLOC_STATE(vap_clip_cntl, always, 2, 0); +- r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); ++ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1); + ALLOC_STATE(vap_clip, always, 5, 0); +- r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4); ++ r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4); + ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); +- r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1); ++ r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1); + } + + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); + r300->hw.vof.cmd[R300_VOF_CMD_0] = +- cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); ++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2); + + if (has_tcl) { + ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); + r300->hw.pvs.cmd[R300_PVS_CMD_0] = +- cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3); ++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3); + } + + ALLOC_STATE(gb_enable, always, 2, 0); +- r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1); ++ r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); +- r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5); ++ r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5); + ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); +- r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); ++ r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); + ALLOC_STATE(ga_point_s0, always, 5, 0); +- r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4); ++ 
r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4); + ALLOC_STATE(ga_triangle_stipple, always, 2, 0); +- r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1); ++ r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1); + ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); +- r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1); ++ r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1); + ALLOC_STATE(ga_point_minmax, always, 4, 0); +- r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3); ++ r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3); + ALLOC_STATE(lcntl, always, 2, 0); +- r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1); ++ r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); + ALLOC_STATE(ga_line_stipple, always, 4, 0); +- r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3); ++ r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); + ALLOC_STATE(shade, always, 5, 0); +- r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4); ++ r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4); + ALLOC_STATE(polygon_mode, always, 4, 0); +- r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); ++ r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3); + ALLOC_STATE(fogp, always, 3, 0); +- r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2); ++ r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2); + ALLOC_STATE(zbias_cntl, always, 2, 0); +- r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1); ++ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1); + ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); + 
r300->hw.zbs.cmd[R300_ZBS_CMD_0] = +- cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); ++ cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + ALLOC_STATE(occlusion_cntl, always, 2, 0); +- r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1); ++ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1); + ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); +- r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1); ++ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1); + ALLOC_STATE(su_depth_scale, always, 3, 0); +- r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ++ r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2); + ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); +- r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); ++ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2); + if (is_r500) { + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); +- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); ++ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16); + for (i = 0; i < 8; i++) { + r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | +@@ -412,133 +465,149 @@ void r300InitCmdBuf(r300ContextPtr r300) + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + } + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); +- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); ++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); +- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); ++ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); +- 
r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); ++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1); + } + ALLOC_STATE(sc_hyperz, always, 3, 0); +- r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ++ r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2); + ALLOC_STATE(sc_screendoor, always, 2, 0); +- r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); ++ r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + ALLOC_STATE(us_out_fmt, always, 6, 0); +- r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5); ++ r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5); + + if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); +- r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); ++ r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; +- r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3); +- r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); ++ r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3); ++ r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); +- r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); ++ r300->hw.r500fp.cmd[R300_FPI_CMD_0] = ++ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); ++ r300->hw.r500fp.emit = emit_r500fp; + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); +- r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); ++ r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = ++ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); ++ r300->hw.r500fp_const.emit = emit_r500fp; + } 
else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); +- r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3); +- r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4); ++ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); ++ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4); ++ + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); +- r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0); ++ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0); + + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); +- r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1); ++ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); +- r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1); ++ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); +- r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1); ++ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); +- r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1); ++ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); +- r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); ++ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0); + } + ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); +- r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1); ++ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = 
cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1); + ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); +- r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3); ++ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3); + ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); +- r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2); ++ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2); + ALLOC_STATE(fg_depth_src, always, 2, 0); +- r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); ++ r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1); + ALLOC_STATE(rb3d_cctl, always, 2, 0); +- r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ++ r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1); + ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); +- r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); ++ r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2); + ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); +- r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1); ++ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1); + if (is_r500) { + ALLOC_STATE(blend_color, always, 3, 0); +- r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2); ++ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2); + } else { + ALLOC_STATE(blend_color, always, 2, 0); +- r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1); ++ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1); + } + ALLOC_STATE(rop, always, 2, 0); +- r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); ++ r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); 
+ ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); +- r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); +- r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); ++ r300->hw.cb.emit = &emit_cb_offset; + ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); +- r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); ++ r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); + ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); +- r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1); ++ r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); +- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ++ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); + r300->hw.zs.cmd[R300_ZS_CMD_0] = +- cmdpacket0(R300_ZB_CNTL, 3); ++ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); ++ + ALLOC_STATE(zstencil_format, always, 5, 0); + r300->hw.zstencil_format.cmd[0] = +- cmdpacket0(R300_ZB_FORMAT, 4); ++ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); ++ r300->hw.zstencil_format.emit = emit_zstencil_format; ++ + ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); +- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); ++ r300->hw.zb.emit = emit_zb_offset; + ALLOC_STATE(zb_depthclearvalue, always, 2, 0); +- r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); ++ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); + ALLOC_STATE(unk4F30, always, 3, 0); +- r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); ++ r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2); + 
ALLOC_STATE(zb_hiz_offset, always, 2, 0); +- r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); ++ r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); + ALLOC_STATE(zb_hiz_pitch, always, 2, 0); +- r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1); ++ r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1); + + /* VPU only on TCL */ + if (has_tcl) { + int i; + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); +- r300->hw.vpi.cmd[R300_VPI_CMD_0] = +- cmdvpu(R300_PVS_CODE_START, 0); ++ r300->hw.vpi.cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); ++ r300->hw.vpi.emit = emit_vpu; + + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); +- r300->hw.vpp.cmd[R300_VPP_CMD_0] = +- cmdvpu(R500_PVS_CONST_START, 0); ++ r300->hw.vpp.cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); ++ r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); +- r300->hw.vps.cmd[R300_VPS_CMD_0] = +- cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); ++ r300->hw.vps.cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); ++ r300->hw.vps.emit = emit_vpu; + + for (i = 0; i < 6; i++) { +- ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); +- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = +- cmdvpu(R500_PVS_UCP_START + i, 1); ++ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); ++ r300->hw.vpucp[i].cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, ++ R500_PVS_UCP_START + i, 1); ++ r300->hw.vpucp[i].emit = emit_vpu; + } + } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); +- r300->hw.vpp.cmd[R300_VPP_CMD_0] = +- cmdvpu(R300_PVS_CONST_START, 0); ++ r300->hw.vpp.cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); ++ r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); +- r300->hw.vps.cmd[R300_VPS_CMD_0] = +- cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); ++ r300->hw.vps.cmd[0] = ++ 
cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); ++ r300->hw.vps.emit = emit_vpu; + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); +- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = +- cmdvpu(R300_PVS_UCP_START + i, 1); ++ r300->hw.vpucp[i].cmd[0] = ++ cmdvpu(r300->radeon.radeonScreen, ++ R300_PVS_UCP_START + i, 1); ++ r300->hw.vpucp[i].emit = emit_vpu; + } + } + } +@@ -546,61 +615,39 @@ void r300InitCmdBuf(r300ContextPtr r300) + /* Textures */ + ALLOC_STATE(tex.filter, variable, mtu + 1, 0); + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FILTER0_0, 0); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0); + + ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FILTER1_0, 0); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0); + + ALLOC_STATE(tex.size, variable, mtu + 1, 0); +- r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0); ++ r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0); + + ALLOC_STATE(tex.format, variable, mtu + 1, 0); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FORMAT_0, 0); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0); + + ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); +- r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); ++ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); + +- ALLOC_STATE(tex.offset, variable, mtu + 1, 0); ++ ALLOC_STATE(tex.offset, variable, 1, 0); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_OFFSET_0, 0); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); ++ r300->hw.tex.offset.emit = &emit_tex_offsets; + + ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_CHROMA_KEY_0, 0); ++ 
cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0); + + ALLOC_STATE(tex.border_color, variable, mtu + 1, 0); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_BORDER_COLOR_0, 0); +- +- r300->hw.is_dirty = GL_TRUE; +- r300->hw.all_dirty = GL_TRUE; ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + +- /* Initialize command buffer */ +- size = +- 256 * driQueryOptioni(&r300->radeon.optionCache, +- "command_buffer_size"); +- if (size < 2 * r300->hw.max_state_size) { +- size = 2 * r300->hw.max_state_size + 65535; +- } +- if (size > 64 * 256) +- size = 64 * 256; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) { +- fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", +- sizeof(drm_r300_cmd_header_t)); +- fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", +- sizeof(drm_radeon_cmd_buffer_t)); +- fprintf(stderr, +- "Allocating %d bytes command buffer (max state is %d bytes)\n", +- size * 4, r300->hw.max_state_size * 4); +- } ++ r300->radeon.hw.is_dirty = GL_TRUE; ++ r300->radeon.hw.all_dirty = GL_TRUE; + +- r300->cmdbuf.size = size; +- r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); +- r300->cmdbuf.count_used = 0; +- r300->cmdbuf.count_reemit = 0; ++ rcommonInitCmdBuf(&r300->radeon); + } + + /** +@@ -608,68 +655,10 @@ void r300InitCmdBuf(r300ContextPtr r300) + */ + void r300DestroyCmdBuf(r300ContextPtr r300) + { +- struct r300_state_atom *atom; ++ struct radeon_state_atom *atom; + +- FREE(r300->cmdbuf.cmd_buf); +- +- foreach(atom, &r300->hw.atomlist) { ++ foreach(atom, &r300->radeon.hw.atomlist) { + FREE(atom->cmd); + } +-} +- +-void r300EmitBlit(r300ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, GLuint w, GLuint h) +-{ +- drm_r300_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", +- 
__FUNCTION__, src_pitch, src_offset, srcx, srcy, +- dst_pitch, dst_offset, dstx, dsty, w, h); +- +- assert((src_pitch & 63) == 0); +- assert((dst_pitch & 63) == 0); +- assert((src_offset & 1023) == 0); +- assert((dst_offset & 1023) == 0); +- assert(w < (1 << 16)); +- assert(h < (1 << 16)); +- +- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); +- +- cmd[0].header.cmd_type = R300_CMD_PACKET3; +- cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; +- cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); +- cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | +- RADEON_GMC_DST_PITCH_OFFSET_CNTL | +- RADEON_GMC_BRUSH_NONE | +- (color_fmt << 8) | +- RADEON_GMC_SRC_DATATYPE_COLOR | +- RADEON_ROP3_S | +- RADEON_DP_SRC_SOURCE_MEMORY | +- RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); +- +- cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); +- cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); +- cmd[5].u = (srcx << 16) | srcy; +- cmd[6].u = (dstx << 16) | dsty; /* dst */ +- cmd[7].u = (w << 16) | h; +-} +- +-void r300EmitWait(r300ContextPtr rmesa, GLuint flags) +-{ +- drm_r300_cmd_header_t *cmd; +- +- assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); + +- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].u = 0; +- cmd[0].wait.cmd_type = R300_CMD_WAIT; +- cmd[0].wait.flags = flags; + } +diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h +index a8eaa58..b7798eb 100644 +--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h ++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h +@@ -38,79 +38,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "r300_context.h" + +-extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller); +-extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller); +- +-extern void r300EmitState(r300ContextPtr r300); +- + extern void r300InitCmdBuf(r300ContextPtr r300); + extern void r300DestroyCmdBuf(r300ContextPtr r300); + +-/** +- * Make sure that enough space is available in the command buffer +- * by flushing if necessary. +- * +- * \param dwords The number of dwords we need to be free on the command buffer +- */ +-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, +- int dwords, const char *caller) +-{ +- assert(dwords < r300->cmdbuf.size); +- +- if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) +- r300FlushCmdBuf(r300, caller); +-} +- +-/** +- * Allocate the given number of dwords in the command buffer and return +- * a pointer to the allocated area. +- * When necessary, these functions cause a flush. r300AllocCmdBuf() also +- * causes state reemission after a flush. This is necessary to ensure +- * correct hardware state after an unlock. 
+- */ +-static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, +- int dwords, const char *caller) +-{ +- uint32_t *ptr; +- +- r300EnsureCmdBufSpace(r300, dwords, caller); +- +- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; +- r300->cmdbuf.count_used += dwords; +- return ptr; +-} +- +-static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, +- int dwords, const char *caller) +-{ +- uint32_t *ptr; +- +- r300EnsureCmdBufSpace(r300, dwords, caller); +- +- if (!r300->cmdbuf.count_used) { +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "Reemit state after flush (from %s)\n", caller); +- r300EmitState(r300); +- } +- +- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; +- r300->cmdbuf.count_used += dwords; +- return ptr; +-} + +-extern void r300EmitBlit(r300ContextPtr rmesa, +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, GLuint w, GLuint h); ++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); ++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); + +-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); +-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); +-extern void r300EmitVertexShader(r300ContextPtr rmesa); +-extern void r300EmitPixelShader(r300ContextPtr rmesa); ++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom); ++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom); ++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom); + + #endif /* __R300_CMDBUF_H__ */ +diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c +index 3743627..5d497ef 100644 +--- a/src/mesa/drivers/dri/r300/r300_context.c ++++ b/src/mesa/drivers/dri/r300/r300_context.c +@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "main/extensions.h" + #include "main/state.h" + #include "main/bufferobj.h" ++#include "main/texobj.h" + + #include "swrast/swrast.h" + #include "swrast_setup/swrast_setup.h" +@@ -55,19 +56,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #include "drivers/common/driverfuncs.h" + +-#include "radeon_ioctl.h" +-#include "radeon_span.h" + #include "r300_context.h" ++#include "radeon_context.h" ++#include "radeon_span.h" + #include "r300_cmdbuf.h" + #include "r300_state.h" + #include "r300_ioctl.h" + #include "r300_tex.h" + #include "r300_emit.h" + #include "r300_swtcl.h" ++#include "radeon_bocs_wrapper.h" + +-#ifdef USER_BUFFERS +-#include "r300_mem.h" +-#endif + + #include "vblank.h" + #include "utils.h" +@@ -77,19 +76,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + int future_hw_tcl_on = 1; + int hw_tcl_on = 1; + +-#define need_GL_EXT_stencil_two_side +-#define need_GL_ARB_multisample ++#define need_GL_VERSION_2_0 + #define need_GL_ARB_point_parameters +-#define need_GL_ARB_texture_compression +-#define need_GL_ARB_vertex_buffer_object + #define need_GL_ARB_vertex_program +-#define need_GL_EXT_blend_minmax +-//#define need_GL_EXT_fog_coord +-#define need_GL_EXT_multi_draw_arrays +-#define need_GL_EXT_secondary_color + #define need_GL_EXT_blend_equation_separate + #define need_GL_EXT_blend_func_separate ++#define need_GL_EXT_blend_minmax ++//#define need_GL_EXT_fog_coord + #define need_GL_EXT_gpu_program_parameters ++#define need_GL_EXT_secondary_color ++#define need_GL_EXT_stencil_two_side ++#define need_GL_ATI_separate_stencil + #define need_GL_NV_vertex_program + #include "extension_helper.h" + +@@ -97,27 +94,23 @@ const struct dri_extension card_extensions[] = { + /* *INDENT-OFF* */ + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, +- {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", 
GL_ARB_point_parameters_functions}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_shadow_ambient", NULL}, + {"GL_ARB_texture_border_clamp", NULL}, +- {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, +- {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + // {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, +- {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, +@@ -130,6 +123,7 @@ const struct dri_extension card_extensions[] = { + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_EXT_texture_mirror_clamp", NULL}, + {"GL_EXT_texture_rectangle", NULL}, ++ {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, + {"GL_ATI_texture_env_combine3", NULL}, + {"GL_ATI_texture_mirror_once", NULL}, + {"GL_MESA_pack_invert", NULL}, +@@ -142,6 +136,16 @@ const struct dri_extension card_extensions[] = { + /* *INDENT-ON* */ + }; + ++ ++/** ++ * The GL 2.0 functions are needed to make display lists work with ++ * functions added by GL_ATI_separate_stencil. 
++ */ ++const struct dri_extension gl_20_extension[] = { ++ {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, ++}; ++ ++ + extern struct tnl_pipeline_stage _r300_render_stage; + extern const struct tnl_pipeline_stage _r300_tcl_stage; + +@@ -178,6 +182,82 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { + 0, + }; + ++static void r300RunPipeline(GLcontext * ctx) +{ -+ struct bo_manager_legacy *boml; ++ _mesa_lock_context_textures(ctx); + -+ if (bo_legacy == NULL) { -+ return; -+ } -+ boml = (struct bo_manager_legacy *)bo_legacy->base.bom; -+ bo_legacy->prev->next = bo_legacy->next; -+ if (bo_legacy->next) { -+ bo_legacy->next->prev = bo_legacy->prev; -+ } -+ if (!bo_legacy->static_bo) { -+ legacy_free_handle(boml, bo_legacy->base.handle); -+ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { -+ /* dma buffers */ -+ bo_dma_free(&bo_legacy->base); -+ } else { -+ driDestroyTextureObject(&bo_legacy->tobj->base); -+ bo_legacy->tobj = NULL; -+ /* free backing store */ -+ free(bo_legacy->ptr); -+ } -+ } -+ memset(bo_legacy, 0 , sizeof(struct bo_legacy)); -+ free(bo_legacy); ++ if (ctx->NewState) ++ _mesa_update_state_locked(ctx); ++ ++ _tnl_run_pipeline(ctx); ++ _mesa_unlock_context_textures(ctx); +} + -+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, -+ uint32_t handle, -+ uint32_t size, -+ uint32_t alignment, -+ uint32_t domains, -+ uint32_t flags) ++static void r300_get_lock(radeonContextPtr rmesa) +{ -+ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; -+ struct bo_legacy *bo_legacy; -+ int r; -+ -+ if (handle) { -+ bo_legacy = boml->bos.next; -+ while (bo_legacy) { -+ if (bo_legacy->base.handle == handle) { -+ radeon_bo_ref(&(bo_legacy->base)); -+ return (struct radeon_bo*)bo_legacy; -+ } -+ bo_legacy = bo_legacy->next; -+ } -+ return NULL; -+ } ++ drm_radeon_sarea_t *sarea = rmesa->sarea; + -+ bo_legacy = bo_allocate(boml, size, alignment, domains, flags); -+ bo_legacy->static_bo = 0; -+ r = legacy_new_handle(boml, 
&bo_legacy->base.handle); -+ if (r) { -+ bo_free(bo_legacy); -+ return NULL; -+ } -+ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { -+ retry: -+ legacy_track_pending(boml, 0); -+ /* dma buffers */ ++ if (sarea->ctx_owner != rmesa->dri.hwContext) { ++ sarea->ctx_owner = rmesa->dri.hwContext; ++ if (!rmesa->radeonScreen->kernel_mm) ++ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); ++ } ++} + -+ r = bo_dma_alloc(&(bo_legacy->base)); -+ if (r) { -+ if (legacy_wait_any_pending(boml) == -1) { -+ bo_free(bo_legacy); -+ return NULL; -+ } -+ goto retry; -+ return NULL; -+ } -+ } else { -+ bo_legacy->ptr = malloc(bo_legacy->base.size); -+ if (bo_legacy->ptr == NULL) { -+ bo_free(bo_legacy); -+ return NULL; -+ } -+ } -+ radeon_bo_ref(&(bo_legacy->base)); -+ return (struct radeon_bo*)bo_legacy; ++static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) ++{ ++ /* please flush pipe do all pending work */ ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_SC_SCREENDOOR, 1)); ++ radeon_cs_write_dword(cs, 0x0); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_SC_SCREENDOOR, 1)); ++ radeon_cs_write_dword(cs, 0x00FFFFFF); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_SC_HYPERZ, 1)); ++ radeon_cs_write_dword(cs, 0x0); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_US_CONFIG, 1)); ++ radeon_cs_write_dword(cs, 0x0); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_ZB_CNTL, 1)); ++ radeon_cs_write_dword(cs, 0x0); ++ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D)); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_RB3D_DSTCACHE_CTLSTAT, 1)); ++ radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); ++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, ++ R300_ZB_ZCACHE_CTLSTAT, 1)); ++ radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); ++ 
radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, ++ R300_WAIT_3D | R300_WAIT_3D_CLEAN)); +} + -+static void bo_ref(struct radeon_bo *bo) ++static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) +{ ++ r300ContextPtr r300 = (r300ContextPtr)radeon; ++ BATCH_LOCALS(radeon); ++ ++ r300->vap_flush_needed = GL_TRUE; ++ ++ cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); ++ END_BATCH(); ++ end_3d(radeon); +} + -+static struct radeon_bo *bo_unref(struct radeon_bo *bo) ++static void r300_init_vtbl(radeonContextPtr radeon) +{ -+ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ radeon->vtbl.get_lock = r300_get_lock; ++ radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset; ++ radeon->vtbl.update_draw_buffer = r300UpdateDrawBuffer; ++ radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header; ++ radeon->vtbl.swtcl_flush = r300_swtcl_flush; ++ radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; ++} + -+ if (bo->cref <= 0) { -+ bo_legacy->prev->next = bo_legacy->next; -+ if (bo_legacy->next) { -+ bo_legacy->next->prev = bo_legacy->prev; -+ } -+ if (!bo_legacy->is_pending) { -+ bo_free(bo_legacy); ++ + /* Create the device specific rendering context. + */ + GLboolean r300CreateContext(const __GLcontextModes * glVisual, +@@ -189,7 +269,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + struct dd_function_table functions; + r300ContextPtr r300; + GLcontext *ctx; +- int tcl_mode, i; ++ int tcl_mode; + + assert(glVisual); + assert(driContextPriv); +@@ -203,13 +283,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + hw_tcl_on = future_hw_tcl_on = 0; + ++ r300_init_vtbl(&r300->radeon); + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. 
+ */ + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); +- r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, ++ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, + "def_max_anisotropy"); + + /* Init default driver functions then plug in our R300-specific functions +@@ -221,10 +302,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + r300InitTextureFuncs(&functions); + r300InitShaderFuncs(&functions); + +-#ifdef USER_BUFFERS +- r300_mem_init(r300); +-#endif +- + if (!radeonInitContext(&r300->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { +@@ -233,37 +310,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + } + + /* Init r300 context data */ +- r300->dma.buf0_address = +- r300->radeon.radeonScreen->buffers->list[0].address; +- +- (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); +- make_empty_list(&r300->swapped); +- +- r300->nr_heaps = 1 /* screen->numTexHeaps */ ; +- assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); +- for (i = 0; i < r300->nr_heaps; i++) { +- /* *INDENT-OFF* */ +- r300->texture_heaps[i] = driCreateTextureHeap(i, r300, +- screen-> +- texSize[i], 12, +- RADEON_NR_TEX_REGIONS, +- (drmTextureRegionPtr) +- r300->radeon.sarea-> +- tex_list[i], +- &r300->radeon.sarea-> +- tex_age[i], +- &r300->swapped, +- sizeof +- (r300TexObj), +- (destroy_texture_object_t +- *) +- r300DestroyTexObj); +- /* *INDENT-ON* */ +- } +- r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, ++ r300->radeon.texture_depth = driQueryOptioni(&r300->radeon.optionCache, + "texture_depth"); +- if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) +- r300->texture_depth = (screen->cpp == 4) ? ++ if (r300->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) ++ r300->radeon.texture_depth = (screen->cpp == 4) ? 
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + + /* Set the maximum texture size small enough that we can guarentee that +@@ -298,13 +348,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + +-#ifdef USER_BUFFERS + /* Needs further modifications */ + #if 0 + ctx->Const.MaxArrayLockSize = + ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); + #endif +-#endif ++ ++ ctx->Const.MaxDrawBuffers = 1; + + /* Initialize the software rasterizer and helper modules. + */ +@@ -377,13 +427,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + driQueryOptionb(&r300->radeon.optionCache, + "disable_lowimpact_fallback"); + +- radeonInitSpanFuncs(ctx); ++ radeonInitSpanFuncs( ctx ); + r300InitCmdBuf(r300); + r300InitState(r300); + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + r300InitSwtcl(ctx); + +- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; ++ TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline; + + tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); + if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { +@@ -406,72 +456,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + return GL_TRUE; + } + +-static void r300FreeGartAllocations(r300ContextPtr r300) +-{ +- int i, ret, tries = 0, done_age, in_use = 0; +- drm_radeon_mem_free_t memfree; +- +- memfree.region = RADEON_MEM_REGION_GART; +- +-#ifdef USER_BUFFERS +- for (i = r300->rmm->u_last; i > 0; i--) { +- if (r300->rmm->u_list[i].ptr == NULL) { +- continue; +- } +- +- /* check whether this buffer is still in use */ +- if (r300->rmm->u_list[i].pending) { +- in_use++; +- } +- } +- /* Cannot flush/lock if no context exists. 
*/ +- if (in_use) +- r300FlushCmdBuf(r300, __FUNCTION__); +- +- done_age = radeonGetAge((radeonContextPtr) r300); +- +- for (i = r300->rmm->u_last; i > 0; i--) { +- if (r300->rmm->u_list[i].ptr == NULL) { +- continue; +- } +- +- /* check whether this buffer is still in use */ +- if (!r300->rmm->u_list[i].pending) { +- continue; +- } +- +- assert(r300->rmm->u_list[i].h_pending == 0); +- +- tries = 0; +- while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { +- usleep(10); +- done_age = radeonGetAge((radeonContextPtr) r300); +- } +- if (tries >= 1000) { +- WARN_ONCE("Failed to idle region!"); +- } +- +- memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - +- (char *)r300->radeon.radeonScreen->gartTextures.map; +- +- ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, +- DRM_RADEON_FREE, &memfree, +- sizeof(memfree)); +- if (ret) { +- fprintf(stderr, "Failed to free at %p\nret = %s\n", +- r300->rmm->u_list[i].ptr, strerror(-ret)); +- } else { +- if (i == r300->rmm->u_last) +- r300->rmm->u_last--; +- +- r300->rmm->u_list[i].pending = 0; +- r300->rmm->u_list[i].ptr = NULL; +- } +- } +- r300->rmm->u_head = i; +-#endif /* USER_BUFFERS */ +-} +- + /* Destroy the device specific context. 
+ */ + void r300DestroyContext(__DRIcontextPrivate * driContextPriv) +@@ -495,55 +479,27 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) + assert(r300); /* should never be null */ + + if (r300) { +- GLboolean release_texture_heaps; +- +- release_texture_heaps = +- (r300->radeon.glCtx->Shared->RefCount == 1); + _swsetup_DestroyContext(r300->radeon.glCtx); + _tnl_DestroyContext(r300->radeon.glCtx); + _vbo_DestroyContext(r300->radeon.glCtx); + _swrast_DestroyContext(r300->radeon.glCtx); + +- if (r300->dma.current.buf) { +- r300ReleaseDmaRegion(r300, &r300->dma.current, +- __FUNCTION__); +-#ifndef USER_BUFFERS +- r300FlushCmdBuf(r300, __FUNCTION__); +-#endif +- } +- r300FreeGartAllocations(r300); +- r300DestroyCmdBuf(r300); ++ rcommonFlushCmdBuf(&r300->radeon, __FUNCTION__); + + if (radeon->state.scissor.pClipRects) { + FREE(radeon->state.scissor.pClipRects); + radeon->state.scissor.pClipRects = NULL; + } + +- if (release_texture_heaps) { +- /* This share group is about to go away, free our private +- * texture object data. +- */ +- int i; +- +- for (i = 0; i < r300->nr_heaps; i++) { +- driDestroyTextureHeap(r300->texture_heaps[i]); +- r300->texture_heaps[i] = NULL; +- } +- +- assert(is_empty_list(&r300->swapped)); +- } ++ r300DestroyCmdBuf(r300); + + radeonCleanupContext(&r300->radeon); + +-#ifdef USER_BUFFERS ++ + /* the memory manager might be accessed when Mesa frees the shared + * state, so don't destroy it earlier + */ +- r300_mem_destroy(r300); +-#endif + +- /* free the option cache */ +- driDestroyOptionCache(&r300->radeon.optionCache); + + FREE(r300); + } +diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h +index c15e9fa..37718f5 100644 +--- a/src/mesa/drivers/dri/r300/r300_context.h ++++ b/src/mesa/drivers/dri/r300/r300_context.h +@@ -42,21 +42,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "radeon_drm.h" + #include "dri_util.h" + #include "texmem.h" ++#include "radeon_common.h" + + #include "main/macros.h" + #include "main/mtypes.h" + #include "main/colormac.h" + +-#define USER_BUFFERS +- + struct r300_context; + typedef struct r300_context r300ContextRec; + typedef struct r300_context *r300ContextPtr; + +-#include "radeon_lock.h" ++ + #include "main/mm.h" + +-/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . ++/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! + */ +@@ -75,174 +74,19 @@ typedef struct r300_context *r300ContextPtr; + #include "r300_vertprog.h" + #include "r500_fragprog.h" + +-/** +- * This function takes a float and packs it into a uint32_t +- */ +-static INLINE uint32_t r300PackFloat32(float fl) +-{ +- union { +- float fl; +- uint32_t u; +- } u; +- +- u.fl = fl; +- return u.u; +-} +- +-/* This is probably wrong for some values, I need to test this +- * some more. Range checking would be a good idea also.. +- * +- * But it works for most things. 
I'll fix it later if someone +- * else with a better clue doesn't +- */ +-static INLINE uint32_t r300PackFloat24(float f) +-{ +- float mantissa; +- int exponent; +- uint32_t float24 = 0; +- +- if (f == 0.0) +- return 0; + +- mantissa = frexpf(f, &exponent); +- +- /* Handle -ve */ +- if (mantissa < 0) { +- float24 |= (1 << 23); +- mantissa = mantissa * -1.0; +- } +- /* Handle exponent, bias of 63 */ +- exponent += 62; +- float24 |= (exponent << 16); +- /* Kill 7 LSB of mantissa */ +- float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7; +- +- return float24; +-} + + /************ DMA BUFFERS **************/ + +-/* Need refcounting on dma buffers: +- */ +-struct r300_dma_buffer { +- int refcount; /**< the number of retained regions in buf */ +- drmBufPtr buf; +- int id; +-}; +-#undef GET_START +-#ifdef USER_BUFFERS +-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) +-#else +-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ +- (rvb)->address - rmesa->dma.buf0_address + \ +- (rvb)->start) +-#endif +-/* A retained region, eg vertices for indexed vertices. +- */ +-struct r300_dma_region { +- struct r300_dma_buffer *buf; +- char *address; /* == buf->address */ +- int start, end, ptr; /* offsets from start of buf */ +- +- int aos_offset; /* address in GART memory */ +- int aos_stride; /* distance between elements, in dwords */ +- int aos_size; /* number of components (1-4) */ +-}; +- +-struct r300_dma { +- /* Active dma region. Allocations for vertices and retained +- * regions come from here. Also used for emitting random vertices, +- * these may be flushed by calling flush_current(); +- */ +- struct r300_dma_region current; +- +- void (*flush) (r300ContextPtr); +- +- char *buf0_address; /* start of buf[0], for index calcs */ +- +- /* Number of "in-flight" DMA buffers, i.e. the number of buffers +- * for which a DISCARD command is currently queued in the command buffer. 
+- */ +- GLuint nr_released_bufs; +-}; +- +- /* Texture related */ +- +-typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; +- +-/* Texture object in locally shared texture space. +- */ +-struct r300_tex_obj { +- driTextureObject base; +- +- GLuint bufAddr; /* Offset to start of locally +- shared texture block */ +- +- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; +- /* Six, for the cube faces */ +- +- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ +- +- GLuint pitch; /* this isn't sent to hardware just used in calculations */ +- /* hardware register values */ +- /* Note that R200 has 8 registers per texture and R300 only 7 */ +- GLuint filter; +- GLuint filter_1; +- GLuint pitch_reg; +- GLuint size; /* npot only */ +- GLuint format; +- GLuint offset; /* Image location in the card's address space. +- All cube faces follow. */ +- GLuint unknown4; +- GLuint unknown5; +- /* end hardware registers */ +- +- /* registers computed by r200 code - keep them here to +- compare against what is actually written. +- +- to be removed later.. */ +- GLuint pp_border_color; +- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ +- GLuint format_x; +- +- GLboolean border_fallback; +- +- GLuint tile_bits; /* hw texture tile bits used on this texture */ +-}; +- +-struct r300_texture_env_state { +- r300TexObjPtr texobj; +- GLenum format; +- GLenum envMode; +-}; +- + /* The blit width for texture uploads + */ + #define R300_BLIT_WIDTH_BYTES 1024 + #define R300_MAX_TEXTURE_UNITS 8 + + struct r300_texture_state { +- struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; + int tc_count; /* number of incoming texture coordinates from VAP */ + }; + +-/** +- * A block of hardware state. +- * +- * When check returns non-zero, the returned number of dwords must be +- * copied verbatim into the command buffer in order to update a state atom +- * when it is dirty. 
+- */ +-struct r300_state_atom { +- struct r300_state_atom *next, *prev; +- const char *name; /* for debug */ +- int cmd_size; /* maximum size in dwords */ +- GLuint idx; /* index in an array (e.g. textures) */ +- uint32_t *cmd; +- GLboolean dirty; +- +- int (*check) (r300ContextPtr, struct r300_state_atom * atom); +-}; + + #define R300_VPT_CMD_0 0 + #define R300_VPT_XSCALE 1 +@@ -459,124 +303,98 @@ struct r300_state_atom { + * Cache for hardware register state. + */ + struct r300_hw_state { +- struct r300_state_atom atomlist; +- +- GLboolean is_dirty; +- GLboolean all_dirty; +- int max_state_size; /* in dwords */ +- +- struct r300_state_atom vpt; /* viewport (1D98) */ +- struct r300_state_atom vap_cntl; +- struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */ +- struct r300_state_atom vof; /* VAP output format register 0x2090 */ +- struct r300_state_atom vte; /* (20B0) */ +- struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ +- struct r300_state_atom vap_cntl_status; +- struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ +- struct r300_state_atom vic; /* vap input control (2180) */ +- struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ +- struct r300_state_atom vap_clip_cntl; +- struct r300_state_atom vap_clip; +- struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ +- struct r300_state_atom pvs; /* pvs_cntl (22D0) */ +- struct r300_state_atom gb_enable; /* (4008) */ +- struct r300_state_atom gb_misc; /* Multisampling position shifts ? 
(4010) */ +- struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ +- struct r300_state_atom ga_triangle_stipple; /* (4214) */ +- struct r300_state_atom ps; /* pointsize (421C) */ +- struct r300_state_atom ga_point_minmax; /* (4230) */ +- struct r300_state_atom lcntl; /* line control */ +- struct r300_state_atom ga_line_stipple; /* (4260) */ +- struct r300_state_atom shade; +- struct r300_state_atom polygon_mode; +- struct r300_state_atom fogp; /* fog parameters (4294) */ +- struct r300_state_atom ga_soft_reset; /* (429C) */ +- struct r300_state_atom zbias_cntl; +- struct r300_state_atom zbs; /* zbias (42A4) */ +- struct r300_state_atom occlusion_cntl; +- struct r300_state_atom cul; /* cull cntl (42B8) */ +- struct r300_state_atom su_depth_scale; /* (42C0) */ +- struct r300_state_atom rc; /* rs control (4300) */ +- struct r300_state_atom ri; /* rs interpolators (4310) */ +- struct r300_state_atom rr; /* rs route (4330) */ +- struct r300_state_atom sc_hyperz; /* (43A4) */ +- struct r300_state_atom sc_screendoor; /* (43E8) */ +- struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ +- struct r300_state_atom fpt; /* texi - (4620) */ +- struct r300_state_atom us_out_fmt; /* (46A4) */ +- struct r300_state_atom r500fp; /* r500 fp instructions */ +- struct r300_state_atom r500fp_const; /* r500 fp constants */ +- struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ +- struct r300_state_atom fogs; /* fog state (4BC0) */ +- struct r300_state_atom fogc; /* fog color (4BC8) */ +- struct r300_state_atom at; /* alpha test (4BD4) */ +- struct r300_state_atom fg_depth_src; /* (4BD8) */ +- struct r300_state_atom fpp; /* 0x4C00 and following */ +- struct r300_state_atom rb3d_cctl; /* (4E00) */ +- struct r300_state_atom bld; /* blending (4E04) */ +- struct r300_state_atom cmk; /* colormask (4E0C) */ +- struct r300_state_atom blend_color; /* constant blend color */ +- struct 
r300_state_atom rop; /* ropcntl */ +- struct r300_state_atom cb; /* colorbuffer (4E28) */ +- struct r300_state_atom rb3d_dither_ctl; /* (4E50) */ +- struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */ +- struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ +- struct r300_state_atom zs; /* zstencil control (4F00) */ +- struct r300_state_atom zstencil_format; +- struct r300_state_atom zb; /* z buffer (4F20) */ +- struct r300_state_atom zb_depthclearvalue; /* (4F28) */ +- struct r300_state_atom unk4F30; /* (4F30) */ +- struct r300_state_atom zb_hiz_offset; /* (4F44) */ +- struct r300_state_atom zb_hiz_pitch; /* (4F54) */ +- +- struct r300_state_atom vpi; /* vp instructions */ +- struct r300_state_atom vpp; /* vp parameters */ +- struct r300_state_atom vps; /* vertex point size (?) */ +- struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */ ++ struct radeon_state_atom vpt; /* viewport (1D98) */ ++ struct radeon_state_atom vap_cntl; ++ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ ++ struct radeon_state_atom vof; /* VAP output format register 0x2090 */ ++ struct radeon_state_atom vte; /* (20B0) */ ++ struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ ++ struct radeon_state_atom vap_cntl_status; ++ struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */ ++ struct radeon_state_atom vic; /* vap input control (2180) */ ++ struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ ++ struct radeon_state_atom vap_clip_cntl; ++ struct radeon_state_atom vap_clip; ++ struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ ++ struct radeon_state_atom pvs; /* pvs_cntl (22D0) */ ++ struct radeon_state_atom gb_enable; /* (4008) */ ++ struct radeon_state_atom gb_misc; /* Multisampling position shifts ? 
(4010) */ ++ struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ ++ struct radeon_state_atom ga_triangle_stipple; /* (4214) */ ++ struct radeon_state_atom ps; /* pointsize (421C) */ ++ struct radeon_state_atom ga_point_minmax; /* (4230) */ ++ struct radeon_state_atom lcntl; /* line control */ ++ struct radeon_state_atom ga_line_stipple; /* (4260) */ ++ struct radeon_state_atom shade; ++ struct radeon_state_atom polygon_mode; ++ struct radeon_state_atom fogp; /* fog parameters (4294) */ ++ struct radeon_state_atom ga_soft_reset; /* (429C) */ ++ struct radeon_state_atom zbias_cntl; ++ struct radeon_state_atom zbs; /* zbias (42A4) */ ++ struct radeon_state_atom occlusion_cntl; ++ struct radeon_state_atom cul; /* cull cntl (42B8) */ ++ struct radeon_state_atom su_depth_scale; /* (42C0) */ ++ struct radeon_state_atom rc; /* rs control (4300) */ ++ struct radeon_state_atom ri; /* rs interpolators (4310) */ ++ struct radeon_state_atom rr; /* rs route (4330) */ ++ struct radeon_state_atom sc_hyperz; /* (43A4) */ ++ struct radeon_state_atom sc_screendoor; /* (43E8) */ ++ struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */ ++ struct radeon_state_atom fpt; /* texi - (4620) */ ++ struct radeon_state_atom us_out_fmt; /* (46A4) */ ++ struct radeon_state_atom r500fp; /* r500 fp instructions */ ++ struct radeon_state_atom r500fp_const; /* r500 fp constants */ ++ struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ ++ struct radeon_state_atom fogs; /* fog state (4BC0) */ ++ struct radeon_state_atom fogc; /* fog color (4BC8) */ ++ struct radeon_state_atom at; /* alpha test (4BD4) */ ++ struct radeon_state_atom fg_depth_src; /* (4BD8) */ ++ struct radeon_state_atom fpp; /* 0x4C00 and following */ ++ struct radeon_state_atom rb3d_cctl; /* (4E00) */ ++ struct radeon_state_atom bld; /* blending (4E04) */ ++ struct radeon_state_atom cmk; /* colormask (4E0C) */ ++ struct 
radeon_state_atom blend_color; /* constant blend color */ ++ struct radeon_state_atom rop; /* ropcntl */ ++ struct radeon_state_atom cb; /* colorbuffer (4E28) */ ++ struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */ ++ struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ ++ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ ++ struct radeon_state_atom zs; /* zstencil control (4F00) */ ++ struct radeon_state_atom zstencil_format; ++ struct radeon_state_atom zb; /* z buffer (4F20) */ ++ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ ++ struct radeon_state_atom unk4F30; /* (4F30) */ ++ struct radeon_state_atom zb_hiz_offset; /* (4F44) */ ++ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ ++ ++ struct radeon_state_atom vpi; /* vp instructions */ ++ struct radeon_state_atom vpp; /* vp parameters */ ++ struct radeon_state_atom vps; /* vertex point size (?) */ ++ struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */ + /* 8 texture units */ + /* the state is grouped by function and not by + texture unit. 
This makes single unit updates + really awkward - we are much better off + updating the whole thing at once */ + struct { +- struct r300_state_atom filter; +- struct r300_state_atom filter_1; +- struct r300_state_atom size; +- struct r300_state_atom format; +- struct r300_state_atom pitch; +- struct r300_state_atom offset; +- struct r300_state_atom chroma_key; +- struct r300_state_atom border_color; ++ struct radeon_state_atom filter; ++ struct radeon_state_atom filter_1; ++ struct radeon_state_atom size; ++ struct radeon_state_atom format; ++ struct radeon_state_atom pitch; ++ struct radeon_state_atom offset; ++ struct radeon_state_atom chroma_key; ++ struct radeon_state_atom border_color; + } tex; +- struct r300_state_atom txe; /* tex enable (4104) */ +-}; ++ struct radeon_state_atom txe; /* tex enable (4104) */ + +-/** +- * This structure holds the command buffer while it is being constructed. +- * +- * The first batch of commands in the buffer is always the state that needs +- * to be re-emitted when the context is lost. This batch can be skipped +- * otherwise. +- */ +-struct r300_cmdbuf { +- int size; /* DWORDs allocated for buffer */ +- uint32_t *cmd_buf; +- int count_used; /* DWORDs filled so far */ +- int count_reemit; /* size of re-emission batch */ ++ radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; + }; + + /** + * State cache + */ + +-struct r300_depthbuffer_state { +- GLfloat scale; +-}; +- +-struct r300_stencilbuffer_state { +- GLboolean hw_stencil; +-}; +- + /* Vertex shader state */ + + /* Perhaps more if we store programs in vmem? 
*/ +@@ -812,22 +630,18 @@ struct r500_fragment_program { + #define REG_TEX0 2 + + struct r300_state { +- struct r300_depthbuffer_state depth; + struct r300_texture_state texture; + int sw_tcl_inputs[VERT_ATTRIB_MAX]; + struct r300_vertex_shader_state vertex_shader; +- struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; ++ struct radeon_aos aos[R300_MAX_AOS_ARRAYS]; + int aos_count; + +- GLuint *Elts; +- struct r300_dma_region elt_dma; ++ struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */ ++ int elt_dma_offset; /** Offset into this buffer object, in bytes */ + +- struct r300_dma_region swtcl_dma; + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + +- struct r300_stencilbuffer_state stencil; +- + }; + + #define R300_FALLBACK_NONE 0 +@@ -837,41 +651,7 @@ struct r300_state { + /* r300_swtcl.c + */ + struct r300_swtcl_info { +- GLuint RenderIndex; +- +- /** +- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is +- * installed in the Mesa state vector. +- */ +- GLuint vertex_size; +- +- /** +- * Attributes instructing the Mesa TCL pipeline where / how to put vertex +- * data in the hardware buffer. +- */ +- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; +- +- /** +- * Number of elements of \c ::vertex_attrs that are actually used. +- */ +- GLuint vertex_attr_count; +- +- /** +- * Cached pointer to the buffer where Mesa will store vertex data. +- */ +- GLubyte *verts; +- +- /* Fallback rasterization functions +- */ +- // r200_point_func draw_point; +- // r200_line_func draw_line; +- // r200_tri_func draw_tri; +- +- GLuint hw_primitive; +- GLenum render_primitive; +- GLuint numverts; +- +- /** ++ /* + * Offset of the 4UB color data within a hardware (swtcl) vertex. 
+ */ + GLuint coloroffset; +@@ -880,13 +660,6 @@ struct r300_swtcl_info { + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; +- +- /** +- * Should Mesa project vertex data or will the hardware do it? +- */ +- GLboolean needproj; +- +- struct r300_dma_region indexed_verts; + }; + + +@@ -897,40 +670,22 @@ struct r300_context { + struct radeon_context radeon; /* parent class, must be first */ + + struct r300_hw_state hw; +- struct r300_cmdbuf cmdbuf; ++ + struct r300_state state; + struct gl_vertex_program *curr_vp; + struct r300_vertex_program *selected_vp; + + /* Vertex buffers + */ +- struct r300_dma dma; +- GLboolean save_on_next_unlock; +- GLuint NewGLState; +- +- /* Texture object bookkeeping +- */ +- unsigned nr_heaps; +- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; +- driTextureObject swapped; +- int texture_depth; +- float initialMaxAnisotropy; +- +- /* Clientdata textures; +- */ +- GLuint prefer_gart_client_texturing; +- +-#ifdef USER_BUFFERS +- struct r300_memory_manager *rmm; +-#endif +- + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + + GLboolean disable_lowimpact_fallback; + + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ ++ + struct r300_swtcl_info swtcl; ++ GLboolean vap_flush_needed; + }; + + struct r300_buffer_object { +@@ -956,4 +711,7 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx, + #define RADEON_D_PLAYBACK_RAW 2 + #define RADEON_D_T 3 + ++#define r300PackFloat32 radeonPackFloat32 ++#define r300PackFloat24 radeonPackFloat24 ++ + #endif /* __R300_CONTEXT_H__ */ +diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c +index 80bd338..1512e90 100644 +--- a/src/mesa/drivers/dri/r300/r300_emit.c ++++ b/src/mesa/drivers/dri/r300/r300_emit.c +@@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "tnl/t_context.h" + + #include "r300_context.h" +-#include "radeon_ioctl.h" + #include "r300_state.h" + #include "r300_emit.h" + #include "r300_ioctl.h" + +-#ifdef USER_BUFFERS +-#include "r300_mem.h" +-#endif + + #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ +@@ -66,147 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define DEBUG_ALL DEBUG_VERTS + +-#if defined(USE_X86_ASM) +-#define COPY_DWORDS( dst, src, nr ) \ +-do { \ +- int __tmp; \ +- __asm__ __volatile__( "rep ; movsl" \ +- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ +- : "0" (nr), \ +- "D" ((long)dst), \ +- "S" ((long)src) ); \ +-} while (0) +-#else +-#define COPY_DWORDS( dst, src, nr ) \ +-do { \ +- int j; \ +- for ( j = 0 ; j < nr ; j++ ) \ +- dst[j] = ((int *)src)[j]; \ +- dst += nr; \ +-} while (0) +-#endif +- +-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 4) +- COPY_DWORDS(out, data, count); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out++; +- data += stride; +- } +-} +- +-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 8) +- COPY_DWORDS(out, data, count * 2); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data + 4); +- out += 2; +- data += stride; +- } +-} +- +-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid 
* data, int stride, int count) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 12) +- COPY_DWORDS(out, data, count * 3); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data + 4); +- out[2] = *(int *)(data + 8); +- out += 3; +- data += stride; +- } +-} +- +-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int stride, int count) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 16) +- COPY_DWORDS(out, data, count * 4); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data + 4); +- out[2] = *(int *)(data + 8); +- out[3] = *(int *)(data + 12); +- out += 4; +- data += stride; +- } +-} +- +-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, +- GLvoid * data, int size, int stride, int count) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- +- if (stride == 0) { +- r300AllocDmaRegion(rmesa, rvb, size * 4, 4); +- count = 1; +- rvb->aos_offset = GET_START(rvb); +- rvb->aos_stride = 0; +- } else { +- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); +- rvb->aos_offset = GET_START(rvb); +- rvb->aos_stride = size; +- } +- +- switch (size) { +- case 1: +- r300EmitVec4(ctx, rvb, data, stride, count); +- break; +- case 2: +- r300EmitVec8(ctx, rvb, data, stride, count); +- break; +- case 3: +- r300EmitVec12(ctx, rvb, data, stride, count); +- break; +- case 4: +- r300EmitVec16(ctx, rvb, data, stride, count); +- break; +- default: +- assert(0); +- break; +- } +-} +- + #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ + 
(attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) + +@@ -314,10 +169,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + +-#if 0 +- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; +-#endif +- + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + +@@ -371,7 +222,6 @@ int r300EmitArrays(GLcontext * ctx) + + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); +- //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); + + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; +@@ -433,7 +283,7 @@ int r300EmitArrays(GLcontext * ctx) + } + + for (i = 0; i < nr; i++) { +- int ci, fix, found = 0; ++ int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; +@@ -443,61 +293,35 @@ int r300EmitArrays(GLcontext * ctx) + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { + swizzle[i][ci] = ci; + } +- +- if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { +- if (vb->AttribPtr[tab[i]]->stride % 4) { +- return R300_FALLBACK_TCL; +- } +- rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); +- rmesa->state.aos[i].start = 0; +- rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); +- rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; +- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; +- } else { +- r300EmitVec(ctx, &rmesa->state.aos[i], ++ rcommon_emit_vector(ctx, &rmesa->state.aos[i], + vb->AttribPtr[tab[i]]->data, + vb->AttribPtr[tab[i]]->size, + vb->AttribPtr[tab[i]]->stride, count); +- } +- +- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; +- +- for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { +- if 
((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { +- continue; +- } +- found = 1; +- break; +- } +- +- if (found) { +- if (fix > 0) { +- WARN_ONCE("Feeling lucky?\n"); +- } +- rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; +- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { +- swizzle[i][ci] += fix; +- } +- } else { +- WARN_ONCE +- ("Cannot handle offset %x with stride %d, comp %d\n", +- rmesa->state.aos[i].aos_offset, +- rmesa->state.aos[i].aos_stride, +- vb->AttribPtr[tab[i]]->size); +- return R300_FALLBACK_TCL; +- } + } + + /* Setup INPUT_ROUTE. */ +- R300_STATECHANGE(rmesa, vir[0]); +- ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = +- r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], +- vb->AttribPtr, inputs, tab, nr); +- R300_STATECHANGE(rmesa, vir[1]); +- ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = +- r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, +- nr); +- ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ R300_STATECHANGE(rmesa, vir[0]); ++ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; ++ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; ++ rmesa->hw.vir[0].cmd[0] |= ++ (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], ++ vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; ++ R300_STATECHANGE(rmesa, vir[1]); ++ rmesa->hw.vir[1].cmd[0] |= ++ (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, ++ nr) & 0x3FFF) << 16; ++ } else { ++ R300_STATECHANGE(rmesa, vir[0]); ++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = ++ r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], ++ vb->AttribPtr, inputs, tab, nr); ++ R300_STATECHANGE(rmesa, vir[1]); ++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = ++ r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, ++ nr); ++ } ++ + /* Setup INPUT_CNTL. 
*/ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); +@@ -515,45 +339,34 @@ int r300EmitArrays(GLcontext * ctx) + return R300_FALLBACK_NONE; + } + +-#ifdef USER_BUFFERS +-void r300UseArrays(GLcontext * ctx) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- int i; +- +- if (rmesa->state.elt_dma.buf) +- r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); +- +- for (i = 0; i < rmesa->state.aos_count; i++) { +- if (rmesa->state.aos[i].buf) +- r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); +- } +-} +-#endif +- + void r300ReleaseArrays(GLcontext * ctx) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + +- r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); ++ if (rmesa->state.elt_dma_bo) { ++ radeon_bo_unref(rmesa->state.elt_dma_bo); ++ rmesa->state.elt_dma_bo = NULL; ++ } + for (i = 0; i < rmesa->state.aos_count; i++) { +- r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); ++ if (rmesa->state.aos[i].bo) { ++ radeon_bo_unref(rmesa->state.aos[i].bo); ++ rmesa->state.aos[i].bo = NULL; ++ } + } + } + + void r300EmitCacheFlush(r300ContextPtr rmesa) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- +- drm_radeon_cmd_header_t *cmd = NULL; +- +- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); +- e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | +- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); +- +- reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); +- e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | +- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); ++ BATCH_LOCALS(&rmesa->radeon); ++ ++ BEGIN_BATCH(4); ++ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, ++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | ++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); ++ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, ++ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | ++ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); ++ END_BATCH(); ++ COMMIT_BATCH(); + } +diff --git a/src/mesa/drivers/dri/r300/r300_emit.h 
b/src/mesa/drivers/dri/r300/r300_emit.h +index 89d7383..6bc8f8e 100644 +--- a/src/mesa/drivers/dri/r300/r300_emit.h ++++ b/src/mesa/drivers/dri/r300/r300_emit.h +@@ -44,28 +44,31 @@ + #include "r300_cmdbuf.h" + #include "radeon_reg.h" + +-/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up +- * with DRM */ +-#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) +-#define CP_PACKET3( pkt, n ) \ +- (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) +- +-static INLINE uint32_t cmdpacket0(int reg, int count) ++static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, ++ int reg, int count) + { +- drm_r300_cmd_header_t cmd; +- +- cmd.packet0.cmd_type = R300_CMD_PACKET0; +- cmd.packet0.count = count; +- cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; +- cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); +- +- return cmd.u; ++ if (!rscrn->kernel_mm) { ++ drm_r300_cmd_header_t cmd; ++ ++ cmd.u = 0; ++ cmd.packet0.cmd_type = R300_CMD_PACKET0; ++ cmd.packet0.count = count; ++ cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; ++ cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); ++ ++ return cmd.u; ++ } ++ if (count) { ++ return CP_PACKET0(reg, count - 1); ++ } ++ return CP_PACKET2; + } + +-static INLINE uint32_t cmdvpu(int addr, int count) ++static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; + cmd.vpu.cmd_type = R300_CMD_VPU; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; +@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count) + return cmd.u; + } + +-static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp) ++static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn, ++ int addr, int count, int type, int clamp) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 
0x100) >> 8; +@@ -88,169 +93,131 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp) + return cmd.u; + } + +-static INLINE uint32_t cmdpacket3(int packet) ++static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; + cmd.packet3.cmd_type = R300_CMD_PACKET3; + cmd.packet3.packet = packet; + + return cmd.u; + } + +-static INLINE uint32_t cmdcpdelay(unsigned short count) ++static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, ++ unsigned short count) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; ++ + cmd.delay.cmd_type = R300_CMD_CP_DELAY; + cmd.delay.count = count; + + return cmd.u; + } + +-static INLINE uint32_t cmdwait(unsigned char flags) ++static INLINE uint32_t cmdwait(struct radeon_screen *rscrn, ++ unsigned char flags) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; + cmd.wait.cmd_type = R300_CMD_WAIT; + cmd.wait.flags = flags; + + return cmd.u; + } + +-static INLINE uint32_t cmdpacify(void) ++static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn) + { + drm_r300_cmd_header_t cmd; + ++ cmd.u = 0; + cmd.header.cmd_type = R300_CMD_END3D; + + return cmd.u; + } + + /** +- * Prepare to write a register value to register at address reg. +- * If num_extra > 0 then the following extra values are written +- * to registers with address +4, +8 and so on.. +- */ +-#define reg_start(reg, num_extra) \ +- do { \ +- int _n; \ +- _n=(num_extra); \ +- cmd = (drm_radeon_cmd_header_t*) \ +- r300AllocCmdBuf(rmesa, \ +- (_n+2), \ +- __FUNCTION__); \ +- cmd_reserved=_n+2; \ +- cmd_written=1; \ +- cmd[0].i=cmdpacket0((reg), _n+1); \ +- } while (0); +- +-/** +- * Emit GLuint freestyle ++ * Write the header of a packet3 to the command buffer. ++ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. 
+ */ +-#define e32(dword) \ +- do { \ +- if(cmd_writtenradeonScreen->kernel_mm) { \ ++ OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\ ++ R300_CMD_PACKET3_RAW)); \ ++ } else b_l_rmesa->cmdbuf.cs->section_cdw++;\ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } while(0) + +-#define efloat(f) e32(r300PackFloat32(f)) +- +-#define vsf_start_fragment(dest, length) \ +- do { \ +- int _n; \ +- _n = (length); \ +- cmd = (drm_radeon_cmd_header_t*) \ +- r300AllocCmdBuf(rmesa, \ +- (_n+1), \ +- __FUNCTION__); \ +- cmd_reserved = _n+2; \ +- cmd_written =1; \ +- cmd[0].i = cmdvpu((dest), _n/4); \ +- } while (0); +- +-#define r500fp_start_fragment(dest, length) \ +- do { \ +- int _n; \ +- _n = (length); \ +- cmd = (drm_radeon_cmd_header_t*) \ +- r300AllocCmdBuf(rmesa, \ +- (_n+1), \ +- __FUNCTION__); \ +- cmd_reserved = _n+1; \ +- cmd_written =1; \ +- cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ +- } while (0); +- +-#define start_packet3(packet, count) \ +- { \ +- int _n; \ +- GLuint _p; \ +- _n = (count); \ +- _p = (packet); \ +- cmd = (drm_radeon_cmd_header_t*) \ +- r300AllocCmdBuf(rmesa, \ +- (_n+3), \ +- __FUNCTION__); \ +- cmd_reserved = _n+3; \ +- cmd_written = 2; \ +- if(_n > 0x3fff) { \ +- fprintf(stderr,"Too big packet3 %08x: cannot " \ +- "store %d dwords\n", \ +- _p, _n); \ +- _mesa_exit(-1); \ +- } \ +- cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ +- cmd[1].i = _p | ((_n & 0x3fff)<<16); \ +- } +- + /** + * Must be sent to switch to 2d commands + */ +-void static INLINE end_3d(r300ContextPtr rmesa) ++void static INLINE end_3d(radeonContextPtr radeon) + { +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(radeon); + +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].header.cmd_type = R300_CMD_END3D; ++ if (!radeon->radeonScreen->kernel_mm) { ++ BEGIN_BATCH_NO_AUTOSTATE(1); ++ OUT_BATCH(cmdpacify(radeon->radeonScreen)); ++ END_BATCH(); ++ } + } + + void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) 
+ { +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(&rmesa->radeon); + +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].i = cmdcpdelay(count); ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ BEGIN_BATCH_NO_AUTOSTATE(1); ++ OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count)); ++ END_BATCH(); ++ } + } + +-void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags) ++void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) + { +- drm_radeon_cmd_header_t *cmd = NULL; +- +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); +- cmd[0].i = cmdwait(flags); ++ BATCH_LOCALS(radeon); ++ uint32_t wait_until; ++ ++ if (!radeon->radeonScreen->kernel_mm) { ++ BEGIN_BATCH_NO_AUTOSTATE(1); ++ OUT_BATCH(cmdwait(radeon->radeonScreen, flags)); ++ END_BATCH(); ++ } else { ++ switch(flags) { ++ case R300_WAIT_2D: ++ wait_until = (1 << 14); ++ break; ++ case R300_WAIT_3D: ++ wait_until = (1 << 15); ++ break; ++ case R300_NEW_WAIT_2D_3D: ++ wait_until = (1 << 14) | (1 << 15); ++ break; ++ case R300_NEW_WAIT_2D_2D_CLEAN: ++ wait_until = (1 << 14) | (1 << 16) | (1 << 18); ++ break; ++ case R300_NEW_WAIT_3D_3D_CLEAN: ++ wait_until = (1 << 15) | (1 << 17) | (1 << 18); ++ break; ++ case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: ++ wait_until = (1 << 14) | (1 << 16) | (1 << 18); ++ wait_until |= (1 << 15) | (1 << 17) | (1 << 18); ++ break; ++ default: ++ return; ++ } ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); ++ OUT_BATCH(wait_until); ++ END_BATCH(); ++ } + } + + extern int r300EmitArrays(GLcontext * ctx); + +-#ifdef USER_BUFFERS +-void r300UseArrays(GLcontext * ctx); +-#endif +- + extern void r300ReleaseArrays(GLcontext * ctx); + extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); + extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c 
b/src/mesa/drivers/dri/r300/r300_fragprog.c +index 4ef7f2b..8d030c6 100644 +--- a/src/mesa/drivers/dri/r300/r300_fragprog.c ++++ b/src/mesa/drivers/dri/r300/r300_fragprog.c +@@ -163,6 +163,19 @@ static GLboolean transform_TEX( + } + } + ++ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { ++ int tmpreg = radeonFindFreeTemporary(t); ++ tgt = radeonAppendInstructions(t->Program, 1); ++ tgt->Opcode = OPCODE_MOV; ++ tgt->DstReg.File = PROGRAM_TEMPORARY; ++ tgt->DstReg.Index = tmpreg; ++ tgt->SrcReg[0] = inst.SrcReg[0]; ++ ++ reset_srcreg(&inst.SrcReg[0]); ++ inst.SrcReg[0].File = PROGRAM_TEMPORARY; ++ inst.SrcReg[0].Index = tmpreg; ++ } ++ + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + +diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c +index ee85e22..619d268 100644 +--- a/src/mesa/drivers/dri/r300/r300_ioctl.c ++++ b/src/mesa/drivers/dri/r300/r300_ioctl.c +@@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/context.h" + #include "swrast/swrast.h" + ++#include "radeon_common.h" ++#include "radeon_lock.h" + #include "r300_context.h" +-#include "radeon_ioctl.h" + #include "r300_ioctl.h" + #include "r300_cmdbuf.h" + #include "r300_state.h" +@@ -55,71 +56,83 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "radeon_reg.h" + #include "r300_emit.h" + #include "r300_fragprog.h" ++#include "r300_context.h" + + #include "vblank.h" + ++#define R200_3D_DRAW_IMMD_2 0xC0003500 ++ + #define CLEARBUFFER_COLOR 0x1 + #define CLEARBUFFER_DEPTH 0x2 + #define CLEARBUFFER_STENCIL 0x4 + +-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) ++static void r300ClearBuffer(r300ContextPtr r300, int flags, ++ struct radeon_renderbuffer *rrb, ++ struct radeon_renderbuffer *rrbd) + { ++ BATCH_LOCALS(&r300->radeon); + GLcontext *ctx = r300->radeon.glCtx; + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; +- GLuint cboffset, cbpitch; +- drm_r300_cmd_header_t *cmd2; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; ++ GLuint cbpitch = 0; + r300ContextPtr rmesa = r300; + + if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", +- __FUNCTION__, buffer ? "back" : "front", +- dPriv->x, dPriv->y, dPriv->w, dPriv->h); +- +- if (buffer) { +- cboffset = r300->radeon.radeonScreen->backOffset; +- cbpitch = r300->radeon.radeonScreen->backPitch; +- } else { +- cboffset = r300->radeon.radeonScreen->frontOffset; +- cbpitch = r300->radeon.radeonScreen->frontPitch; ++ fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", ++ __FUNCTION__, rrb, dPriv->x, dPriv->y, ++ dPriv->w, dPriv->h); ++ ++ if (rrb) { ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->cpp == 4) ++ cbpitch |= R300_COLOR_FORMAT_ARGB8888; ++ else ++ cbpitch |= R300_COLOR_FORMAT_RGB565; ++ ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ ++ cbpitch |= R300_COLOR_TILE_ENABLE; ++ } + } + +- cboffset += r300->radeon.radeonScreen->fbLocation; +- +- cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); +- end_3d(rmesa); +- +- R300_STATECHANGE(r300, cb); +- reg_start(R300_RB3D_COLOROFFSET0, 0); +- e32(cboffset); +- +- if (r300->radeon.radeonScreen->cpp == 4) +- cbpitch |= R300_COLOR_FORMAT_ARGB8888; +- else +- cbpitch |= R300_COLOR_FORMAT_RGB565; +- +- if 
(r300->radeon.sarea->tiling_enabled) +- cbpitch |= R300_COLOR_TILE_ENABLE; +- +- reg_start(R300_RB3D_COLORPITCH0, 0); +- e32(cbpitch); +- +- R300_STATECHANGE(r300, cmk); +- reg_start(RB3D_COLOR_CHANNEL_MASK, 0); ++ /* TODO in bufmgr */ ++ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); ++ end_3d(&rmesa->radeon); + + if (flags & CLEARBUFFER_COLOR) { +- e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | +- (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | +- (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | +- (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); ++ assert(rrb != 0); ++ BEGIN_BATCH_NO_AUTOSTATE(6); ++ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); ++ END_BATCH(); ++ } ++#if 1 ++ if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { ++ assert(rrbd != 0); ++ cbpitch = (rrbd->pitch / rrbd->cpp); ++ if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ ++ cbpitch |= R300_DEPTHMACROTILE_ENABLE; ++ } ++ if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ ++ cbpitch |= R300_DEPTHMICROTILE_TILED; ++ } ++ BEGIN_BATCH_NO_AUTOSTATE(6); ++ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); ++ OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); ++ END_BATCH(); ++ } ++#endif ++ BEGIN_BATCH_NO_AUTOSTATE(6); ++ OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); ++ if (flags & CLEARBUFFER_COLOR) { ++ OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | ++ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | ++ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | ++ (ctx->Color.ColorMask[ACOMP] ? 
RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); + } else { +- e32(0x0); ++ OUT_BATCH(0); + } + +- R300_STATECHANGE(r300, zs); +- reg_start(R300_ZB_CNTL, 2); + + { + uint32_t t1, t2; +@@ -146,37 +159,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) + R300_S_FRONT_ZFAIL_OP_SHIFT); + } + +- e32(t1); +- e32(t2); +- e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | +- (ctx->Stencil.Clear & R300_STENCILREF_MASK)); ++ OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); ++ OUT_BATCH(t1); ++ OUT_BATCH(t2); ++ OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << ++ R300_STENCILWRITEMASK_SHIFT) | ++ (ctx->Stencil.Clear & R300_STENCILREF_MASK)); ++ END_BATCH(); + } + +- cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); +- cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; +- cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; +- cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); +- cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); +- cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); +- cmd2[4].u = r300PackFloat32(1.0); +- cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); +- cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); +- cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); +- cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); +- ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ BEGIN_BATCH_NO_AUTOSTATE(9); ++ OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR)); ++ OUT_BATCH_FLOAT32(dPriv->w / 2.0); ++ OUT_BATCH_FLOAT32(dPriv->h / 2.0); ++ OUT_BATCH_FLOAT32(ctx->Depth.Clear); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); ++ END_BATCH(); ++ } else { ++ OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); ++ OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | ++ (1 << R300_PRIM_NUM_VERTICES_SHIFT)); ++ 
OUT_BATCH_FLOAT32(dPriv->w / 2.0); ++ OUT_BATCH_FLOAT32(dPriv->h / 2.0); ++ OUT_BATCH_FLOAT32(ctx->Depth.Clear); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); ++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); ++ } ++ + r300EmitCacheFlush(rmesa); +- cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); ++ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); ++ ++ R300_STATECHANGE(r300, cb); ++ R300_STATECHANGE(r300, cmk); ++ R300_STATECHANGE(r300, zs); + } + + static void r300EmitClearState(GLcontext * ctx) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); +- r300ContextPtr rmesa = r300; ++ BATCH_LOCALS(&r300->radeon); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + int i; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; + int has_tcl = 1; + int is_r500 = 0; + GLuint vap_cntl; +@@ -184,35 +215,37 @@ static void r300EmitClearState(GLcontext * ctx) + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + +- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- is_r500 = 1; ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ++ is_r500 = 1; + +- +- /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and +- * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are +- * quite complex; see the functions in r300_emit.c. ++ /* State atom dirty tracking is a little subtle here. ++ * ++ * On the one hand, we need to make sure base state is emitted ++ * here if we start with an empty batch buffer, otherwise clear ++ * works incorrectly with multiple processes. Therefore, the first ++ * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. ++ * ++ * On the other hand, implicit state emission clears the state atom ++ * dirty bits, so we have to call R300_STATECHANGE later than the ++ * first BEGIN_BATCH. 
+ * +- * I believe it would be a good idea to extend the functions in +- * r300_emit.c so that they can be used to setup the default values for +- * these registers, as well as the actual values used for rendering. ++ * The final trickiness is that, because we change state, we need ++ * to ensure that any stored swtcl primitives are flushed properly ++ * before we start changing state. See the R300_NEWPRIM in r300Clear ++ * for this. + */ +- R300_STATECHANGE(r300, vir[0]); +- reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); ++ BEGIN_BATCH(31); ++ OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); + if (!has_tcl) +- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + else +- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + +- /* disable fog */ +- R300_STATECHANGE(r300, fogs); +- reg_start(R300_FG_FOG_BLEND, 0); +- e32(0x0); +- +- R300_STATECHANGE(r300, vir[1]); +- reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); +- e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | ++ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); ++ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, ++ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | +@@ -226,238 +259,278 @@ static void r300EmitClearState(GLcontext * ctx) + << R300_SWIZZLE1_SHIFT))); + + /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ +- 
R300_STATECHANGE(r300, vic); +- reg_start(R300_VAP_VTX_STATE_CNTL, 1); +- e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); +- e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); ++ OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); ++ OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); ++ OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + +- R300_STATECHANGE(r300, vte); + /* comes from fglrx startup of clear */ +- reg_start(R300_SE_VTE_CNTL, 1); +- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | +- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | +- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | +- R300_VPORT_Z_OFFSET_ENA); +- e32(0x8); ++ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); ++ OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | ++ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | ++ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | ++ R300_VPORT_Z_OFFSET_ENA); ++ OUT_BATCH(0x8); + +- reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); +- e32(0xaaaaaaaa); ++ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + +- R300_STATECHANGE(r300, vof); +- reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); +- e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | +- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); +- e32(0x0); /* no textures */ ++ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); ++ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | ++ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); ++ OUT_BATCH(0); /* no textures */ + +- R300_STATECHANGE(r300, txe); +- reg_start(R300_TX_ENABLE, 0); +- e32(0x0); ++ OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); + +- R300_STATECHANGE(r300, vpt); +- reg_start(R300_SE_VPORT_XSCALE, 5); +- efloat(1.0); +- efloat(dPriv->x); +- efloat(1.0); +- efloat(dPriv->y); +- efloat(1.0); +- efloat(0.0); ++ OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(dPriv->x); ++ OUT_BATCH_FLOAT32(1.0); ++ OUT_BATCH_FLOAT32(dPriv->y); ++ OUT_BATCH_FLOAT32(1.0); ++ 
OUT_BATCH_FLOAT32(0.0); + +- R300_STATECHANGE(r300, at); +- reg_start(R300_FG_ALPHA_FUNC, 0); +- e32(0x0); ++ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); ++ ++ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ END_BATCH(); + ++ R300_STATECHANGE(r300, vir[0]); ++ R300_STATECHANGE(r300, fogs); ++ R300_STATECHANGE(r300, vir[1]); ++ R300_STATECHANGE(r300, vic); ++ R300_STATECHANGE(r300, vte); ++ R300_STATECHANGE(r300, vof); ++ R300_STATECHANGE(r300, txe); ++ R300_STATECHANGE(r300, vpt); ++ R300_STATECHANGE(r300, at); + R300_STATECHANGE(r300, bld); +- reg_start(R300_RB3D_CBLEND, 1); +- e32(0x0); +- e32(0x0); ++ R300_STATECHANGE(r300, ps); + + if (has_tcl) { +- R300_STATECHANGE(r300, vap_clip_cntl); +- reg_start(R300_VAP_CLIP_CNTL, 0); +- e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); ++ R300_STATECHANGE(r300, vap_clip_cntl); ++ ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); ++ END_BATCH(); + } + +- R300_STATECHANGE(r300, ps); +- reg_start(R300_GA_POINT_SIZE, 0); +- e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | +- ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); ++ BEGIN_BATCH_NO_AUTOSTATE(2); ++ OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, ++ ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ++ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); ++ END_BATCH(); + + if (!is_r500) { + R300_STATECHANGE(r300, ri); +- reg_start(R300_RS_IP_0, 7); +- for (i = 0; i < 8; ++i) { +- e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); +- } +- + R300_STATECHANGE(r300, rc); +- /* The second constant is needed to get glxgears display anything .. 
*/ +- reg_start(R300_RS_COUNT, 1); +- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +- e32(0x0); +- + R300_STATECHANGE(r300, rr); +- reg_start(R300_RS_INST_0, 0); +- e32(R300_RS_INST_COL_CN_WRITE); ++ ++ BEGIN_BATCH(14); ++ OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); ++ for (i = 0; i < 8; ++i) ++ OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); ++ ++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); ++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); ++ OUT_BATCH(0x0); ++ ++ OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); ++ END_BATCH(); + } else { + R300_STATECHANGE(r300, ri); +- reg_start(R500_RS_IP_0, 7); ++ R300_STATECHANGE(r300, rc); ++ R300_STATECHANGE(r300, rr); ++ ++ BEGIN_BATCH(14); ++ OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { +- e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | +- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | +- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | +- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); ++ OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + } + +- R300_STATECHANGE(r300, rc); +- /* The second constant is needed to get glxgears display anything .. 
*/ +- reg_start(R300_RS_COUNT, 1); +- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +- e32(0x0); +- +- R300_STATECHANGE(r300, rr); +- reg_start(R500_RS_INST_0, 0); +- e32(R500_RS_INST_COL_CN_WRITE); ++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); ++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); ++ OUT_BATCH(0x0); + ++ OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); ++ END_BATCH(); + } + + if (!is_r500) { + R300_STATECHANGE(r300, fp); +- reg_start(R300_US_CONFIG, 2); +- e32(0x0); +- e32(0x0); +- e32(0x0); +- reg_start(R300_US_CODE_ADDR_0, 3); +- e32(0x0); +- e32(0x0); +- e32(0x0); +- e32(R300_RGBA_OUT); +- + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); + +- reg_start(R300_US_ALU_RGB_INST_0, 0); +- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); +- +- reg_start(R300_US_ALU_RGB_ADDR_0, 0); +- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); +- +- reg_start(R300_US_ALU_ALPHA_INST_0, 0); +- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); +- +- reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); +- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); ++ BEGIN_BATCH(17); ++ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(0x0); ++ OUT_BATCH(R300_RGBA_OUT); ++ ++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, ++ FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); ++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, ++ FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); ++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, ++ FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); ++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, ++ FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); ++ END_BATCH(); + } else { +- R300_STATECHANGE(r300, fp); +- reg_start(R500_US_CONFIG, 1); +- e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); +- e32(0x0); +- 
reg_start(R500_US_CODE_ADDR, 2); +- e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); +- e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); +- e32(R500_US_CODE_OFFSET_ADDR(0)); ++ struct radeon_state_atom r500fp; ++ uint32_t _cmd[10]; + ++ R300_STATECHANGE(r300, fp); + R300_STATECHANGE(r300, r500fp); +- r500fp_start_fragment(0, 6); +- +- e32(R500_INST_TYPE_OUT | +- R500_INST_TEX_SEM_WAIT | +- R500_INST_LAST | +- R500_INST_RGB_OMASK_R | +- R500_INST_RGB_OMASK_G | +- R500_INST_RGB_OMASK_B | +- R500_INST_ALPHA_OMASK | +- R500_INST_RGB_CLAMP | +- R500_INST_ALPHA_CLAMP); +- +- e32(R500_RGB_ADDR0(0) | +- R500_RGB_ADDR1(0) | +- R500_RGB_ADDR1_CONST | +- R500_RGB_ADDR2(0) | +- R500_RGB_ADDR2_CONST); +- +- e32(R500_ALPHA_ADDR0(0) | +- R500_ALPHA_ADDR1(0) | +- R500_ALPHA_ADDR1_CONST | +- R500_ALPHA_ADDR2(0) | +- R500_ALPHA_ADDR2_CONST); +- +- e32(R500_ALU_RGB_SEL_A_SRC0 | +- R500_ALU_RGB_R_SWIZ_A_R | +- R500_ALU_RGB_G_SWIZ_A_G | +- R500_ALU_RGB_B_SWIZ_A_B | +- R500_ALU_RGB_SEL_B_SRC0 | +- R500_ALU_RGB_R_SWIZ_B_R | +- R500_ALU_RGB_B_SWIZ_B_G | +- R500_ALU_RGB_G_SWIZ_B_B); +- +- e32(R500_ALPHA_OP_CMP | +- R500_ALPHA_SWIZ_A_A | +- R500_ALPHA_SWIZ_B_A); +- +- e32(R500_ALU_RGBA_OP_CMP | +- R500_ALU_RGBA_R_SWIZ_0 | +- R500_ALU_RGBA_G_SWIZ_0 | +- R500_ALU_RGBA_B_SWIZ_0 | +- R500_ALU_RGBA_A_SWIZ_0); ++ ++ BEGIN_BATCH(7); ++ OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); ++ OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); ++ OUT_BATCH(0x0); ++ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); ++ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); ++ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); ++ OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); ++ END_BATCH(); ++ ++ r500fp.check = check_r500fp; ++ r500fp.cmd = _cmd; ++ r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0); ++ r500fp.cmd[1] = R500_INST_TYPE_OUT | ++ R500_INST_TEX_SEM_WAIT | ++ R500_INST_LAST | ++ R500_INST_RGB_OMASK_R | ++ R500_INST_RGB_OMASK_G | ++ R500_INST_RGB_OMASK_B | ++ 
R500_INST_ALPHA_OMASK | ++ R500_INST_RGB_CLAMP | ++ R500_INST_ALPHA_CLAMP; ++ r500fp.cmd[2] = R500_RGB_ADDR0(0) | ++ R500_RGB_ADDR1(0) | ++ R500_RGB_ADDR1_CONST | ++ R500_RGB_ADDR2(0) | ++ R500_RGB_ADDR2_CONST; ++ r500fp.cmd[3] = R500_ALPHA_ADDR0(0) | ++ R500_ALPHA_ADDR1(0) | ++ R500_ALPHA_ADDR1_CONST | ++ R500_ALPHA_ADDR2(0) | ++ R500_ALPHA_ADDR2_CONST; ++ r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 | ++ R500_ALU_RGB_R_SWIZ_A_R | ++ R500_ALU_RGB_G_SWIZ_A_G | ++ R500_ALU_RGB_B_SWIZ_A_B | ++ R500_ALU_RGB_SEL_B_SRC0 | ++ R500_ALU_RGB_R_SWIZ_B_R | ++ R500_ALU_RGB_B_SWIZ_B_G | ++ R500_ALU_RGB_G_SWIZ_B_B; ++ r500fp.cmd[5] = R500_ALPHA_OP_CMP | ++ R500_ALPHA_SWIZ_A_A | ++ R500_ALPHA_SWIZ_B_A; ++ r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP | ++ R500_ALU_RGBA_R_SWIZ_0 | ++ R500_ALU_RGBA_G_SWIZ_0 | ++ R500_ALU_RGBA_B_SWIZ_0 | ++ R500_ALU_RGBA_A_SWIZ_0; ++ ++ r500fp.cmd[7] = 0; ++ emit_r500fp(ctx, &r500fp); + } + +- reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); +- e32(0x00000000); ++ BEGIN_BATCH(2); ++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); ++ END_BATCH(); ++ + if (has_tcl) { +- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | ++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); +- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- vap_cntl |= R500_TCL_STATE_OPTIMIZATION; +- } else +- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ++ vap_cntl |= R500_TCL_STATE_OPTIMIZATION; ++ } else { ++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); ++ } + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) +- vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + 
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) +- vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) +- vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) +- vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else +- vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); ++ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); ++ ++ R300_STATECHANGE(r300, vap_cntl); + +- R300_STATECHANGE(rmesa, vap_cntl); +- reg_start(R300_VAP_CNTL, 0); +- e32(vap_cntl); ++ BEGIN_BATCH(2); ++ OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); ++ END_BATCH(); + + if (has_tcl) { ++ struct radeon_state_atom vpu; ++ uint32_t _cmd[10]; + R300_STATECHANGE(r300, pvs); +- reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); +- +- e32((0 << R300_PVS_FIRST_INST_SHIFT) | +- (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | +- (1 << R300_PVS_LAST_INST_SHIFT)); +- e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | +- (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); +- e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +- + R300_STATECHANGE(r300, vpi); +- vsf_start_fragment(0x0, 8); +- +- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); +- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(0x0); + +- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); +- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, 
PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); +- e32(0x0); ++ BEGIN_BATCH(4); ++ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); ++ OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | ++ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | ++ (1 << R300_PVS_LAST_INST_SHIFT)); ++ OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ++ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); ++ OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); ++ END_BATCH(); ++ ++ vpu.check = check_vpu; ++ vpu.cmd = _cmd; ++ vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2); ++ ++ vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, ++ 0, 0xf, PVS_DST_REG_OUT); ++ vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, ++ PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, ++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ vpu.cmd[4] = 0x0; ++ ++ vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, ++ PVS_DST_REG_OUT); ++ vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, ++ PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, ++ PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, ++ ++ VSF_FLAG_NONE); ++ vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_SELECT_FORCE_0, ++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ vpu.cmd[8] = 0x0; ++ ++ r300->vap_flush_needed = GL_TRUE; ++ emit_vpu(ctx, &vpu); + } + } + +@@ -468,9 +541,11 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; ++ GLframebuffer *fb = dPriv->driverPrivate; ++ struct radeon_renderbuffer *rrb; ++ struct radeon_renderbuffer *rrbd; + int flags = 0; + int bits = 0; +- int swapped; + + if 
(RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r300Clear\n"); +@@ -482,6 +557,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + return; + } + ++ /* Flush swtcl vertices if necessary, because we will change hardware ++ * state during clear. See also the state-related comment in ++ * r300EmitClearState. ++ */ ++ R300_NEWPRIM(r300); ++ + if (mask & BUFFER_BIT_FRONT_LEFT) { + flags |= BUFFER_BIT_FRONT_LEFT; + mask &= ~BUFFER_BIT_FRONT_LEFT; +@@ -497,7 +578,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + mask &= ~BUFFER_BIT_DEPTH; + } + +- if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) { ++ if ((mask & BUFFER_BIT_STENCIL) && r300->radeon.state.stencil.hwBuffer) { + bits |= CLEARBUFFER_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } +@@ -509,336 +590,33 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) + _swrast_Clear(ctx, mask); + } + +- swapped = r300->radeon.sarea->pfCurrentPage == 1; +- + /* Make sure it fits there. */ +- r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); ++ rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__); + if (flags || bits) + r300EmitClearState(ctx); ++ rrbd = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; + + if (flags & BUFFER_BIT_FRONT_LEFT) { +- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); ++ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); + bits = 0; + } + + if (flags & BUFFER_BIT_BACK_LEFT) { +- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); ++ rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); + bits = 0; + } + + if (bits) +- r300ClearBuffer(r300, bits, 0); ++ r300ClearBuffer(r300, bits, NULL, rrbd); + +-} +- +-void r300Flush(GLcontext * ctx) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if 
(rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +-} +- +-#ifdef USER_BUFFERS +-#include "r300_mem.h" +- +-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +-{ +- struct r300_dma_buffer *dmabuf; +- size = MAX2(size, RADEON_BUFFER_SIZE * 16); +- +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->dma.flush) { +- rmesa->dma.flush(rmesa); +- } +- +- if (rmesa->dma.current.buf) { +-#ifdef USER_BUFFERS +- r300_mem_use(rmesa, rmesa->dma.current.buf->id); +-#endif +- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); +- } +- if (rmesa->dma.nr_released_bufs > 4) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- dmabuf = CALLOC_STRUCT(r300_dma_buffer); +- dmabuf->buf = (void *)1; /* hack */ +- dmabuf->refcount = 1; +- +- dmabuf->id = r300_mem_alloc(rmesa, 4, size); +- if (dmabuf->id == 0) { +- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ +- +- r300FlushCmdBufLocked(rmesa, __FUNCTION__); +- radeonWaitForIdleLocked(&rmesa->radeon); +- +- dmabuf->id = r300_mem_alloc(rmesa, 4, size); +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (dmabuf->id == 0) { +- fprintf(stderr, +- "Error: Could not get dma buffer... 
exiting\n"); +- _mesa_exit(-1); +- } +- } +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); +- rmesa->dma.current.end = size; +- rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +-} +- +-void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, const char *caller) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (--region->buf->refcount == 0) { +- r300_mem_free(rmesa, region->buf->id); +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = 0; +- region->start = 0; +-} +- +-/* Allocates a region from rmesa->dma.current. If there isn't enough +- * space in current, grab a new buffer (and discard what was left of current) +- */ +-void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, +- int bytes, int alignment) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (region->buf) +- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); +- +- alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) +- r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; +- +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +- +- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +-} +- +-#else 
+-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +-{ +- struct r300_dma_buffer *dmabuf; +- int fd = rmesa->radeon.dri.fd; +- int index = 0; +- int size = 0; +- drmDMAReq dma; +- int ret; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->dma.flush) { +- rmesa->dma.flush(rmesa); +- } +- +- if (rmesa->dma.current.buf) +- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); +- +- if (rmesa->dma.nr_released_bufs > 4) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- dma.context = rmesa->radeon.dri.hwContext; +- dma.send_count = 0; +- dma.send_list = NULL; +- dma.send_sizes = NULL; +- dma.flags = 0; +- dma.request_count = 1; +- dma.request_size = RADEON_BUFFER_SIZE; +- dma.request_list = &index; +- dma.request_sizes = &size; +- dma.granted_count = 0; +- +- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ +- +- ret = drmDMA(fd, &dma); +- +- if (ret != 0) { +- /* Try to release some buffers and wait until we can't get any more */ +- if (rmesa->dma.nr_released_bufs) { +- r300FlushCmdBufLocked(rmesa, __FUNCTION__); +- } +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Waiting for buffers\n"); +- +- radeonWaitForIdleLocked(&rmesa->radeon); +- ret = drmDMA(fd, &dma); +- +- if (ret != 0) { +- UNLOCK_HARDWARE(&rmesa->radeon); +- fprintf(stderr, +- "Error: Could not get dma buffer... 
exiting\n"); +- _mesa_exit(-1); +- } +- } +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Allocated buffer %d\n", index); +- +- dmabuf = CALLOC_STRUCT(r300_dma_buffer); +- dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; +- dmabuf->refcount = 1; +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = dmabuf->buf->address; +- rmesa->dma.current.end = dmabuf->buf->total; +- rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +-} +- +-void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, const char *caller) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (--region->buf->refcount == 0) { +- drm_radeon_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) +- fprintf(stderr, "%s -- DISCARD BUF %d\n", +- __FUNCTION__, region->buf->buf->idx); +- cmd = +- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, +- sizeof +- (*cmd) / 4, +- __FUNCTION__); +- cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; +- cmd->dma.buf_idx = region->buf->buf->idx; +- +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = 0; +- region->start = 0; +-} +- +-/* Allocates a region from rmesa->dma.current. 
If there isn't enough +- * space in current, grab a new buffer (and discard what was left of current) +- */ +-void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, +- int bytes, int alignment) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush(rmesa); +- +- if (region->buf) +- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); +- +- alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) +- r300RefillCurrentDmaRegion(rmesa); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; +- +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +- +- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +-} +- +-#endif +- +-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, +- GLint size) +-{ +- int offset = +- (char *)pointer - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- int valid = (size >= 0 && offset >= 0 +- && offset + size < +- rmesa->radeon.radeonScreen->gartTextures.size); +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, +- valid); +- +- return valid; +-} +- +-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) +-{ +- int offset = +- (char *)pointer - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- +- //fprintf(stderr, "offset=%08x\n", offset); +- +- if (offset < 0 +- || offset > rmesa->radeon.radeonScreen->gartTextures.size) +- return ~0; +- else +- return 
rmesa->radeon.radeonScreen->gart_texture_offset + offset; ++ COMMIT_BATCH(); + } + + void r300InitIoctlFuncs(struct dd_function_table *functions) + { + functions->Clear = r300Clear; + functions->Finish = radeonFinish; +- functions->Flush = r300Flush; ++ functions->Flush = radeonFlush; + } +diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h +index e1143fb..3abfa71 100644 +--- a/src/mesa/drivers/dri/r300/r300_ioctl.h ++++ b/src/mesa/drivers/dri/r300/r300_ioctl.h +@@ -39,22 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_context.h" + #include "radeon_drm.h" + +-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, +- const GLvoid * pointer, GLint size); +- +-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, +- const GLvoid * pointer); +- +-extern void r300Flush(GLcontext * ctx); +- +-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, +- const char *caller); +-extern void r300AllocDmaRegion(r300ContextPtr rmesa, +- struct r300_dma_region *region, int bytes, +- int alignment); +- + extern void r300InitIoctlFuncs(struct dd_function_table *functions); + +-extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size); + #endif /* __R300_IOCTL_H__ */ +diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c +deleted file mode 100644 +index f8f9d4f..0000000 +--- a/src/mesa/drivers/dri/r300/r300_mem.c ++++ /dev/null +@@ -1,385 +0,0 @@ +-/* +- * Copyright (C) 2005 Aapo Tahkola. +- * +- * All Rights Reserved. 
+- * +- * Permission is hereby granted, free of charge, to any person obtaining +- * a copy of this software and associated documentation files (the +- * "Software"), to deal in the Software without restriction, including +- * without limitation the rights to use, copy, modify, merge, publish, +- * distribute, sublicense, and/or sell copies of the Software, and to +- * permit persons to whom the Software is furnished to do so, subject to +- * the following conditions: +- * +- * The above copyright notice and this permission notice (including the +- * next paragraph) shall be included in all copies or substantial +- * portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- * +- */ +- +-/** +- * \file +- * +- * \author Aapo Tahkola +- */ +- +-#include +- +-#include "r300_context.h" +-#include "r300_cmdbuf.h" +-#include "r300_ioctl.h" +-#include "r300_mem.h" +-#include "radeon_ioctl.h" +- +-#ifdef USER_BUFFERS +- +-static void resize_u_list(r300ContextPtr rmesa) +-{ +- void *temp; +- int nsize; +- +- temp = rmesa->rmm->u_list; +- nsize = rmesa->rmm->u_size * 2; +- +- rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); +- _mesa_memset(rmesa->rmm->u_list, 0, +- nsize * sizeof(*rmesa->rmm->u_list)); +- +- if (temp) { +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- _mesa_memcpy(rmesa->rmm->u_list, temp, +- rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); +- _mesa_free(temp); +- } +- +- rmesa->rmm->u_size = nsize; +-} +- +-void r300_mem_init(r300ContextPtr rmesa) +-{ +- rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); +- memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager)); +- +- rmesa->rmm->u_size = 128; +- resize_u_list(rmesa); +-} +- +-void r300_mem_destroy(r300ContextPtr rmesa) +-{ +- _mesa_free(rmesa->rmm->u_list); +- rmesa->rmm->u_list = NULL; +- +- _mesa_free(rmesa->rmm); +- rmesa->rmm = NULL; +-} +- +-void *r300_mem_ptr(r300ContextPtr rmesa, int id) +-{ +- assert(id <= rmesa->rmm->u_last); +- return rmesa->rmm->u_list[id].ptr; +-} +- +-int r300_mem_find(r300ContextPtr rmesa, void *ptr) +-{ +- int i; +- +- for (i = 1; i < rmesa->rmm->u_size + 1; i++) +- if (rmesa->rmm->u_list[i].ptr && +- ptr >= rmesa->rmm->u_list[i].ptr && +- ptr < +- rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) +- break; +- +- if (i < rmesa->rmm->u_size + 1) +- return i; +- +- fprintf(stderr, "%p failed\n", ptr); +- return 0; +-} +- +-//#define MM_DEBUG +-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) +-{ +- drm_radeon_mem_alloc_t alloc; +- int offset = 0, ret; +- int i, free = -1; +- int done_age; +- drm_radeon_mem_free_t memfree; +- int tries = 0; +- static int bytes_wasted = 0, allocated = 0; +- 
+- if (size < 4096) +- bytes_wasted += 4096 - size; +- +- allocated += size; +- +-#if 0 +- static int t = 0; +- if (t != time(NULL)) { +- t = time(NULL); +- fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", +- rmesa->rmm->u_last, bytes_wasted / 1024, +- allocated / 1024); +- } +-#endif +- +- memfree.region = RADEON_MEM_REGION_GART; +- +- again: +- +- done_age = radeonGetAge((radeonContextPtr) rmesa); +- +- if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) +- resize_u_list(rmesa); +- +- for (i = rmesa->rmm->u_last + 1; i > 0; i--) { +- if (rmesa->rmm->u_list[i].ptr == NULL) { +- free = i; +- continue; +- } +- +- if (rmesa->rmm->u_list[i].h_pending == 0 && +- rmesa->rmm->u_list[i].pending +- && rmesa->rmm->u_list[i].age <= done_age) { +- memfree.region_offset = +- (char *)rmesa->rmm->u_list[i].ptr - +- (char *)rmesa->radeon.radeonScreen->gartTextures. +- map; +- +- ret = +- drmCommandWrite(rmesa->radeon.radeonScreen-> +- driScreen->fd, DRM_RADEON_FREE, +- &memfree, sizeof(memfree)); +- +- if (ret) { +- fprintf(stderr, "Failed to free at %p\n", +- rmesa->rmm->u_list[i].ptr); +- fprintf(stderr, "ret = %s\n", strerror(-ret)); +- exit(1); +- } else { +-#ifdef MM_DEBUG +- fprintf(stderr, "really freed %d at age %x\n", +- i, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- if (i == rmesa->rmm->u_last) +- rmesa->rmm->u_last--; +- +- if (rmesa->rmm->u_list[i].size < 4096) +- bytes_wasted -= +- 4096 - rmesa->rmm->u_list[i].size; +- +- allocated -= rmesa->rmm->u_list[i].size; +- rmesa->rmm->u_list[i].pending = 0; +- rmesa->rmm->u_list[i].ptr = NULL; +- free = i; +- } +- } +- } +- rmesa->rmm->u_head = i; +- +- if (free == -1) { +- WARN_ONCE("Ran out of slots!\n"); +- //usleep(100); +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- tries++; +- if (tries > 100) { +- WARN_ONCE("Ran out of slots!\n"); +- exit(1); +- } +- goto again; +- } +- +- alloc.region = RADEON_MEM_REGION_GART; +- alloc.alignment = alignment; +- alloc.size = size; +- alloc.region_offset = &offset; 
+- +- ret = +- drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, +- sizeof(alloc)); +- if (ret) { +-#if 0 +- WARN_ONCE("Ran out of mem!\n"); +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- //usleep(100); +- tries2++; +- tries = 0; +- if (tries2 > 100) { +- WARN_ONCE("Ran out of GART memory!\n"); +- exit(1); +- } +- goto again; +-#else +- WARN_ONCE +- ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", +- size); +- return 0; +-#endif +- } +- +- i = free; +- +- if (i > rmesa->rmm->u_last) +- rmesa->rmm->u_last = i; +- +- rmesa->rmm->u_list[i].ptr = +- ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; +- rmesa->rmm->u_list[i].size = size; +- rmesa->rmm->u_list[i].age = 0; +- //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); +- +-#ifdef MM_DEBUG +- fprintf(stderr, "allocated %d at age %x\n", i, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- +- return i; +-} +- +-void r300_mem_use(r300ContextPtr rmesa, int id) +-{ +- uint64_t ull; +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- drm_r300_cmd_header_t *cmd; +- +- assert(id <= rmesa->rmm->u_last); +- +- if (id == 0) +- return; +- +- cmd = +- (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, +- 2 + sizeof(ull) / 4, +- __FUNCTION__); +- cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; +- cmd[0].scratch.reg = R300_MEM_SCRATCH; +- cmd[0].scratch.n_bufs = 1; +- cmd[0].scratch.flags = 0; +- cmd++; +- +- ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; +- _mesa_memcpy(cmd, &ull, sizeof(ull)); +- cmd += sizeof(ull) / 4; +- +- cmd[0].u = /*id */ 0; +- +- LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. 
*/ +- rmesa->rmm->u_list[id].h_pending++; +- UNLOCK_HARDWARE(&rmesa->radeon); +-} +- +-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) +-{ +- unsigned long offset; +- +- assert(id <= rmesa->rmm->u_last); +- +- offset = (char *)rmesa->rmm->u_list[id].ptr - +- (char *)rmesa->radeon.radeonScreen->gartTextures.map; +- offset += rmesa->radeon.radeonScreen->gart_texture_offset; +- +- return offset; +-} +- +-void *r300_mem_map(r300ContextPtr rmesa, int id, int access) +-{ +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- void *ptr; +- int tries = 0; +- +- assert(id <= rmesa->rmm->u_last); +- +- if (access == R300_MEM_R) { +- +- if (rmesa->rmm->u_list[id].mapped == 1) +- WARN_ONCE("buffer %d already mapped\n", id); +- +- rmesa->rmm->u_list[id].mapped = 1; +- ptr = r300_mem_ptr(rmesa, id); +- +- return ptr; +- } +- +- if (rmesa->rmm->u_list[id].h_pending) +- r300FlushCmdBuf(rmesa, __FUNCTION__); +- +- if (rmesa->rmm->u_list[id].h_pending) { +- return NULL; +- } +- +- while (rmesa->rmm->u_list[id].age > +- radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) +- usleep(10); +- +- if (tries >= 1000) { +- fprintf(stderr, "Idling failed (%x vs %x)\n", +- rmesa->rmm->u_list[id].age, +- radeonGetAge((radeonContextPtr) rmesa)); +- return NULL; +- } +- +- if (rmesa->rmm->u_list[id].mapped == 1) +- WARN_ONCE("buffer %d already mapped\n", id); +- +- rmesa->rmm->u_list[id].mapped = 1; +- ptr = r300_mem_ptr(rmesa, id); +- +- return ptr; +-} +- +-void r300_mem_unmap(r300ContextPtr rmesa, int id) +-{ +-#ifdef MM_DEBUG +- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- +- assert(id <= rmesa->rmm->u_last); +- +- if (rmesa->rmm->u_list[id].mapped == 0) +- WARN_ONCE("buffer %d not mapped\n", id); +- +- rmesa->rmm->u_list[id].mapped = 0; +-} +- +-void r300_mem_free(r300ContextPtr rmesa, int id) +-{ +-#ifdef MM_DEBUG +- 
fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, +- radeonGetAge((radeonContextPtr) rmesa)); +-#endif +- +- assert(id <= rmesa->rmm->u_last); +- +- if (id == 0) +- return; +- +- if (rmesa->rmm->u_list[id].ptr == NULL) { +- WARN_ONCE("Not allocated!\n"); +- return; +- } +- +- if (rmesa->rmm->u_list[id].pending) { +- WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); +- return; +- } +- +- rmesa->rmm->u_list[id].pending = 1; +-} +-#endif +diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h +deleted file mode 100644 +index 625a7f6..0000000 +--- a/src/mesa/drivers/dri/r300/r300_mem.h ++++ /dev/null +@@ -1,37 +0,0 @@ +-#ifndef __R300_MEM_H__ +-#define __R300_MEM_H__ +- +-//#define R300_MEM_PDL 0 +-#define R300_MEM_UL 1 +- +-#define R300_MEM_R 1 +-#define R300_MEM_W 2 +-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) +- +-#define R300_MEM_SCRATCH 2 +- +-struct r300_memory_manager { +- struct { +- void *ptr; +- uint32_t size; +- uint32_t age; +- uint32_t h_pending; +- int pending; +- int mapped; +- } *u_list; +- int u_head, u_size, u_last; +- +-}; +- +-extern void r300_mem_init(r300ContextPtr rmesa); +-extern void r300_mem_destroy(r300ContextPtr rmesa); +-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); +-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); +-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); +-extern void r300_mem_use(r300ContextPtr rmesa, int id); +-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); +-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); +-extern void r300_mem_unmap(r300ContextPtr rmesa, int id); +-extern void r300_mem_free(r300ContextPtr rmesa, int id); +- +-#endif +diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h +index 7c6485e..5f344be 100644 +--- a/src/mesa/drivers/dri/r300/r300_reg.h ++++ b/src/mesa/drivers/dri/r300/r300_reg.h +@@ -656,7 +656,7 @@ USE OR OTHER DEALINGS IN 
THE SOFTWARE. + # define R300_GB_FOG_SELECT_C3A (3 << 0) + # define R300_GB_FOG_SELECT_1_1_W (4 << 0) + # define R300_GB_FOG_SELECT_Z (5 << 0) +-# define R300_GB_DEPTH_SELECT_Z (0 << 3 ++# define R300_GB_DEPTH_SELECT_Z (0 << 3) + # define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) + # define R300_GB_W_SELECT_1_W (0 << 4) + # define R300_GB_W_SELECT_1 (1 << 4) +@@ -730,8 +730,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 + #define R500_RS_IP_COL_PTR_SHIFT 24 + #define R500_RS_IP_COL_FMT_SHIFT 27 +-# define R500_RS_COL_PTR(x) (x << 24) +-# define R500_RS_COL_FMT(x) (x << 27) ++# define R500_RS_COL_PTR(x) ((x) << 24) ++# define R500_RS_COL_FMT(x) ((x) << 27) + /* gap */ + #define R500_RS_IP_OFFSET_DIS (0 << 31) + #define R500_RS_IP_OFFSET_EN (1 << 31) +@@ -1172,9 +1172,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + #define R300_RS_IP_3 0x431C + # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ + # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ +-# define R300_RS_TEX_PTR(x) (x << 0) +-# define R300_RS_COL_PTR(x) (x << 6) +-# define R300_RS_COL_FMT(x) (x << 9) ++# define R300_RS_TEX_PTR(x) ((x) << 0) ++# define R300_RS_COL_PTR(x) ((x) << 6) ++# define R300_RS_COL_FMT(x) ((x) << 9) + # define R300_RS_COL_FMT_RGBA 0 + # define R300_RS_COL_FMT_RGB0 1 + # define R300_RS_COL_FMT_RGB1 2 +@@ -1184,10 +1184,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + # define R300_RS_COL_FMT_111A 8 + # define R300_RS_COL_FMT_1110 9 + # define R300_RS_COL_FMT_1111 10 +-# define R300_RS_SEL_S(x) (x << 13) +-# define R300_RS_SEL_T(x) (x << 16) +-# define R300_RS_SEL_R(x) (x << 19) +-# define R300_RS_SEL_Q(x) (x << 22) ++# define R300_RS_SEL_S(x) ((x) << 13) ++# define R300_RS_SEL_T(x) ((x) << 16) ++# define R300_RS_SEL_R(x) ((x) << 19) ++# define R300_RS_SEL_Q(x) ((x) << 22) + # define R300_RS_SEL_C0 0 + # define R300_RS_SEL_C1 1 + # define R300_RS_SEL_C2 2 +@@ -1525,6 +1525,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ # define R500_SEL_FILTER4_TC3 (3 << 18) + + #define R300_TX_OFFSET_0 0x4540 ++#define R300_TX_OFFSET_1 0x4544 ++#define R300_TX_OFFSET_2 0x4548 ++#define R300_TX_OFFSET_3 0x454C ++#define R300_TX_OFFSET_4 0x4550 ++#define R300_TX_OFFSET_5 0x4554 ++#define R300_TX_OFFSET_6 0x4558 ++#define R300_TX_OFFSET_7 0x455C + /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) + # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +@@ -2705,7 +2712,7 @@ enum { + # define R500_ALPHA_OP_COS 13 + # define R500_ALPHA_OP_MDH 14 + # define R500_ALPHA_OP_MDV 15 +-# define R500_ALPHA_ADDRD(x) (x << 4) ++# define R500_ALPHA_ADDRD(x) ((x) << 4) + # define R500_ALPHA_ADDRD_REL (1 << 11) + # define R500_ALPHA_SEL_A_SHIFT 12 + # define R500_ALPHA_SEL_A_SRC0 (0 << 12) +@@ -2749,16 +2756,16 @@ enum { + # define R500_ALPHA_OMOD_DIV_4 (5 << 26) + # define R500_ALPHA_OMOD_DIV_8 (6 << 26) + # define R500_ALPHA_OMOD_DISABLE (7 << 26) +-# define R500_ALPHA_TARGET(x) (x << 29) ++# define R500_ALPHA_TARGET(x) ((x) << 29) + # define R500_ALPHA_W_OMASK (1 << 31) + #define R500_US_ALU_ALPHA_ADDR_0 0x9800 +-# define R500_ALPHA_ADDR0(x) (x << 0) ++# define R500_ALPHA_ADDR0(x) ((x) << 0) + # define R500_ALPHA_ADDR0_CONST (1 << 8) + # define R500_ALPHA_ADDR0_REL (1 << 9) +-# define R500_ALPHA_ADDR1(x) (x << 10) ++# define R500_ALPHA_ADDR1(x) ((x) << 10) + # define R500_ALPHA_ADDR1_CONST (1 << 18) + # define R500_ALPHA_ADDR1_REL (1 << 19) +-# define R500_ALPHA_ADDR2(x) (x << 20) ++# define R500_ALPHA_ADDR2(x) ((x) << 20) + # define R500_ALPHA_ADDR2_CONST (1 << 28) + # define R500_ALPHA_ADDR2_REL (1 << 29) + # define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +@@ -2779,7 +2786,7 @@ enum { + # define R500_ALU_RGBA_OP_SOP (10 << 0) + # define R500_ALU_RGBA_OP_MDH (11 << 0) + # define R500_ALU_RGBA_OP_MDV (12 << 0) +-# define R500_ALU_RGBA_ADDRD(x) (x << 4) ++# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) + # define R500_ALU_RGBA_ADDRD_REL (1 << 11) + # define R500_ALU_RGBA_SEL_C_SHIFT 12 + # define 
R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +@@ -2906,16 +2913,16 @@ enum { + # define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) + # define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) + # define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +-# define R500_ALU_RGB_TARGET(x) (x << 29) ++# define R500_ALU_RGB_TARGET(x) ((x) << 29) + # define R500_ALU_RGB_WMASK (1 << 31) + #define R500_US_ALU_RGB_ADDR_0 0x9000 +-# define R500_RGB_ADDR0(x) (x << 0) ++# define R500_RGB_ADDR0(x) ((x) << 0) + # define R500_RGB_ADDR0_CONST (1 << 8) + # define R500_RGB_ADDR0_REL (1 << 9) +-# define R500_RGB_ADDR1(x) (x << 10) ++# define R500_RGB_ADDR1(x) ((x) << 10) + # define R500_RGB_ADDR1_CONST (1 << 18) + # define R500_RGB_ADDR1_REL (1 << 19) +-# define R500_RGB_ADDR2(x) (x << 20) ++# define R500_RGB_ADDR2(x) ((x) << 20) + # define R500_RGB_ADDR2_CONST (1 << 28) + # define R500_RGB_ADDR2_REL (1 << 29) + # define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +@@ -2970,19 +2977,19 @@ enum { + + /* note that these are 8 bit lengths, despite the offsets, at least for R500 */ + #define R500_US_CODE_ADDR 0x4630 +-# define R500_US_CODE_START_ADDR(x) (x << 0) +-# define R500_US_CODE_END_ADDR(x) (x << 16) ++# define R500_US_CODE_START_ADDR(x) ((x) << 0) ++# define R500_US_CODE_END_ADDR(x) ((x) << 16) + #define R500_US_CODE_OFFSET 0x4638 +-# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) ++# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) + #define R500_US_CODE_RANGE 0x4634 +-# define R500_US_CODE_RANGE_ADDR(x) (x << 0) +-# define R500_US_CODE_RANGE_SIZE(x) (x << 16) ++# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) ++# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) + #define R500_US_CONFIG 0x4600 + # define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) + #define R500_US_FC_ADDR_0 0xa000 +-# define R500_FC_BOOL_ADDR(x) (x << 0) +-# define R500_FC_INT_ADDR(x) (x << 8) +-# define R500_FC_JUMP_ADDR(x) (x << 16) ++# define R500_FC_BOOL_ADDR(x) ((x) << 0) ++# define R500_FC_INT_ADDR(x) ((x) << 8) ++# define R500_FC_JUMP_ADDR(x) ((x) << 16) + # 
define R500_FC_JUMP_GLOBAL (1 << 31) + #define R500_US_FC_BOOL_CONST 0x4620 + # define R500_FC_KBOOL(x) (x) +@@ -3003,8 +3010,8 @@ enum { + # define R500_FC_A_OP_NONE (0 << 6) + # define R500_FC_A_OP_POP (1 << 6) + # define R500_FC_A_OP_PUSH (2 << 6) +-# define R500_FC_JUMP_FUNC(x) (x << 8) +-# define R500_FC_B_POP_CNT(x) (x << 16) ++# define R500_FC_JUMP_FUNC(x) ((x) << 8) ++# define R500_FC_B_POP_CNT(x) ((x) << 16) + # define R500_FC_B_OP0_NONE (0 << 24) + # define R500_FC_B_OP0_DECR (1 << 24) + # define R500_FC_B_OP0_INCR (2 << 24) +@@ -3013,14 +3020,14 @@ enum { + # define R500_FC_B_OP1_INCR (2 << 26) + # define R500_FC_IGNORE_UNCOVERED (1 << 28) + #define R500_US_FC_INT_CONST_0 0x4c00 +-# define R500_FC_INT_CONST_KR(x) (x << 0) +-# define R500_FC_INT_CONST_KG(x) (x << 8) +-# define R500_FC_INT_CONST_KB(x) (x << 16) ++# define R500_FC_INT_CONST_KR(x) ((x) << 0) ++# define R500_FC_INT_CONST_KG(x) ((x) << 8) ++# define R500_FC_INT_CONST_KB(x) ((x) << 16) + /* _0 through _15 */ + #define R500_US_FORMAT0_0 0x4640 +-# define R500_FORMAT_TXWIDTH(x) (x << 0) +-# define R500_FORMAT_TXHEIGHT(x) (x << 11) +-# define R500_FORMAT_TXDEPTH(x) (x << 22) ++# define R500_FORMAT_TXWIDTH(x) ((x) << 0) ++# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) ++# define R500_FORMAT_TXDEPTH(x) ((x) << 22) + /* _0 through _3 */ + #define R500_US_OUT_FMT_0 0x46a4 + # define R500_OUT_FMT_C4_8 (0 << 0) +@@ -3061,12 +3068,12 @@ enum { + # define R500_C3_SEL_R (1 << 14) + # define R500_C3_SEL_G (2 << 14) + # define R500_C3_SEL_B (3 << 14) +-# define R500_OUT_SIGN(x) (x << 16) ++# define R500_OUT_SIGN(x) ((x) << 16) + # define R500_ROUND_ADJ (1 << 20) + #define R500_US_PIXSIZE 0x4604 + # define R500_PIX_SIZE(x) (x) + #define R500_US_TEX_ADDR_0 0x9800 +-# define R500_TEX_SRC_ADDR(x) (x << 0) ++# define R500_TEX_SRC_ADDR(x) ((x) << 0) + # define R500_TEX_SRC_ADDR_REL (1 << 7) + # define R500_TEX_SRC_S_SWIZ_R (0 << 8) + # define R500_TEX_SRC_S_SWIZ_G (1 << 8) +@@ -3084,7 +3091,7 @@ enum { + # define 
R500_TEX_SRC_Q_SWIZ_G (1 << 14) + # define R500_TEX_SRC_Q_SWIZ_B (2 << 14) + # define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +-# define R500_TEX_DST_ADDR(x) (x << 16) ++# define R500_TEX_DST_ADDR(x) ((x) << 16) + # define R500_TEX_DST_ADDR_REL (1 << 23) + # define R500_TEX_DST_R_SWIZ_R (0 << 24) + # define R500_TEX_DST_R_SWIZ_G (1 << 24) +@@ -3103,7 +3110,7 @@ enum { + # define R500_TEX_DST_A_SWIZ_B (2 << 30) + # define R500_TEX_DST_A_SWIZ_A (3 << 30) + #define R500_US_TEX_ADDR_DXDY_0 0xa000 +-# define R500_DX_ADDR(x) (x << 0) ++# define R500_DX_ADDR(x) ((x) << 0) + # define R500_DX_ADDR_REL (1 << 7) + # define R500_DX_S_SWIZ_R (0 << 8) + # define R500_DX_S_SWIZ_G (1 << 8) +@@ -3121,7 +3128,7 @@ enum { + # define R500_DX_Q_SWIZ_G (1 << 14) + # define R500_DX_Q_SWIZ_B (2 << 14) + # define R500_DX_Q_SWIZ_A (3 << 14) +-# define R500_DY_ADDR(x) (x << 16) ++# define R500_DY_ADDR(x) ((x) << 16) + # define R500_DY_ADDR_REL (1 << 17) + # define R500_DY_S_SWIZ_R (0 << 24) + # define R500_DY_S_SWIZ_G (1 << 24) +@@ -3140,7 +3147,7 @@ enum { + # define R500_DY_Q_SWIZ_B (2 << 30) + # define R500_DY_Q_SWIZ_A (3 << 30) + #define R500_US_TEX_INST_0 0x9000 +-# define R500_TEX_ID(x) (x << 16) ++# define R500_TEX_ID(x) ((x) << 16) + # define R500_TEX_INST_NOP (0 << 22) + # define R500_TEX_INST_LD (1 << 22) + # define R500_TEX_INST_TEXKILL (2 << 22) +diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c +index f9266e4..f46477f 100644 +--- a/src/mesa/drivers/dri/r300/r300_render.c ++++ b/src/mesa/drivers/dri/r300/r300_render.c +@@ -66,8 +66,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "tnl/t_vp_build.h" + #include "radeon_reg.h" + #include "radeon_macros.h" +-#include "radeon_ioctl.h" +-#include "radeon_state.h" + #include "r300_context.h" + #include "r300_ioctl.h" + #include "r300_state.h" +@@ -175,85 +173,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) + static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct r300_dma_region *rvb = &rmesa->state.elt_dma; + void *out; + +- if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { +- rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; +- rvb->start = ((char *)elts) - rvb->address; +- rvb->aos_offset = +- rmesa->radeon.radeonScreen->gart_texture_offset + +- rvb->start; +- return; +- } else if (r300IsGartMemory(rmesa, elts, 1)) { +- WARN_ONCE("Pointer not within GART memory!\n"); +- _mesa_exit(-1); +- } +- +- r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); +- rvb->aos_offset = GET_START(rvb); +- +- out = rvb->address + rvb->start; ++ radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo, ++ &rmesa->state.elt_dma_offset, n_elts * 4, 4); ++ radeon_bo_map(rmesa->state.elt_dma_bo, 1); ++ out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset; + memcpy(out, elts, n_elts * 4); ++ radeon_bo_unmap(rmesa->state.elt_dma_bo); + } + +-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, +- int vertex_count, int type) ++static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; +- +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); +- +- start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); +- e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | +- (R300_VAP_PORT_IDX0 >> 2)); +- e32(addr); +- e32(vertex_count); ++ 
BATCH_LOCALS(&rmesa->radeon); ++ ++ if (vertex_count > 0) { ++ BEGIN_BATCH(10); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | ++ ((vertex_count + 0) << 16) | ++ type | ++ R300_VAP_VF_CNTL__INDEX_SIZE_32bit); ++ ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); ++ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | ++ (R300_VAP_PORT_IDX0 >> 2)); ++ OUT_BATCH_RELOC(rmesa->state.elt_dma_offset, ++ rmesa->state.elt_dma_bo, ++ rmesa->state.elt_dma_offset, ++ RADEON_GEM_DOMAIN_GTT, 0, 0); ++ OUT_BATCH(vertex_count); ++ } else { ++ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); ++ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | ++ (R300_VAP_PORT_IDX0 >> 2)); ++ OUT_BATCH(rmesa->state.elt_dma_offset); ++ OUT_BATCH(vertex_count); ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->state.elt_dma_bo, ++ RADEON_GEM_DOMAIN_GTT, 0, 0); ++ } ++ END_BATCH(); ++ } + } + + static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) + { ++ BATCH_LOCALS(&rmesa->radeon); ++ uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; +- ++ + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + +- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); +- e32(nr); +- +- for (i = 0; i + 1 < nr; i += 2) { +- e32((rmesa->state.aos[i].aos_size << 0) | +- (rmesa->state.aos[i].aos_stride << 8) | +- (rmesa->state.aos[i + 1].aos_size << 16) | +- (rmesa->state.aos[i + 1].aos_stride << 24)); ++ ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ BEGIN_BATCH(sz+2+(nr * 2)); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); ++ OUT_BATCH(nr); ++ ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->state.aos[i].components << 0) | ++ 
(rmesa->state.aos[i].stride << 8) | ++ (rmesa->state.aos[i + 1].components << 16) | ++ (rmesa->state.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->state.aos[i + 0].offset + ++ offset * 4 * rmesa->state.aos[i + 0].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->state.aos[i].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->state.aos[i + 1].offset + ++ offset * 4 * rmesa->state.aos[i + 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->state.aos[i+1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ ++ if (nr & 1) { ++ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | ++ (rmesa->state.aos[nr - 1].stride << 8)); ++ voffset = rmesa->state.aos[nr - 1].offset + ++ offset * 4 * rmesa->state.aos[nr - 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->state.aos[nr - 1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ END_BATCH(); ++ } else { + +- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); +- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); ++ BEGIN_BATCH(sz+2+(nr * 2)); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); ++ OUT_BATCH(nr); ++ ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->state.aos[i].components << 0) | ++ (rmesa->state.aos[i].stride << 8) | ++ (rmesa->state.aos[i + 1].components << 16) | ++ (rmesa->state.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->state.aos[i + 0].offset + ++ offset * 4 * rmesa->state.aos[i + 0].stride; ++ OUT_BATCH(voffset); ++ voffset = rmesa->state.aos[i + 1].offset + ++ offset * 4 * rmesa->state.aos[i + 1].stride; ++ OUT_BATCH(voffset); ++ } ++ ++ if (nr & 1) { ++ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | ++ (rmesa->state.aos[nr - 1].stride << 8)); ++ voffset = rmesa->state.aos[nr - 1].offset + ++ offset * 4 * rmesa->state.aos[nr - 1].stride; ++ OUT_BATCH(voffset); ++ } ++ for (i = 0; i + 1 < nr; i += 2) { ++ voffset = rmesa->state.aos[i + 0].offset + ++ offset * 4 * 
rmesa->state.aos[i + 0].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->state.aos[i+0].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->state.aos[i + 1].offset + ++ offset * 4 * rmesa->state.aos[i + 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->state.aos[i+1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ if (nr & 1) { ++ voffset = rmesa->state.aos[nr - 1].offset + ++ offset * 4 * rmesa->state.aos[nr - 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->state.aos[nr-1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ END_BATCH(); + } + +- if (nr & 1) { +- e32((rmesa->state.aos[nr - 1].aos_size << 0) | +- (rmesa->state.aos[nr - 1].aos_stride << 8)); +- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); +- } + } + + static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) + { +- int cmd_reserved = 0; +- int cmd_written = 0; +- drm_radeon_cmd_header_t *cmd = NULL; ++ BATCH_LOCALS(&rmesa->radeon); + +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); ++ END_BATCH(); + } + + static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, +@@ -269,6 +346,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + if (type < 0 || num_verts <= 0) + return; + ++ /* Make space for at least 64 dwords. ++ * This is supposed to ensure that we can get all rendering ++ * commands into a single command buffer. 
++ */ ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__); ++ + if (vb->Elts) { + if (num_verts > 65535) { + /* not implemented yet */ +@@ -288,11 +371,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + */ + r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitAOS(rmesa, rmesa->state.aos_count, start); +- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); ++ r300FireEB(rmesa, num_verts, type); + } else { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); + } ++ COMMIT_BATCH(); + } + + static GLboolean r300RunRender(GLcontext * ctx, +@@ -303,7 +387,6 @@ static GLboolean r300RunRender(GLcontext * ctx, + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + +- + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + +@@ -314,7 +397,7 @@ static GLboolean r300RunRender(GLcontext * ctx, + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); +- r300EmitState(rmesa); ++ radeonEmitState(&rmesa->radeon); + + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); +@@ -325,10 +408,6 @@ static GLboolean r300RunRender(GLcontext * ctx, + + r300EmitCacheFlush(rmesa); + +-#ifdef USER_BUFFERS +- r300UseArrays(ctx); +-#endif +- + r300ReleaseArrays(ctx); + + return GL_FALSE; +@@ -347,6 +426,8 @@ static GLboolean r300RunRender(GLcontext * ctx, + static int r300Fallback(GLcontext * ctx) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); ++ const unsigned back = ctx->Stencil._BackFace; ++ + /* Do we need to use new-style shaders? + * Also is there a better way to do this? 
*/ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +@@ -371,12 +452,14 @@ static int r300Fallback(GLcontext * ctx) + + FALLBACK_IF(ctx->RenderMode != GL_RENDER); + +- FALLBACK_IF(ctx->Stencil._TestTwoSide +- && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] +- || ctx->Stencil.ValueMask[0] != +- ctx->Stencil.ValueMask[1] +- || ctx->Stencil.WriteMask[0] != +- ctx->Stencil.WriteMask[1])); ++ /* If GL_EXT_stencil_two_side is disabled, this fallback check can ++ * be removed. ++ */ ++ FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] ++ || ctx->Stencil.ValueMask[0] != ++ ctx->Stencil.ValueMask[back] ++ || ctx->Stencil.WriteMask[0] != ++ ctx->Stencil.WriteMask[back]); + + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); +@@ -428,6 +511,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx, + return GL_TRUE; + } + ++ if (!r300ValidateBuffers(ctx)) ++ return GL_TRUE; ++ + r300UpdateShaders(rmesa); + + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); +diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c +index a63dbac..59728a0 100644 +--- a/src/mesa/drivers/dri/r300/r300_state.c ++++ b/src/mesa/drivers/dri/r300/r300_state.c +@@ -53,8 +53,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "vbo/vbo.h" + #include "tnl/tnl.h" + +-#include "radeon_ioctl.h" +-#include "radeon_state.h" + #include "r300_context.h" + #include "r300_ioctl.h" + #include "r300_state.h" +@@ -590,7 +588,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); + +- if (r300->state.stencil.hw_stencil) { ++ if (r300->radeon.state.stencil.hwBuffer) { + R300_STATECHANGE(r300, zs); + if (state) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= +@@ -783,6 +781,7 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) + R300_FG_FOG_BLEND_FN_EXP2; + fogScale.f = 0.3 * ctx->Fog.Density; + fogStart.f = 0.0; ++ break; + default: + return; + } +@@ -974,15 +973,9 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint refmask = +- (((ctx->Stencil. +- Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx-> +- Stencil. +- ValueMask +- [0] & +- 0xff) +- << +- R300_STENCILMASK_SHIFT)); +- ++ ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) ++ | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); ++ const unsigned back = ctx->Stencil._BackFace; + GLuint flag; + + R300_STATECHANGE(rmesa, zs); +@@ -1000,8 +993,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_FRONT_FUNC_SHIFT); + +- if (ctx->Stencil._TestTwoSide) +- flag = translate_func(ctx->Stencil.Function[1]); ++ flag = translate_func(ctx->Stencil.Function[back]); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_BACK_FUNC_SHIFT); +@@ -1026,6 +1018,7 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ const unsigned back = ctx->Stencil._BackFace; + + R300_STATECHANGE(rmesa, zs); + /* It is easier to mask what's left.. 
*/ +@@ -1042,23 +1035,13 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, + | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << + R300_S_FRONT_ZPASS_OP_SHIFT); + +- if (ctx->Stencil._TestTwoSide) { +- rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= +- (translate_stencil_op(ctx->Stencil.FailFunc[1]) << +- R300_S_BACK_SFAIL_OP_SHIFT) +- | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << +- R300_S_BACK_ZFAIL_OP_SHIFT) +- | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << +- R300_S_BACK_ZPASS_OP_SHIFT); +- } else { +- rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= +- (translate_stencil_op(ctx->Stencil.FailFunc[0]) << +- R300_S_BACK_SFAIL_OP_SHIFT) +- | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << +- R300_S_BACK_ZFAIL_OP_SHIFT) +- | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << +- R300_S_BACK_ZPASS_OP_SHIFT); +- } ++ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= ++ (translate_stencil_op(ctx->Stencil.FailFunc[back]) << ++ R300_S_BACK_SFAIL_OP_SHIFT) ++ | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) << ++ R300_S_BACK_ZFAIL_OP_SHIFT) ++ | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) << ++ R300_S_BACK_ZPASS_OP_SHIFT); + } + + /* ============================================================= +@@ -1083,10 +1066,10 @@ static void r300UpdateWindow(GLcontext * ctx) + GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat sy = -v[MAT_SY]; + GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; +- GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale; +- GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale; ++ GLfloat sz = v[MAT_SZ] * rmesa->radeon.state.depth.scale; ++ GLfloat tz = v[MAT_TZ] * rmesa->radeon.state.depth.scale; + +- R300_FIREVERTICES(rmesa); ++ radeon_firevertices(&rmesa->radeon); + R300_STATECHANGE(rmesa, vpt); + + rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); +@@ -1100,10 +1083,19 @@ static void r300UpdateWindow(GLcontext * ctx) + static void r300Viewport(GLcontext * ctx, GLint x, GLint y, + GLsizei width, GLsizei height) + { ++ 
r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ __DRIcontext *driContext = rmesa->radeon.dri.context; + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ ++ if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled) { ++ radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); ++ if (driContext->driDrawablePriv != driContext->driReadablePriv) { ++ radeon_update_renderbuffers(driContext, ++ driContext->driReadablePriv); ++ } ++ } + r300UpdateWindow(ctx); + } + +@@ -1144,55 +1136,25 @@ void r300UpdateViewportOffset(GLcontext * ctx) + void r300UpdateDrawBuffer(GLcontext * ctx) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- r300ContextPtr r300 = rmesa; + struct gl_framebuffer *fb = ctx->DrawBuffer; +- driRenderbuffer *drb; ++ struct radeon_renderbuffer *rrb; + + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + /* draw to front */ +- drb = +- (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. +- Renderbuffer; ++ rrb = ++ (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { + /* draw to back */ +- drb = +- (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. 
+- Renderbuffer; ++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } else { + /* drawing to multiple buffers, or none */ + return; + } + +- assert(drb); +- assert(drb->flippedPitch); ++ assert(rrb); ++ assert(rrb->pitch); + + R300_STATECHANGE(rmesa, cb); +- +- r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +-#if 0 +- R200_STATECHANGE(rmesa, ctx); +- +- /* Note: we used the (possibly) page-flipped values */ +- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] +- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) +- & R200_COLOROFFSET_MASK); +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; +- +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= +- R200_COLOR_TILE_ENABLE; +- } +-#endif + } + + static void +@@ -1412,7 +1374,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = +- cmdpacket0(R300_US_TEX_INST_0, code->tex.length); ++ cmdpacket0(r300->radeon.radeonScreen, ++ R300_US_TEX_INST_0, code->tex.length); + } + + static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +@@ -1463,7 +1426,7 @@ static GLuint translate_lod_bias(GLfloat bias) + static void r300SetupTextures(GLcontext * ctx) + { + int i, mtu; +- struct r300_tex_obj *t; ++ struct radeon_tex_obj *t; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int hw_tmu = 0; + int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ +@@ -1497,21 +1460,16 @@ static void 
r300SetupTextures(GLcontext * ctx) + /* We cannot let disabled tmu offsets pass DRM */ + for (i = 0; i < mtu; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { +- +-#if 0 /* Enables old behaviour */ +- hw_tmu = i; +-#endif + tmu_mappings[i] = hw_tmu; + +- t = r300->state.texture.unit[i].texobj; +- /* XXX questionable fix for bug 9170: */ ++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (!t) + continue; + +- if ((t->format & 0xffffff00) == 0xffffff00) { ++ if ((t->pp_txformat & 0xffffff00) == 0xffffff00) { + WARN_ONCE + ("unknown texture format (entry %x) encountered. Help me !\n", +- t->format & 0xff); ++ t->pp_txformat & 0xff); + } + + if (RADEON_DEBUG & DEBUG_STATE) +@@ -1522,29 +1480,28 @@ static void r300SetupTextures(GLcontext * ctx) + + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + + hw_tmu] = +- gen_fixed_filter(t->filter) | (hw_tmu << 28); ++ gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28); + /* Note: There is a LOD bias per texture unit and a LOD bias + * per texture object. We add them here to get the correct behaviour. + * (The per-texture object LOD bias was introduced in OpenGL 1.4 + * and is not present in the EXT_texture_object extension). 
+ */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = +- t->filter_1 | +- translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); ++ t->pp_txfilter_1 | ++ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); + r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = +- t->size; ++ t->pp_txsize; + r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + +- hw_tmu] = t->format; ++ hw_tmu] = t->pp_txformat; + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = +- t->pitch_reg; +- r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + +- hw_tmu] = t->offset; ++ t->pp_txpitch; ++ r300->hw.textures[hw_tmu] = t; + +- if (t->offset & R300_TXO_MACRO_TILE) { ++ if (t->tile_bits & R300_TXO_MACRO_TILE) { + WARN_ONCE("macro tiling enabled!\n"); + } + +- if (t->offset & R300_TXO_MICRO_TILE) { ++ if (t->tile_bits & R300_TXO_MICRO_TILE) { + WARN_ONCE("micro tiling enabled!\n"); + } + +@@ -1561,21 +1518,21 @@ static void r300SetupTextures(GLcontext * ctx) + } + + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1); + 
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); + + if (!fp) /* should only happenen once, just after context is created */ + return; +@@ -1587,7 +1544,7 @@ static void r300SetupTextures(GLcontext * ctx) + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = +- cmdpacket0(R300_TX_FILTER0_0, 1); ++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); + } + r300SetupFragmentShaderTextures(ctx, tmu_mappings); + } else +@@ -1756,7 +1713,7 @@ static void r300SetupRSUnit(GLcontext * ctx) + | R300_HIRES_EN; + + assert(high_rr >= 0); +- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); ++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1); + r300->hw.rc.cmd[2] = high_rr; + + if (InputsRead) +@@ -1916,7 +1873,7 @@ static void r500SetupRSUnit(GLcontext * ctx) + | R300_HIRES_EN; + + assert(high_rr >= 0); +- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1); ++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1); + r300->hw.rc.cmd[2] = 0xC0 | high_rr; + + if (InputsRead) +@@ -2114,6 +2071,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + } + ++ + static void r300SetupVertexProgram(r300ContextPtr rmesa) + { + GLcontext *ctx = rmesa->radeon.glCtx; +@@ -2143,6 +2101,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) + */ + static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) + { ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG 
& DEBUG_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), +@@ -2188,8 +2147,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) + case GL_POLYGON_OFFSET_FILL: + r300SetPolygonOffsetState(ctx, state); + break; ++ case GL_SCISSOR_TEST: ++ radeon_firevertices(&rmesa->radeon); ++ rmesa->radeon.state.scissor.enabled = state; ++ radeonUpdateScissor( ctx ); ++ break; + default: +- radeonEnable(ctx, cap, state); + break; + } + } +@@ -2200,6 +2163,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) + static void r300ResetHwState(r300ContextPtr r300) + { + GLcontext *ctx = r300->radeon.glCtx; ++ struct radeon_renderbuffer *rrb; + int has_tcl = 1; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) +@@ -2230,8 +2194,6 @@ static void r300ResetHwState(r300ContextPtr r300) + + r300UpdateCulling(ctx); + +- r300UpdateTextureState(ctx); +- + r300SetBlendState(ctx); + r300SetLogicOpState(ctx); + +@@ -2378,20 +2340,6 @@ static void r300ResetHwState(r300ContextPtr r300) + + r300BlendColor(ctx, ctx->Color.BlendColor); + +- /* Again, r300ClearBuffer uses this */ +- r300->hw.cb.cmd[R300_CB_OFFSET] = +- r300->radeon.state.color.drawOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +- + r300->hw.rb3d_dither_ctl.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[2] = 0; + r300->hw.rb3d_dither_ctl.cmd[3] = 0; +@@ -2407,12 +2355,8 @@ static void r300ResetHwState(r300ContextPtr r300) + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; + +- 
r300->hw.zb.cmd[R300_ZB_OFFSET] = +- r300->radeon.radeonScreen->depthOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; +- +- if (r300->radeon.sarea->tiling_enabled) { ++ rrb = r300->radeon.state.depth.rrb; ++ if (rrb && rrb->bo && (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)) { + /* XXX: Turn off when clearing buffers ? */ + r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE; + +@@ -2423,18 +2367,6 @@ static void r300ResetHwState(r300ContextPtr r300) + + r300->hw.zb_depthclearvalue.cmd[1] = 0; + +- switch (ctx->Visual.depthBits) { +- case 16: +- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z; +- break; +- case 24: +- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; +- break; +- default: +- fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); +- _mesa_exit(-1); +- } +- + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; +@@ -2455,7 +2387,7 @@ static void r300ResetHwState(r300ContextPtr r300) + r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; + } + +- r300->hw.all_dirty = GL_TRUE; ++ r300->radeon.hw.all_dirty = GL_TRUE; + } + + void r300UpdateShaders(r300ContextPtr rmesa) +@@ -2466,8 +2398,8 @@ void r300UpdateShaders(r300ContextPtr rmesa) + + ctx = rmesa->radeon.glCtx; + +- if (rmesa->NewGLState && hw_tcl_on) { +- rmesa->NewGLState = 0; ++ if (rmesa->radeon.NewGLState && hw_tcl_on) { ++ rmesa->radeon.NewGLState = 0; + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = +@@ -2546,10 +2478,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) + R300_STATECHANGE(rmesa, fpi[1]); + R300_STATECHANGE(rmesa, fpi[2]); + R300_STATECHANGE(rmesa, fpi[3]); +- rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length); +- rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = 
cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length); +- rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length); +- rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); ++ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length); ++ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length); ++ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); ++ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; +@@ -2580,7 +2512,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) + } + + R300_STATECHANGE(rmesa, fpp); +- rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); ++ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); +@@ -2682,7 +2614,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + +- r300UpdateTextureState(ctx); + r300SetEarlyZState(ctx); + + GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; +@@ -2727,7 +2658,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) + + r300UpdateStateParameters(ctx, new_state); + +- r300->NewGLState |= new_state; ++ r300->radeon.NewGLState |= new_state; + } + + /** +@@ -2740,26 +2671,9 @@ void r300InitState(r300ContextPtr r300) + GLcontext *ctx = r300->radeon.glCtx; + GLuint depth_fmt; + +- 
radeonInitState(&r300->radeon); +- +- switch (ctx->Visual.depthBits) { +- case 16: +- r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; +- depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; +- break; +- case 24: +- r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; +- depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; +- break; +- default: +- fprintf(stderr, "Error: Unsupported depth %d... exiting\n", +- ctx->Visual.depthBits); +- _mesa_exit(-1); +- } +- + /* Only have hw stencil when depth buffer is 24 bits deep */ +- r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 && +- ctx->Visual.depthBits == 24); ++ r300->radeon.state.stencil.hwBuffer = (ctx->Visual.stencilBits > 0 && ++ ctx->Visual.depthBits == 24); + + memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); + +@@ -2791,12 +2705,33 @@ void r300UpdateClipPlanes( GLcontext *ctx ) + } + } + ++static void r300DrawBuffer( GLcontext *ctx, GLenum mode ) ++{ ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ if (RADEON_DEBUG & DEBUG_DRI) ++ fprintf(stderr, "%s %s\n", __FUNCTION__, ++ _mesa_lookup_enum_by_nr( mode )); ++ ++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */ ++ ++ radeonSetCliprects( &rmesa->radeon ); ++ if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled) ++ radeonUpdatePageFlipping(&rmesa->radeon); ++} ++ ++static void r300ReadBuffer( GLcontext *ctx, GLenum mode ) ++{ ++ if (RADEON_DEBUG & DEBUG_DRI) ++ fprintf(stderr, "%s %s\n", __FUNCTION__, ++ _mesa_lookup_enum_by_nr( mode )); ++ ++}; ++ + /** + * Initialize driver's state callback functions + */ + void r300InitStateFuncs(struct dd_function_table *functions) + { +- radeonInitStateFuncs(functions); + + functions->UpdateState = r300InvalidateState; + functions->AlphaFunc = r300AlphaFunc; +@@ -2833,4 +2768,8 @@ void r300InitStateFuncs(struct dd_function_table *functions) + functions->RenderMode = r300RenderMode; + + functions->ClipPlane = r300ClipPlane; ++ functions->Scissor = radeonScissor; ++ ++ 
functions->DrawBuffer = r300DrawBuffer; ++ functions->ReadBuffer = r300ReadBuffer; + } +diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h +index 0589ab7..247a20e 100644 +--- a/src/mesa/drivers/dri/r300/r300_state.h ++++ b/src/mesa/drivers/dri/r300/r300_state.h +@@ -39,32 +39,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define R300_NEWPRIM( rmesa ) \ + do { \ +- if ( rmesa->dma.flush ) \ +- rmesa->dma.flush( rmesa ); \ ++ if ( rmesa->radeon.dma.flush ) \ ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ + } while (0) + + #define R300_STATECHANGE(r300, atom) \ + do { \ + R300_NEWPRIM(r300); \ + r300->hw.atom.dirty = GL_TRUE; \ +- r300->hw.is_dirty = GL_TRUE; \ ++ r300->radeon.hw.is_dirty = GL_TRUE; \ + } while(0) + +-#define R300_PRINT_STATE(r300, atom) \ +- r300PrintStateAtom(r300, &r300->hw.atom) +- +-/* Fire the buffered vertices no matter what. +- TODO: This has not been implemented yet +- */ +-#define R300_FIREVERTICES( r300 ) \ +-do { \ +- \ +- if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ +- r300Flush( (r300)->radeon.glCtx ); \ +- } \ +- \ +-} while (0) +- + // r300_state.c + extern int future_hw_tcl_on; + void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); +diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c +index b6e7ce1..0d8b7e5 100644 +--- a/src/mesa/drivers/dri/r300/r300_swtcl.c ++++ b/src/mesa/drivers/dri/r300/r300_swtcl.c +@@ -56,26 +56,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "r300_state.h" + #include "r300_ioctl.h" + #include "r300_emit.h" +-#include "r300_mem.h" ++#include "r300_tex.h" + +-static void flush_last_swtcl_prim( r300ContextPtr rmesa ); +- +- +-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); ++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset); + void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); + #define EMIT_ATTR( ATTR, STYLE ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + } while (0) + + #define EMIT_PAD( N ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + } while (0) + + static void r300SetVertexFormat( GLcontext *ctx ) +@@ -86,7 +83,6 @@ static void r300SetVertexFormat( GLcontext *ctx ) + DECLARE_RENDERINPUTS(index_bitset); + GLuint InputsRead = 0, OutputsWritten = 0; + int vap_fmt_0 = 0; +- int vap_vte_cntl = 0; + int offset = 0; + int vte = 0; + GLint inputs[VERT_ATTRIB_MAX]; +@@ -114,7 +110,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) + } 
+ + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); +- rmesa->swtcl.vertex_attr_count = 0; ++ rmesa->radeon.swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. +@@ -175,7 +171,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) + inputs[i] = -1; + } + } +- ++ + /* Fixed, apply to vir0 only */ + if (InputsRead & (1 << VERT_ATTRIB_POS)) + inputs[VERT_ATTRIB_POS] = 0; +@@ -186,16 +182,16 @@ static void r300SetVertexFormat( GLcontext *ctx ) + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); +- ++ + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } +- ++ + for (i = 0; i < nr; i++) { + int ci; +- ++ + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; +@@ -215,98 +211,29 @@ static void r300SetVertexFormat( GLcontext *ctx ) + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); +- ++ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); +- ++ + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; +- +- rmesa->swtcl.vertex_size = ++ ++ rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, +- rmesa->swtcl.vertex_attrs, +- rmesa->swtcl.vertex_attr_count, ++ rmesa->radeon.swtcl.vertex_attrs, ++ rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); +- +- rmesa->swtcl.vertex_size /= 4; ++ ++ rmesa->radeon.swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + + + R300_STATECHANGE(rmesa, vte); + rmesa->hw.vte.cmd[1] = vte; +- rmesa->hw.vte.cmd[2] = 
rmesa->swtcl.vertex_size; +-} +- +- +-/* Flush vertices in the current dma region. +- */ +-static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- rmesa->dma.flush = NULL; +- +- if (rmesa->dma.current.buf) { +- struct r300_dma_region *current = &rmesa->dma.current; +- GLuint current_offset = GET_START(current); +- +- assert (current->start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- current->ptr); +- +- if (rmesa->dma.current.start != rmesa->dma.current.ptr) { +- +- r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); +- +- r300EmitState(rmesa); +- +- r300EmitVertexAOS( rmesa, +- rmesa->swtcl.vertex_size, +- current_offset); +- +- r300EmitVbufPrim( rmesa, +- rmesa->swtcl.hw_primitive, +- rmesa->swtcl.numverts); +- +- r300EmitCacheFlush(rmesa); +- } +- +- rmesa->swtcl.numverts = 0; +- current->start = current->ptr; +- } +-} +- +-/* Alloc space in the current dma region. 
+- */ +-static void * +-r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) +-{ +- GLuint bytes = vsize * nverts; +- +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) +- r300RefillCurrentDmaRegion( rmesa, bytes); +- +- if (!rmesa->dma.flush) { +- rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +- rmesa->dma.flush = flush_last_swtcl_prim; +- } +- +- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); +- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); +- ASSERT( rmesa->dma.current.start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- rmesa->dma.current.ptr ); +- +- { +- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); +- rmesa->dma.current.ptr += bytes; +- rmesa->swtcl.numverts += nverts; +- return head; +- } ++ rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size; + } + + static GLuint reduced_prim[] = { +@@ -346,13 +273,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); + #undef LOCAL_VARS + #undef ALLOC_VERTS + #define CTX_ARG r300ContextPtr rmesa +-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +-#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 ) ++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size ++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) + #define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ +- const char *r300verts = (char *)rmesa->swtcl.verts; ++ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; + #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +-#define VERTEX r300Vertex ++#define VERTEX r300Vertex + #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) + #define PRINT_VERTEX(x) + #undef TAG +@@ -409,7 +336,7 @@ static struct { + #define VERT_Y(_v) _v->v.y + #define VERT_Z(_v) _v->v.z + #define AREA_IS_CCW( a ) (a < 0) +-#define GET_VERTEX(e) (rmesa->swtcl.verts + 
(e*rmesa->swtcl.vertex_size*sizeof(int))) ++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) + + /* Only used to pull back colors into vertices (ie, we know color is + * floating point). +@@ -455,7 +382,7 @@ do { \ + ***********************************************************************/ + + #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive ++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive + #undef TAG + #define TAG(x) x + #include "tnl_dd/t_dd_unfilled.h" +@@ -512,8 +439,8 @@ static void init_rast_tab( void ) + #undef LOCAL_VARS + #define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ +- const GLuint vertsize = rmesa->swtcl.vertex_size; \ +- const char *r300verts = (char *)rmesa->swtcl.verts; \ ++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ ++ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +@@ -545,7 +472,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + +- if (index != rmesa->swtcl.RenderIndex) { ++ if (index != rmesa->radeon.swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; +@@ -562,7 +489,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + +- rmesa->swtcl.RenderIndex = index; ++ rmesa->radeon.swtcl.RenderIndex = index; + } + } + +@@ -572,18 +499,18 @@ static void r300RenderStart(GLcontext *ctx) + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + // fprintf(stderr, "%s\n", __FUNCTION__); + +- r300ChooseRenderState(ctx); 
++ r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + ++ r300ValidateBuffers(ctx); ++ + r300UpdateShaders(rmesa); + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); +- +- if (rmesa->dma.flush != 0 && +- rmesa->dma.flush != flush_last_swtcl_prim) +- rmesa->dma.flush( rmesa ); +- ++ if (rmesa->radeon.dma.flush != NULL) { ++ rmesa->radeon.dma.flush(ctx); ++ } + } + + static void r300RenderFinish(GLcontext *ctx) +@@ -593,10 +520,10 @@ static void r300RenderFinish(GLcontext *ctx) + static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- +- if (rmesa->swtcl.hw_primitive != hwprim) { ++ ++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); +- rmesa->swtcl.hw_primitive = hwprim; ++ rmesa->radeon.swtcl.hw_primitive = hwprim; + } + } + +@@ -604,14 +531,14 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) + { + + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- rmesa->swtcl.render_primitive = prim; ++ rmesa->radeon.swtcl.render_primitive = prim; + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + // fprintf(stderr, "%s\n", __FUNCTION__); +- ++ + } + + static void r300ResetLineStipple(GLcontext *ctx) +@@ -625,12 +552,12 @@ void r300InitSwtcl(GLcontext *ctx) + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; +- ++ + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } +- ++ + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; +@@ -638,15 +565,15 @@ void r300InitSwtcl(GLcontext *ctx) + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; +- ++ + /* FIXME: what are these numbers? 
*/ +- _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, ++ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); +- +- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; +- rmesa->swtcl.RenderIndex = ~0; +- rmesa->swtcl.render_primitive = GL_TRIANGLES; +- rmesa->swtcl.hw_primitive = 0; ++ ++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; ++ rmesa->radeon.swtcl.RenderIndex = ~0; ++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; ++ rmesa->radeon.swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); +@@ -655,9 +582,9 @@ void r300InitSwtcl(GLcontext *ctx) + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); + +- _mesa_validate_all_lighting_tables( ctx ); ++ _mesa_validate_all_lighting_tables( ctx ); + +- tnl->Driver.NotifyMaterialChange = ++ tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; + } + +@@ -665,33 +592,53 @@ void r300DestroySwtcl(GLcontext *ctx) + { + } + +-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) ++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) + { +- int cmd_reserved = 0; +- int cmd_written = 0; ++ BATCH_LOCALS(&rmesa->radeon); + +- drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", +- __FUNCTION__, vertex_size, offset); +- +- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); +- e32(1); +- e32(vertex_size | (vertex_size << 8)); +- e32(offset); ++ fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", ++ __FUNCTION__, vertex_size, offset); ++ ++ BEGIN_BATCH(7); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); ++ OUT_BATCH(1); ++ OUT_BATCH(vertex_size | (vertex_size << 8)); ++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); ++ END_BATCH(); + } + + void r300EmitVbufPrim(r300ContextPtr 
rmesa, GLuint primitive, GLuint vertex_nr) + { +- +- int cmd_reserved = 0; +- int cmd_written = 0; ++ BATCH_LOCALS(&rmesa->radeon); + int type, num_verts; +- drm_radeon_cmd_header_t *cmd = NULL; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); +- +- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); +- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); ++ ++ BEGIN_BATCH(3); ++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); ++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); ++ END_BATCH(); ++} ++ ++void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) ++{ ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, ++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)), ++ __FUNCTION__); ++ radeonEmitState(&rmesa->radeon); ++ r300EmitVertexAOS(rmesa, ++ rmesa->radeon.swtcl.vertex_size, ++ rmesa->radeon.dma.current, ++ current_offset); ++ ++ r300EmitVbufPrim(rmesa, ++ rmesa->radeon.swtcl.hw_primitive, ++ rmesa->radeon.swtcl.numverts); ++ r300EmitCacheFlush(rmesa); ++ COMMIT_BATCH(); ++ + } +diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h +index 55df53c..23b4ce3 100644 +--- a/src/mesa/drivers/dri/r300/r300_swtcl.h ++++ b/src/mesa/drivers/dri/r300/r300_swtcl.h +@@ -42,4 +42,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + extern void r300InitSwtcl( GLcontext *ctx ); + extern void r300DestroySwtcl( GLcontext *ctx ); + ++extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + #endif +diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c +index 8ab382c..0f5afbf 100644 +--- a/src/mesa/drivers/dri/r300/r300_tex.c ++++ b/src/mesa/drivers/dri/r300/r300_tex.c +@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "main/context.h" + #include "main/enums.h" + #include "main/image.h" ++#include "main/mipmap.h" + #include "main/simple_list.h" + #include "main/texformat.h" + #include "main/texstore.h" +@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_context.h" + #include "r300_state.h" + #include "r300_ioctl.h" ++#include "radeon_mipmap_tree.h" + #include "r300_tex.h" + + #include "xmlpool.h" +@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) + * + * \param t Texture object whose wrap modes are to be set + */ +-static void r300UpdateTexWrap(r300TexObjPtr t) ++static void r300UpdateTexWrap(radeonTexObjPtr t) + { +- struct gl_texture_object *tObj = t->base.tObj; ++ struct gl_texture_object *tObj = &t->base; + +- t->filter &= ++ t->pp_txfilter &= + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); + +- t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; ++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; + + if (tObj->Target != GL_TEXTURE_1D) { +- t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; ++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; + + if (tObj->Target == GL_TEXTURE_3D) +- t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; ++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; + } + } + +@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy) + * \param magf Texture magnification mode + * \param anisotropy Maximum anisotropy level + */ +-static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) ++static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) + { +- t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); +- t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; ++ /* Force 
revalidation to account for switches from/to mipmapping. */ ++ t->validated = GL_FALSE; ++ ++ t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); ++ t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; + + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. +@@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat + * filter settings completely. This includes driconf's settings. + */ + if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { +- t->filter |= R300_TX_MAG_FILTER_ANISO ++ t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO + | R300_TX_MIN_FILTER_ANISO + | R300_TX_MIN_FILTER_MIP_LINEAR + | aniso_filter(anisotropy); +@@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat + + switch (minf) { + case GL_NEAREST: +- t->filter |= R300_TX_MIN_FILTER_NEAREST; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: +- t->filter |= R300_TX_MIN_FILTER_LINEAR; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: +- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: +- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: +- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: +- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; ++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + } + +@@ 
-163,743 +168,20 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat + */ + switch (magf) { + case GL_NEAREST: +- t->filter |= R300_TX_MAG_FILTER_NEAREST; ++ t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST; + break; + case GL_LINEAR: +- t->filter |= R300_TX_MAG_FILTER_LINEAR; ++ t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR; + break; + } + } + +-static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) ++static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4]) + { + t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); + } + + /** +- * Allocate space for and load the mesa images into the texture memory block. +- * This will happen before drawing with a new texture, or drawing with a +- * texture after it was swapped out or teximaged again. +- */ +- +-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) +-{ +- r300TexObjPtr t; +- +- t = CALLOC_STRUCT(r300_tex_obj); +- texObj->DriverData = t; +- if (t != NULL) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, +- (void *)texObj, (void *)t); +- } +- +- /* Initialize non-image-dependent parts of the state: +- */ +- t->base.tObj = texObj; +- t->border_fallback = GL_FALSE; +- +- make_empty_list(&t->base); +- +- r300UpdateTexWrap(t); +- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); +- r300SetTexBorderColor(t, texObj->_BorderChan); +- } +- +- return t; +-} +- +-/* try to find a format which will only need a memcopy */ +-static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, +- GLenum srcType) +-{ +- const GLuint ui = 1; +- const GLubyte littleEndian = *((const GLubyte *)&ui); +- +- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || +- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || +- (srcFormat == GL_ABGR_EXT && srcType == 
GL_UNSIGNED_BYTE && littleEndian)) { +- return &_mesa_texformat_rgba8888; +- } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || +- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || +- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { +- return &_mesa_texformat_rgba8888_rev; +- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || +- srcType == GL_UNSIGNED_INT_8_8_8_8)) { +- return &_mesa_texformat_argb8888_rev; +- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || +- srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { +- return &_mesa_texformat_argb8888; +- } else +- return _dri_texformat_argb8888; +-} +- +-static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, +- GLint +- internalFormat, +- GLenum format, +- GLenum type) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- const GLboolean do32bpt = +- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32); +- const GLboolean force16bpt = +- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); +- (void)format; +- +-#if 0 +- fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n", +- _mesa_lookup_enum_by_nr(internalFormat), internalFormat, +- _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); +- fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt); +-#endif +- +- switch (internalFormat) { +- case 4: +- case GL_RGBA: +- case GL_COMPRESSED_RGBA: +- switch (type) { +- case GL_UNSIGNED_INT_10_10_10_2: +- case GL_UNSIGNED_INT_2_10_10_10_REV: +- return do32bpt ? _dri_texformat_argb8888 : +- _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- default: +- return do32bpt ? 
r300Choose8888TexFormat(format, type) : +- _dri_texformat_argb4444; +- } +- +- case 3: +- case GL_RGB: +- case GL_COMPRESSED_RGB: +- switch (type) { +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_5_6_5: +- case GL_UNSIGNED_SHORT_5_6_5_REV: +- return _dri_texformat_rgb565; +- default: +- return do32bpt ? _dri_texformat_argb8888 : +- _dri_texformat_rgb565; +- } +- +- case GL_RGBA8: +- case GL_RGB10_A2: +- case GL_RGBA12: +- case GL_RGBA16: +- return !force16bpt ? +- r300Choose8888TexFormat(format, +- type) : _dri_texformat_argb4444; +- +- case GL_RGBA4: +- case GL_RGBA2: +- return _dri_texformat_argb4444; +- +- case GL_RGB5_A1: +- return _dri_texformat_argb1555; +- +- case GL_RGB8: +- case GL_RGB10: +- case GL_RGB12: +- case GL_RGB16: +- return !force16bpt ? _dri_texformat_argb8888 : +- _dri_texformat_rgb565; +- +- case GL_RGB5: +- case GL_RGB4: +- case GL_R3_G3_B2: +- return _dri_texformat_rgb565; +- +- case GL_ALPHA: +- case GL_ALPHA4: +- case GL_ALPHA8: +- case GL_ALPHA12: +- case GL_ALPHA16: +- case GL_COMPRESSED_ALPHA: +- return _dri_texformat_a8; +- +- case 1: +- case GL_LUMINANCE: +- case GL_LUMINANCE4: +- case GL_LUMINANCE8: +- case GL_LUMINANCE12: +- case GL_LUMINANCE16: +- case GL_COMPRESSED_LUMINANCE: +- return _dri_texformat_l8; +- +- case 2: +- case GL_LUMINANCE_ALPHA: +- case GL_LUMINANCE4_ALPHA4: +- case GL_LUMINANCE6_ALPHA2: +- case GL_LUMINANCE8_ALPHA8: +- case GL_LUMINANCE12_ALPHA4: +- case GL_LUMINANCE12_ALPHA12: +- case GL_LUMINANCE16_ALPHA16: +- case GL_COMPRESSED_LUMINANCE_ALPHA: +- return _dri_texformat_al88; +- +- case GL_INTENSITY: +- case GL_INTENSITY4: +- case GL_INTENSITY8: +- case GL_INTENSITY12: +- case GL_INTENSITY16: +- case GL_COMPRESSED_INTENSITY: +- return _dri_texformat_i8; +- +- case GL_YCBCR_MESA: +- if (type == 
GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE) +- return &_mesa_texformat_ycbcr; +- else +- return &_mesa_texformat_ycbcr_rev; +- +- case GL_RGB_S3TC: +- case GL_RGB4_S3TC: +- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgb_dxt1; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgba_dxt1; +- +- case GL_RGBA_S3TC: +- case GL_RGBA4_S3TC: +- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: +- return &_mesa_texformat_rgba_dxt3; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: +- return &_mesa_texformat_rgba_dxt5; +- +- case GL_ALPHA16F_ARB: +- return &_mesa_texformat_alpha_float16; +- case GL_ALPHA32F_ARB: +- return &_mesa_texformat_alpha_float32; +- case GL_LUMINANCE16F_ARB: +- return &_mesa_texformat_luminance_float16; +- case GL_LUMINANCE32F_ARB: +- return &_mesa_texformat_luminance_float32; +- case GL_LUMINANCE_ALPHA16F_ARB: +- return &_mesa_texformat_luminance_alpha_float16; +- case GL_LUMINANCE_ALPHA32F_ARB: +- return &_mesa_texformat_luminance_alpha_float32; +- case GL_INTENSITY16F_ARB: +- return &_mesa_texformat_intensity_float16; +- case GL_INTENSITY32F_ARB: +- return &_mesa_texformat_intensity_float32; +- case GL_RGB16F_ARB: +- return &_mesa_texformat_rgba_float16; +- case GL_RGB32F_ARB: +- return &_mesa_texformat_rgba_float32; +- case GL_RGBA16F_ARB: +- return &_mesa_texformat_rgba_float16; +- case GL_RGBA32F_ARB: +- return &_mesa_texformat_rgba_float32; +- +- case GL_DEPTH_COMPONENT: +- case GL_DEPTH_COMPONENT16: +- case GL_DEPTH_COMPONENT24: +- case GL_DEPTH_COMPONENT32: +-#if 0 +- switch (type) { +- case GL_UNSIGNED_BYTE: +- case GL_UNSIGNED_SHORT: +- return &_mesa_texformat_z16; +- case GL_UNSIGNED_INT: +- return &_mesa_texformat_z32; +- case GL_UNSIGNED_INT_24_8_EXT: +- default: +- return &_mesa_texformat_z24_s8; +- } +-#else +- return &_mesa_texformat_z16; +-#endif +- +- default: +- _mesa_problem(ctx, +- "unexpected internalFormat 0x%x in r300ChooseTextureFormat", +- (int)internalFormat); +- return NULL; +- } 
+- +- return NULL; /* never get here */ +-} +- +-static GLboolean +-r300ValidateClientStorage(GLcontext * ctx, GLenum target, +- GLint internalFormat, +- GLint srcWidth, GLint srcHeight, +- GLenum format, GLenum type, const void *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "intformat %s format %s type %s\n", +- _mesa_lookup_enum_by_nr(internalFormat), +- _mesa_lookup_enum_by_nr(format), +- _mesa_lookup_enum_by_nr(type)); +- +- if (!ctx->Unpack.ClientStorage) +- return 0; +- +- if (ctx->_ImageTransferState || +- texImage->IsCompressed || texObj->GenerateMipmap) +- return 0; +- +- /* This list is incomplete, may be different on ppc??? +- */ +- switch (internalFormat) { +- case GL_RGBA: +- if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { +- texImage->TexFormat = _dri_texformat_argb8888; +- } else +- return 0; +- break; +- +- case GL_RGB: +- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { +- texImage->TexFormat = _dri_texformat_rgb565; +- } else +- return 0; +- break; +- +- case GL_YCBCR_MESA: +- if (format == GL_YCBCR_MESA && +- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { +- texImage->TexFormat = &_mesa_texformat_ycbcr_rev; +- } else if (format == GL_YCBCR_MESA && +- (type == GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE)) { +- texImage->TexFormat = &_mesa_texformat_ycbcr; +- } else +- return 0; +- break; +- +- default: +- return 0; +- } +- +- /* Could deal with these packing issues, but currently don't: +- */ +- if (packing->SkipPixels || +- packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { +- return 0; +- } +- +- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, +- format, type); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: srcRowStride %d/%x\n", +- __FUNCTION__, srcRowStride, srcRowStride); 
+- +- /* Could check this later in upload, pitch restrictions could be +- * relaxed, but would need to store the image pitch somewhere, +- * as packing details might change before image is uploaded: +- */ +- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) +- || (srcRowStride & 63)) +- return 0; +- +- /* Have validated that _mesa_transfer_teximage would be a straight +- * memcpy at this point. NOTE: future calls to TexSubImage will +- * overwrite the client data. This is explicitly mentioned in the +- * extension spec. +- */ +- texImage->Data = (void *)pixels; +- texImage->IsClientData = GL_TRUE; +- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; +- +- return 1; +-} +- +-static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint border, +- GLenum format, GLenum type, const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); +- return; +- } +- } +- +- /* Note, this will call ChooseTextureFormat */ +- _mesa_store_teximage1d(ctx, target, level, internalFormat, +- width, border, format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +-static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, +- GLint xoffset, +- GLsizei width, +- GLenum format, GLenum type, +- const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = 
(driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); +- return; +- } +- } +- +- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, +- format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +-static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLenum format, GLenum type, const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = +- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if (t != NULL) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +- +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. 
+- * +- * Note, this will call r300ChooseTextureFormat. +- */ +- _mesa_store_teximage2d(ctx, target, level, internalFormat, +- width, height, border, format, type, +- pixels, &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +- } +-} +- +-static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, +- GLsizei width, GLsizei height, +- GLenum format, GLenum type, +- const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = +- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); +- return; +- } +- } +- +- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, +- height, format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +-static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, +- GLint level, GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLsizei imageSize, const GLvoid * data, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case 
GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = +- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if (t != NULL) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, +- "glCompressedTexImage2D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +- +- /* can't call this, different parameters. Would never evaluate to true anyway currently */ +-#if 0 +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else +-#endif +- { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r300ChooseTextureFormat. 
+- */ +- _mesa_store_compressed_teximage2d(ctx, target, level, +- internalFormat, width, height, +- border, imageSize, data, +- texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +- } +-} +- +-static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, +- GLint level, GLint xoffset, +- GLint yoffset, GLsizei width, +- GLsizei height, GLenum format, +- GLsizei imageSize, const GLvoid * data, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = +- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, +- "glCompressedTexSubImage3D"); +- return; +- } +- } +- +- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, +- yoffset, width, height, format, +- imageSize, data, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +-static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint depth, +- GLint border, +- GLenum format, GLenum type, const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- 
if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); +- return; +- } +- } +- +- texImage->IsClientData = GL_FALSE; +- +-#if 0 +- if (r300ValidateClientStorage(ctx, target, +- internalFormat, +- width, height, +- format, type, pixels, +- packing, texObj, texImage)) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using client storage\n", +- __FUNCTION__); +- } else +-#endif +- { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: Using normal storage\n", +- __FUNCTION__); +- +- /* Normal path: copy (to cached memory) and eventually upload +- * via another copy to GART memory and then a blit... Could +- * eliminate one copy by going straight to (permanent) GART. +- * +- * Note, this will call r300ChooseTextureFormat. +- */ +- _mesa_store_teximage3d(ctx, target, level, internalFormat, +- width, height, depth, border, +- format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +- } +-} +- +-static void +-r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, GLint zoffset, +- GLsizei width, GLsizei height, GLsizei depth, +- GLenum format, GLenum type, +- const GLvoid * pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage) +-{ +- driTextureObject *t = (driTextureObject *) texObj->DriverData; +- +-/* fprintf(stderr, "%s\n", __FUNCTION__); */ +- +- assert(t); /* this _should_ be true */ +- if (t) { +- driSwapOutTextureObject(t); +- } else { +- t = (driTextureObject *) r300AllocTexObj(texObj); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); +- return; +- } +- texObj->DriverData = t; +- } +- +- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, +- width, height, depth, +- format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +-/** + * Changes variables and flags for a state update, which 
will happen at the + * next UpdateTextureState + */ +@@ -908,7 +190,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) + { +- r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; ++ radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %s )\n", __FUNCTION__, +@@ -941,7 +223,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. + */ +- driSwapOutTextureObject((driTextureObject *) t); ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; ++ t->validated = GL_FALSE; ++ } + break; + + case GL_DEPTH_TEXTURE_MODE: +@@ -964,27 +250,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, + } + } + +-static void r300BindTexture(GLcontext * ctx, GLenum target, +- struct gl_texture_object *texObj) +-{ +- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { +- fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, +- (void *)texObj, ctx->Texture.CurrentUnit); +- } +- +- if ((target == GL_TEXTURE_1D) +- || (target == GL_TEXTURE_2D) +- || (target == GL_TEXTURE_3D) +- || (target == GL_TEXTURE_CUBE_MAP) +- || (target == GL_TEXTURE_RECTANGLE_NV)) { +- assert(texObj->DriverData != NULL); +- } +-} +- + static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- driTextureObject *t = (driTextureObject *) texObj->DriverData; ++ radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, +@@ -992,14 +261,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + _mesa_lookup_enum_by_nr(texObj->Target)); + } + +- if (t != NULL) { +- if (rmesa) { +- R300_FIREVERTICES(rmesa); +- } ++ if (rmesa) { ++ int 
i; ++ radeon_firevertices(&rmesa->radeon); ++ ++ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) ++ if (rmesa->hw.textures[i] == t) ++ rmesa->hw.textures[i] = 0; ++ } + +- driDestroyTextureObject(t); ++ if (t->bo) { ++ radeon_bo_unref(t->bo); ++ t->bo = NULL; ++ } ++ ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; + } +- /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); + } + +@@ -1008,8 +287,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. +- * Note: we could use containment here to 'derive' the driver-specific +- * texture object from the core mesa gl_texture_object. Not done at this time. + * Fixup MaxAnisotropy according to user preference. + */ + static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, +@@ -1017,14 +294,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, + GLenum target) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_object *obj; +- obj = _mesa_new_texture_object(ctx, name, target); +- if (!obj) +- return NULL; +- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; ++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); ++ + +- r300AllocTexObj(obj); +- return obj; ++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { ++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, ++ t, _mesa_lookup_enum_by_nr(target)); ++ } ++ ++ _mesa_initialize_texture_object(&t->base, name, target); ++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; ++ ++ /* Initialize hardware state */ ++ r300UpdateTexWrap(t); ++ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); ++ r300SetTexBorderColor(t, t->base._BorderChan); ++ ++ return &t->base; + } + + void r300InitTextureFuncs(struct dd_function_table *functions) +@@ 
-1032,22 +318,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions) + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ +- functions->ChooseTextureFormat = r300ChooseTextureFormat; +- functions->TexImage1D = r300TexImage1D; +- functions->TexImage2D = r300TexImage2D; +- functions->TexImage3D = r300TexImage3D; +- functions->TexSubImage1D = r300TexSubImage1D; +- functions->TexSubImage2D = r300TexSubImage2D; +- functions->TexSubImage3D = r300TexSubImage3D; ++ functions->NewTextureImage = radeonNewTextureImage; ++ functions->FreeTexImageData = radeonFreeTexImageData; ++ functions->MapTexture = radeonMapTexture; ++ functions->UnmapTexture = radeonUnmapTexture; ++ ++ functions->ChooseTextureFormat = radeonChooseTextureFormat; ++ functions->TexImage1D = radeonTexImage1D; ++ functions->TexImage2D = radeonTexImage2D; ++ functions->TexImage3D = radeonTexImage3D; ++ functions->TexSubImage1D = radeonTexSubImage1D; ++ functions->TexSubImage2D = radeonTexSubImage2D; ++ functions->TexSubImage3D = radeonTexSubImage3D; ++ functions->GetTexImage = radeonGetTexImage; ++ functions->GetCompressedTexImage = radeonGetCompressedTexImage; + functions->NewTextureObject = r300NewTextureObject; +- functions->BindTexture = r300BindTexture; + functions->DeleteTexture = r300DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexParameter = r300TexParameter; + +- functions->CompressedTexImage2D = r300CompressedTexImage2D; +- functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D; ++ functions->CompressedTexImage2D = radeonCompressedTexImage2D; ++ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; ++ ++ functions->GenerateMipmap = radeonGenerateMipmap; + + driInitTextureFormats(); + } +diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h +index b86d45b..baad3fe 100644 +--- 
a/src/mesa/drivers/dri/r300/r300_tex.h ++++ b/src/mesa/drivers/dri/r300/r300_tex.h +@@ -37,16 +37,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + extern void r300SetDepthTexMode(struct gl_texture_object *tObj); + ++extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, ++ __DRIdrawable *dPriv); ++ + extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + +-extern void r300UpdateTextureState(GLcontext * ctx); +- +-extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, +- GLuint face); +- +-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); ++extern GLboolean r300ValidateBuffers(GLcontext * ctx); + + extern void r300InitTextureFuncs(struct dd_function_table *functions); + +diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c +deleted file mode 100644 +index b03eefa..0000000 +--- a/src/mesa/drivers/dri/r300/r300_texmem.c ++++ /dev/null +@@ -1,567 +0,0 @@ +-/************************************************************************** +- +-Copyright (C) Tungsten Graphics 2002. All Rights Reserved. +-The Weather Channel, Inc. funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 +-license. This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation on the rights to use, copy, modify, merge, publish, +-distribute, sub license, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. 
+- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +-SOFTWARE. +- +-**************************************************************************/ +- +-/** +- * \file +- * +- * \author Gareth Hughes +- * +- * \author Kevin E. Martin +- */ +- +-#include +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/context.h" +-#include "main/colormac.h" +-#include "main/macros.h" +-#include "main/simple_list.h" +-#include "radeon_reg.h" /* gets definition for usleep */ +-#include "r300_context.h" +-#include "r300_state.h" +-#include "r300_cmdbuf.h" +-#include "radeon_ioctl.h" +-#include "r300_tex.h" +-#include "r300_ioctl.h" +-#include /* for usleep() */ +- +-#ifdef USER_BUFFERS +-#include "r300_mem.h" +-#endif +- +-/** +- * Destroy any device-dependent state associated with the texture. This may +- * include NULLing out hardware state that points to the texture. 
+- */ +-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) +-{ +- int i; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, +- (void *)t, (void *)t->base.tObj); +- } +- +- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { +- if (rmesa->state.texture.unit[i].texobj == t) { +- rmesa->state.texture.unit[i].texobj = NULL; +- } +- } +-} +- +-/* ------------------------------------------------------------ +- * Texture image conversions +- */ +- +-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, +- r300TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint hwlevel, +- GLint x, GLint y, +- GLint width, GLint height) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- GLuint srcPitch, dstPitch; +- int blit_format; +- int srcOffset; +- +- /* +- * XXX it appears that we always upload the full image, not a subimage. +- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever +- * changed, the src pitch will have to change. 
+- */ +- switch (texFormat->TexelBytes) { +- case 1: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 2: +- blit_format = R300_CP_COLOR_FORMAT_RGB565; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 4: +- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- case 8: +- case 16: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- srcPitch = t->image[0][0].width * texFormat->TexelBytes; +- dstPitch = t->image[0][0].width * texFormat->TexelBytes; +- break; +- default: +- return; +- } +- +- t->image[0][hwlevel].data = texImage->Data; +- srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); +- +- assert(srcOffset != ~0); +- +- /* Don't currently need to cope with small pitches? 
+- */ +- width = texImage->Width; +- height = texImage->Height; +- +- if (texFormat->TexelBytes > 4) { +- width *= texFormat->TexelBytes; +- } +- +- r300EmitWait(rmesa, R300_WAIT_3D); +- +- r300EmitBlit(rmesa, blit_format, +- srcPitch, +- srcOffset, +- dstPitch, +- t->bufAddr, +- x, +- y, +- t->image[0][hwlevel].x + x, +- t->image[0][hwlevel].y + y, width, height); +- +- r300EmitWait(rmesa, R300_WAIT_2D); +-} +- +-static void r300UploadRectSubImage(r300ContextPtr rmesa, +- r300TexObjPtr t, +- struct gl_texture_image *texImage, +- GLint x, GLint y, GLint width, GLint height) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- int blit_format, dstPitch, done; +- +- switch (texFormat->TexelBytes) { +- case 1: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- break; +- case 2: +- blit_format = R300_CP_COLOR_FORMAT_RGB565; +- break; +- case 4: +- blit_format = R300_CP_COLOR_FORMAT_ARGB8888; +- break; +- case 8: +- case 16: +- blit_format = R300_CP_COLOR_FORMAT_CI8; +- break; +- default: +- return; +- } +- +- t->image[0][0].data = texImage->Data; +- +- /* Currently don't need to cope with small pitches. +- */ +- width = texImage->Width; +- height = texImage->Height; +- dstPitch = t->pitch; +- +- if (texFormat->TexelBytes > 4) { +- width *= texFormat->TexelBytes; +- } +- +- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { +- /* In this case, could also use GART texturing. This is +- * currently disabled, but has been tested & works. +- */ +- t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); +- t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "Using GART texturing for rectangular client texture\n"); +- +- /* Release FB memory allocated for this image: +- */ +- /* FIXME This may not be correct as driSwapOutTextureObject sets +- * FIXME dirty_images. It may be fine, though. 
+- */ +- if (t->base.memBlock) { +- driSwapOutTextureObject((driTextureObject *) t); +- } +- } else if (texImage->IsClientData) { +- /* Data already in GART memory, with usable pitch. +- */ +- GLuint srcPitch; +- srcPitch = texImage->RowStride * texFormat->TexelBytes; +- r300EmitBlit(rmesa, +- blit_format, +- srcPitch, +- r300GartOffsetFromVirtual(rmesa, texImage->Data), +- dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); +- } else { +- /* Data not in GART memory, or bad pitch. +- */ +- for (done = 0; done < height;) { +- struct r300_dma_region region; +- int lines = +- MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); +- int src_pitch; +- char *tex; +- +- src_pitch = texImage->RowStride * texFormat->TexelBytes; +- +- tex = (char *)texImage->Data + done * src_pitch; +- +- memset(®ion, 0, sizeof(region)); +- r300AllocDmaRegion(rmesa, ®ion, lines * dstPitch, +- 1024); +- +- /* Copy texdata to dma: +- */ +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "%s: src_pitch %d dst_pitch %d\n", +- __FUNCTION__, src_pitch, dstPitch); +- +- if (src_pitch == dstPitch) { +- memcpy(region.address + region.start, tex, +- lines * src_pitch); +- } else { +- char *buf = region.address + region.start; +- int i; +- for (i = 0; i < lines; i++) { +- memcpy(buf, tex, src_pitch); +- buf += dstPitch; +- tex += src_pitch; +- } +- } +- +- r300EmitWait(rmesa, R300_WAIT_3D); +- +- /* Blit to framebuffer +- */ +- r300EmitBlit(rmesa, +- blit_format, +- dstPitch, GET_START(®ion), +- dstPitch | (t->tile_bits >> 16), +- t->bufAddr, 0, 0, 0, done, width, lines); +- +- r300EmitWait(rmesa, R300_WAIT_2D); +-#ifdef USER_BUFFERS +- r300_mem_use(rmesa, region.buf->id); +-#endif +- +- r300ReleaseDmaRegion(rmesa, ®ion, __FUNCTION__); +- done += lines; +- } +- } +-} +- +-/** +- * Upload the texture image associated with texture \a t at the specified +- * level at the address relative to \a start. 
+- */ +-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, +- GLint hwlevel, +- GLint x, GLint y, GLint width, GLint height, +- GLuint face) +-{ +- struct gl_texture_image *texImage = NULL; +- GLuint offset; +- GLint imageWidth, imageHeight; +- GLint ret; +- drm_radeon_texture_t tex; +- drm_radeon_tex_image_t tmp; +- const int level = hwlevel + t->base.firstLevel; +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) { +- fprintf(stderr, +- "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", +- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, +- width, height, face); +- } +- +- ASSERT(face < 6); +- +- /* Ensure we have a valid texture to upload */ +- if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { +- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); +- return; +- } +- +- texImage = t->base.tObj->Image[face][level]; +- +- if (!texImage) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: texImage %d is NULL!\n", +- __FUNCTION__, level); +- return; +- } +- if (!texImage->Data) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is NULL!\n", +- __FUNCTION__); +- return; +- } +- +- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- assert(level == 0); +- assert(hwlevel == 0); +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is rectangular\n", +- __FUNCTION__); +- r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); +- return; +- } else if (texImage->IsClientData) { +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "%s: image data is in GART client storage\n", +- __FUNCTION__); +- r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, +- width, height); +- return; +- } else if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "%s: image data is in normal memory\n", +- __FUNCTION__); +- +- imageWidth = texImage->Width; +- imageHeight = texImage->Height; +- +- offset = t->bufAddr; +- +- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { +- 
GLint imageX = 0; +- GLint imageY = 0; +- GLint blitX = t->image[face][hwlevel].x; +- GLint blitY = t->image[face][hwlevel].y; +- GLint blitWidth = t->image[face][hwlevel].width; +- GLint blitHeight = t->image[face][hwlevel].height; +- fprintf(stderr, " upload image: %d,%d at %d,%d\n", +- imageWidth, imageHeight, imageX, imageY); +- fprintf(stderr, " upload blit: %d,%d at %d,%d\n", +- blitWidth, blitHeight, blitX, blitY); +- fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", +- (GLuint) offset, hwlevel, level); +- } +- +- t->image[face][hwlevel].data = texImage->Data; +- +- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. +- * NOTE: we're always use a 1KB-wide blit and I8 texture format. +- * We used to use 1, 2 and 4-byte texels and used to use the texture +- * width to dictate the blit width - but that won't work for compressed +- * textures. (Brian) +- * NOTE: can't do that with texture tiling. (sroland) +- */ +- tex.offset = offset; +- tex.image = &tmp; +- /* copy (x,y,width,height,data) */ +- memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); +- +- if (texImage->TexFormat->TexelBytes > 4) { +- const int log2TexelBytes = +- (3 + (texImage->TexFormat->TexelBytes >> 4)); +- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = +- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / +- 64, 1); +- tex.height = imageHeight; +- tex.width = imageWidth << log2TexelBytes; +- tex.offset += (tmp.x << log2TexelBytes) & ~1023; +- tmp.x = tmp.x % (1024 >> log2TexelBytes); +- tmp.width = tmp.width << log2TexelBytes; +- } else if (texImage->TexFormat->TexelBytes) { +- /* use multi-byte upload scheme */ +- tex.height = imageHeight; +- tex.width = imageWidth; +- switch (texImage->TexFormat->TexelBytes) { +- case 1: +- tex.format = RADEON_TXFORMAT_I8; +- break; +- case 2: +- tex.format = RADEON_TXFORMAT_AI88; +- break; +- case 4: +- tex.format = RADEON_TXFORMAT_ARGB8888; +- break; +- } +- tex.pitch = +- MAX2((texImage->Width * 
texImage->TexFormat->TexelBytes) / +- 64, 1); +- tex.offset += tmp.x & ~1023; +- tmp.x = tmp.x % 1024; +- +- if (t->tile_bits & R300_TXO_MICRO_TILE) { +- /* need something like "tiled coordinates" ? */ +- tmp.y = tmp.x / (tex.pitch * 128) * 2; +- tmp.x = +- tmp.x % (tex.pitch * 128) / 2 / +- texImage->TexFormat->TexelBytes; +- tex.pitch |= RADEON_DST_TILE_MICRO >> 22; +- } else { +- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); +- } +-#if 1 +- if ((t->tile_bits & R300_TXO_MACRO_TILE) && +- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) +- && ((!(t->tile_bits & R300_TXO_MICRO_TILE) +- && (texImage->Height >= 8)) +- || (texImage->Height >= 16))) { +- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, +- OR if height is smaller than 8 automatically, but if micro tiling is active +- the limit is height 16 instead ? */ +- tex.pitch |= RADEON_DST_TILE_MACRO >> 22; +- } +-#endif +- } else { +- /* In case of for instance 8x8 texture (2x2 dxt blocks), +- padding after the first two blocks is needed (only +- with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ +- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) +- has 4 real pixels. Needed so the kernel module reads +- the right amount of data. 
*/ +- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); +- tex.height = (imageHeight + 3) / 4; +- tex.width = (imageWidth + 3) / 4; +- if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { +- tex.width *= 8; +- } else { +- tex.width *= 16; +- } +- } +- +- LOCK_HARDWARE(&rmesa->radeon); +- do { +- ret = +- drmCommandWriteRead(rmesa->radeon.dri.fd, +- DRM_RADEON_TEXTURE, &tex, +- sizeof(drm_radeon_texture_t)); +- if (ret) { +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "DRM_RADEON_TEXTURE: again!\n"); +- usleep(1); +- } +- } while (ret == -EAGAIN); +- +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- if (ret) { +- fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); +- fprintf(stderr, " offset=0x%08x\n", offset); +- fprintf(stderr, " image width=%d height=%d\n", +- imageWidth, imageHeight); +- fprintf(stderr, " blit width=%d height=%d data=%p\n", +- t->image[face][hwlevel].width, +- t->image[face][hwlevel].height, +- t->image[face][hwlevel].data); +- _mesa_exit(-1); +- } +-} +- +-/** +- * Upload the texture images associated with texture \a t. This might +- * require the allocation of texture memory. +- * +- * \param rmesa Context pointer +- * \param t Texture to be uploaded +- * \param face Cube map face to be uploaded. Zero for non-cube maps. 
+- */ +- +-int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) +-{ +- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- if (t->image_override) +- return 0; +- +- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { +- fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, +- (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, +- t->base.totalSize, t->base.firstLevel, +- t->base.lastLevel); +- } +- +- if (t->base.totalSize == 0) +- return 0; +- +- if (RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__); +- radeonFinish(rmesa->radeon.glCtx); +- } +- +- LOCK_HARDWARE(&rmesa->radeon); +- +- if (t->base.memBlock == NULL) { +- int heap; +- +- heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, +- (driTextureObject *) t); +- if (heap == -1) { +- UNLOCK_HARDWARE(&rmesa->radeon); +- return -1; +- } +- +- /* Set the base offset of the texture image */ +- t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] +- + t->base.memBlock->ofs; +- t->offset = t->bufAddr; +- +- if (!(t->base.tObj->Image[0][0]->IsClientData)) { +- /* hope it's safe to add that here... */ +- t->offset |= t->tile_bits; +- } +- } +- +- /* Let the world know we've used this memory recently. +- */ +- driUpdateTextureLRU((driTextureObject *) t); +- UNLOCK_HARDWARE(&rmesa->radeon); +- +- /* Upload any images that are new */ +- if (t->base.dirty_images[face]) { +- int i; +- for (i = 0; i < numLevels; i++) { +- if ((t->base. 
+- dirty_images[face] & (1 << +- (i + t->base.firstLevel))) != +- 0) { +- r300UploadSubImage(rmesa, t, i, 0, 0, +- t->image[face][i].width, +- t->image[face][i].height, +- face); +- } +- } +- t->base.dirty_images[face] = 0; +- } +- +- if (RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__); +- radeonFinish(rmesa->radeon.glCtx); +- } +- +- return 0; +-} +diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c +index e2329f0..8a90069 100644 +--- a/src/mesa/drivers/dri/r300/r300_texstate.c ++++ b/src/mesa/drivers/dri/r300/r300_texstate.c +@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "r300_context.h" + #include "r300_state.h" + #include "r300_ioctl.h" +-#include "radeon_ioctl.h" ++#include "radeon_mipmap_tree.h" + #include "r300_tex.h" + #include "r300_reg.h" + +@@ -143,13 +143,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) + }, + }; + const GLuint *format; +- r300TexObjPtr t; ++ radeonTexObjPtr t; + + if (!tObj) + return; + +- t = (r300TexObjPtr) tObj->DriverData; +- ++ t = radeon_tex_obj(tObj); + + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { + case MESA_FORMAT_Z16: +@@ -171,13 +170,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) + + switch (tObj->DepthMode) { + case GL_LUMINANCE: +- t->format = format[0]; ++ t->pp_txformat = format[0]; + break; + case GL_INTENSITY: +- t->format = format[1]; ++ t->pp_txformat = format[1]; + break; + case GL_ALPHA: +- t->format = format[2]; ++ t->pp_txformat = format[2]; + break; + default: + /* Error...which should have already been caught by higher +@@ -190,479 +189,309 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) + + + /** +- * Compute sizes and fill in offset and blit information for the given +- * image (determined by \p face and \p level). 
+- * +- * \param curOffset points to the offset at which the image is to be stored +- * and is updated by this function according to the size of the image. +- */ +-static void compute_tex_image_offset( +- struct gl_texture_object *tObj, +- GLuint face, +- GLint level, +- GLint* curOffset) +-{ +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- const struct gl_texture_image* texImage; +- GLuint blitWidth = R300_BLIT_WIDTH_BYTES; +- GLuint texelBytes; +- GLuint size; +- +- texImage = tObj->Image[0][level + t->base.firstLevel]; +- if (!texImage) +- return; +- +- texelBytes = texImage->TexFormat->TexelBytes; +- +- /* find image size in bytes */ +- if (texImage->IsCompressed) { +- if ((t->format & R300_TX_FORMAT_DXT1) == +- R300_TX_FORMAT_DXT1) { +- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); +- if ((texImage->Width + 3) < 8) /* width one block */ +- size = texImage->CompressedSize * 4; +- else if ((texImage->Width + 3) < 16) +- size = texImage->CompressedSize * 2; +- else +- size = texImage->CompressedSize; +- } else { +- /* DXT3/5, 16 bytes per block */ +- WARN_ONCE +- ("DXT 3/5 suffers from multitexturing problems!\n"); +- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); +- if ((texImage->Width + 3) < 8) +- size = texImage->CompressedSize * 2; +- else +- size = texImage->CompressedSize; +- } +- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- size = +- ((texImage->Width * texelBytes + +- 63) & ~63) * texImage->Height; +- blitWidth = 64 / texelBytes; +- } else if (t->tile_bits & R300_TXO_MICRO_TILE) { +- /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, +- though the actual offset may be different (if texture is less than +- 32 bytes width) to the untiled case */ +- int w = (texImage->Width * texelBytes * 2 + 31) & ~31; +- size = +- (w * ((texImage->Height + 1) / 2)) * +- texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } else { +- int w = (texImage->Width * texelBytes + 31) & ~31; +- size = w * texImage->Height * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- assert(size > 0); +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", +- texImage->Width, texImage->Height, +- texImage->Depth, +- texImage->TexFormat->TexelBytes, +- texImage->InternalFormat); +- +- /* All images are aligned to a 32-byte offset */ +- *curOffset = (*curOffset + 0x1f) & ~0x1f; +- +- if (texelBytes) { +- /* fix x and y coords up later together with offset */ +- t->image[face][level].x = *curOffset; +- t->image[face][level].y = 0; +- t->image[face][level].width = +- MIN2(size / texelBytes, blitWidth); +- t->image[face][level].height = +- (size / texelBytes) / t->image[face][level].width; +- } else { +- t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; +- t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; +- t->image[face][level].width = +- MIN2(size, R300_BLIT_WIDTH_BYTES); +- t->image[face][level].height = size / t->image[face][level].width; +- } +- +- if (RADEON_DEBUG & DEBUG_TEXTURE) +- fprintf(stderr, +- "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", +- level, face, texImage->Width, texImage->Height, +- t->image[face][level].x, t->image[face][level].y, +- t->image[face][level].width, t->image[face][level].height, +- size, *curOffset); +- +- *curOffset += size; +-} +- +- +- +-/** +- * This function computes the number of bytes of storage needed for +- * the given texture object (all mipmap levels, all cube faces). 
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting +- * are computed here. \c filter, \c format, etc. will be set here +- * too. ++ * Compute the cached hardware register values for the given texture object. + * + * \param rmesa Context pointer +- * \param tObj GL texture object whose images are to be posted to +- * hardware state. ++ * \param t the r300 texture object + */ +-static void r300SetTexImages(r300ContextPtr rmesa, +- struct gl_texture_object *tObj) ++static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) + { +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- const struct gl_texture_image *baseImage = +- tObj->Image[0][tObj->BaseLevel]; +- GLint curOffset; +- GLint i, texelBytes; +- GLint numLevels; +- GLint log2Width, log2Height, log2Depth; +- +- /* Set the hardware texture format +- */ ++ const struct gl_texture_image *firstImage; ++ int firstlevel = t->mt ? t->mt->firstLevel : 0; ++ ++ firstImage = t->base.Image[0][firstlevel]; ++ + if (!t->image_override +- && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { +- if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { +- r300SetDepthTexMode(tObj); ++ && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { ++ if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { ++ r300SetDepthTexMode(&t->base); + } else { +- t->format = tx_table[baseImage->TexFormat->MesaFormat].format; ++ t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format; + } + +- t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter; ++ t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter; + } else if (!t->image_override) { + _mesa_problem(NULL, "unexpected texture format in %s", + __FUNCTION__); + return; + } + +- texelBytes = baseImage->TexFormat->TexelBytes; +- +- /* Compute which mipmap levels we really want to send to the hardware. 
+- */ +- driCalculateTextureFirstLastLevel((driTextureObject *) t); +- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; +- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; +- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; +- +- numLevels = t->base.lastLevel - t->base.firstLevel + 1; ++ if (t->image_override && t->bo) ++ return; + +- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); ++ t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT) ++ | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT) ++ | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT) ++ | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT)); + +- /* Calculate mipmap offsets and dimensions for blitting (uploading) +- * The idea is that we lay out the mipmap levels within a block of +- * memory organized as a rectangle of width BLIT_WIDTH_BYTES. +- */ + t->tile_bits = 0; + +- /* figure out if this texture is suitable for tiling. */ +-#if 0 /* Disabled for now */ +- if (texelBytes) { +- if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && +- /* texrect might be able to use micro tiling too in theory? 
*/ +- (baseImage->Height > 1)) { +- +- /* allow 32 (bytes) x 1 mip (which will use two times the space +- the non-tiled version would use) max if base texture is large enough */ +- if ((numLevels == 1) || +- (((baseImage->Width * texelBytes / +- baseImage->Height) <= 32) +- && (baseImage->Width * texelBytes > 64)) +- || +- ((baseImage->Width * texelBytes / +- baseImage->Height) <= 16)) { +- t->tile_bits |= R300_TXO_MICRO_TILE; +- } +- } ++ if (t->base.Target == GL_TEXTURE_CUBE_MAP) ++ t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP; ++ if (t->base.Target == GL_TEXTURE_3D) ++ t->pp_txformat |= R300_TX_FORMAT_3D; + +- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { +- /* we can set macro tiling even for small textures, they will be untiled anyway */ +- t->tile_bits |= R300_TXO_MACRO_TILE; +- } +- } +-#endif + +- curOffset = 0; +- +- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- ASSERT(log2Width == log2Height); +- t->format |= R300_TX_FORMAT_CUBIC_MAP; +- +- for(i = 0; i < numLevels; i++) { +- GLuint face; +- for(face = 0; face < 6; face++) +- compute_tex_image_offset(tObj, face, i, &curOffset); +- } +- } else { +- if (tObj->Target == GL_TEXTURE_3D) +- t->format |= R300_TX_FORMAT_3D; +- +- for (i = 0; i < numLevels; i++) +- compute_tex_image_offset(tObj, 0, i, &curOffset); +- } +- +- /* Align the total size of texture memory block. +- */ +- t->base.totalSize = +- (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +- +- t->size = +- (((tObj->Image[0][t->base.firstLevel]->Width - +- 1) << R300_TX_WIDTHMASK_SHIFT) +- | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << +- R300_TX_HEIGHTMASK_SHIFT) +- | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) << +- R300_TX_DEPTHMASK_SHIFT)) +- | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); +- +- t->pitch = 0; +- +- /* Only need to round to nearest 32 for textures, but the blitter +- * requires 64-byte aligned pitches, and we may/may not need the +- * blitter. NPOT only! 
+- */ +- if (baseImage->IsCompressed) { +- t->pitch |= +- (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); +- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- unsigned int align = (64 / texelBytes) - 1; +- t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * +- texelBytes) + 63) & ~(63); +- t->size |= R300_TX_SIZE_TXPITCH_EN; ++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { ++ unsigned int align = (64 / t->mt->bpp) - 1; ++ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + if (!t->image_override) +- t->pitch_reg = +- (((tObj->Image[0][t->base.firstLevel]->Width) + +- align) & ~align) - 1; +- } else { +- t->pitch |= +- ((tObj->Image[0][t->base.firstLevel]->Width * +- texelBytes) + 63) & ~(63); ++ t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +- if (tObj->Image[0][t->base.firstLevel]->Width > 2048) +- t->pitch_reg |= R500_TXWIDTH_BIT11; +- if (tObj->Image[0][t->base.firstLevel]->Height > 2048) +- t->pitch_reg |= R500_TXHEIGHT_BIT11; ++ if (firstImage->Width > 2048) ++ t->pp_txpitch |= R500_TXWIDTH_BIT11; ++ if (firstImage->Height > 2048) ++ t->pp_txpitch |= R500_TXHEIGHT_BIT11; + } + } + +-/* ================================================================ +- * Texture unit state management ++/** ++ * Ensure the given texture is ready for rendering. ++ * ++ * Mostly this means populating the texture object's mipmap tree. 
+ */ +- +-static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit) ++static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- +- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); ++ radeonTexObj *t = radeon_tex_obj(texObj); + +- if (t->base.dirty_images[0]) { +- R300_FIREVERTICES(rmesa); ++ if (!radeon_validate_texture_miptree(ctx, texObj)) ++ return GL_FALSE; + +- r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock && !t->image_override) +- return GL_FALSE; +- } ++ /* Configure the hardware registers (more precisely, the cached version ++ * of the hardware registers). */ ++ setup_hardware_state(rmesa, t); + ++ t->validated = GL_TRUE; + return GL_TRUE; + } + +-static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) ++ ++/** ++ * Ensure all enabled and complete textures are uploaded along with any buffers being used. ++ */ ++GLboolean r300ValidateBuffers(GLcontext * ctx) + { + r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- +- ASSERT(tObj->Target == GL_TEXTURE_3D); +- +- /* r300 does not support mipmaps for 3D textures. 
*/ +- if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) { +- return GL_FALSE; ++ struct radeon_cs_space_check bos[16]; ++ struct radeon_renderbuffer *rrb; ++ int num_bo = 0; ++ int i; ++ int flushed = 0, ret; ++again: ++ num_bo = 0; ++ ++ rrb = radeon_get_colorbuffer(&rmesa->radeon); ++ /* color buffer */ ++ if (rrb && rrb->bo) { ++ bos[num_bo].bo = rrb->bo; ++ bos[num_bo].read_domains = 0; ++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; + } + +- if (t->base.dirty_images[0]) { +- R300_FIREVERTICES(rmesa); +- r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock) +- return GL_FALSE; ++ /* depth buffer */ ++ rrb = radeon_get_depthbuffer(&rmesa->radeon); ++ /* color buffer */ ++ if (rrb && rrb->bo) { ++ bos[num_bo].bo = rrb->bo; ++ bos[num_bo].read_domains = 0; ++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; + } ++ ++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { ++ radeonTexObj *t; + +- return GL_TRUE; +-} ++ if (!ctx->Texture.Unit[i]._ReallyEnabled) ++ continue; + +-static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- GLuint face; +- +- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); +- +- if (t->base.dirty_images[0] || t->base.dirty_images[1] || +- t->base.dirty_images[2] || t->base.dirty_images[3] || +- t->base.dirty_images[4] || t->base.dirty_images[5]) { +- /* flush */ +- R300_FIREVERTICES(rmesa); +- /* layout memory space, once for all faces */ +- r300SetTexImages(rmesa, tObj); +- } +- +- /* upload (per face) */ +- for (face = 0; face < 6; face++) { +- if (t->base.dirty_images[face]) { +- r300UploadTexImages(rmesa, +- 
(r300TexObjPtr) tObj->DriverData, +- face); ++ if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { ++ _mesa_warning(ctx, ++ "failed to validate texture for unit %d.\n", ++ i); + } ++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); ++ if (t->image_override && t->bo) ++ bos[num_bo].bo = t->bo; ++ else if (t->mt->bo) ++ bos[num_bo].bo = t->mt->bo; ++ bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; ++ bos[num_bo].write_domain = 0; ++ bos[num_bo].new_accounted = 0; ++ num_bo++; + } + +- if (!t->base.memBlock) { +- /* texmem alloc failed, use s/w fallback */ ++ ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo); ++ if (ret == RADEON_CS_SPACE_OP_TO_BIG) + return GL_FALSE; +- } +- +- return GL_TRUE; +-} +- +-static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- +- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); +- +- if (t->base.dirty_images[0]) { +- R300_FIREVERTICES(rmesa); +- +- r300SetTexImages(rmesa, tObj); +- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); +- if (!t->base.memBlock && !t->image_override && +- !rmesa->prefer_gart_client_texturing) ++ if (ret == RADEON_CS_SPACE_FLUSH) { ++ radeonFlush(ctx); ++ if (flushed) + return GL_FALSE; ++ flushed = 1; ++ goto again; + } +- + return GL_TRUE; + } + +-static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) +-{ +- r300ContextPtr rmesa = R300_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; +- +- /* Fallback if there's a texture border */ +- if (tObj->Image[0][tObj->BaseLevel]->Border > 0) +- return GL_FALSE; +- +- /* Update state if this is a different texture object 
to last +- * time. +- */ +- if (rmesa->state.texture.unit[unit].texobj != t) { +- if (rmesa->state.texture.unit[unit].texobj != NULL) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= +- ~(1 << unit); +- } +- +- rmesa->state.texture.unit[unit].texobj = t; +- t->base.bound |= (1 << unit); +- driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ +- } +- +- return !t->border_fallback; +-} +- + void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) + { + r300ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); +- r300TexObjPtr t; ++ radeonTexObjPtr t = radeon_tex_obj(tObj); + uint32_t pitch_val; + + if (!tObj) + return; + +- t = (r300TexObjPtr) tObj->DriverData; +- + t->image_override = GL_TRUE; + + if (!offset) + return; + +- t->offset = offset; +- t->pitch_reg &= (1 << 13) -1; ++ t->bo = NULL; ++ t->override_offset = offset; ++ t->pp_txpitch &= (1 << 13) -1; + pitch_val = pitch; + + switch (depth) { + case 32: +- t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); +- t->filter |= tx_table[2].filter; ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); ++ t->pp_txfilter |= tx_table[2].filter; + pitch_val /= 4; + break; + case 24: + default: +- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); +- t->filter |= tx_table[4].filter; ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); ++ t->pp_txfilter |= tx_table[4].filter; + pitch_val /= 4; + break; + case 16: +- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); +- t->filter |= tx_table[5].filter; ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); ++ t->pp_txfilter |= tx_table[5].filter; + pitch_val /= 2; + break; + } + pitch_val--; + +- t->pitch_reg |= pitch_val; ++ t->pp_txpitch |= pitch_val; + } + +-static 
GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) ++void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) + { +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- +- if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) { +- return (r300EnableTextureRect(ctx, unit) && +- r300UpdateTexture(ctx, unit)); +- } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) { +- return (r300EnableTexture2D(ctx, unit) && +- r300UpdateTexture(ctx, unit)); +- } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) { +- return (r300EnableTexture3D(ctx, unit) && +- r300UpdateTexture(ctx, unit)); +- } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) { +- return (r300EnableTextureCube(ctx, unit) && +- r300UpdateTexture(ctx, unit)); +- } else if (texUnit->_ReallyEnabled) { +- return GL_FALSE; +- } else { +- return GL_TRUE; +- } +-} ++ struct gl_texture_unit *texUnit; ++ struct gl_texture_object *texObj; ++ struct gl_texture_image *texImage; ++ struct radeon_renderbuffer *rb; ++ radeon_texture_image *rImage; ++ radeonContextPtr radeon; ++ r300ContextPtr rmesa; ++ GLframebuffer *fb; ++ radeonTexObjPtr t; ++ uint32_t pitch_val; + +-void r300UpdateTextureState(GLcontext * ctx) +-{ +- int i; ++ target = GL_TEXTURE_RECTANGLE_ARB; + +- for (i = 0; i < 8; i++) { +- if (!r300UpdateTextureUnit(ctx, i)) { +- _mesa_warning(ctx, +- "failed to update texture state for unit %d.\n", +- i); +- } ++ radeon = pDRICtx->driverPrivate; ++ rmesa = pDRICtx->driverPrivate; ++ ++ fb = dPriv->driverPrivate; ++ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; ++ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); ++ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); ++ ++ rImage = get_radeon_texture_image(texImage); ++ t = radeon_tex_obj(texObj); ++ if (t == NULL) { ++ return; ++ } ++ ++ radeon_update_renderbuffers(pDRICtx, dPriv); ++ /* back & depth buffer are useless free them right away */ ++ 
rb = (void*)fb->Attachment[BUFFER_DEPTH].Renderbuffer; ++ if (rb && rb->bo) { ++ radeon_bo_unref(rb->bo); ++ rb->bo = NULL; ++ } ++ rb = (void*)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ if (rb && rb->bo) { ++ radeon_bo_unref(rb->bo); ++ rb->bo = NULL; ++ } ++ rb = (void*)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ if (rb->bo == NULL) { ++ /* Failed to BO for the buffer */ ++ return; ++ } ++ ++ _mesa_lock_texture(radeon->glCtx, texObj); ++ if (t->bo) { ++ radeon_bo_unref(t->bo); ++ t->bo = NULL; ++ } ++ if (rImage->bo) { ++ radeon_bo_unref(rImage->bo); ++ rImage->bo = NULL; ++ } ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = NULL; ++ } ++ if (rImage->mt) { ++ radeon_miptree_unreference(rImage->mt); ++ rImage->mt = NULL; ++ } ++ fprintf(stderr,"settexbuf %dx%d@%d\n", rb->width, rb->height, rb->cpp); ++ _mesa_init_teximage_fields(radeon->glCtx, target, texImage, ++ rb->width, rb->height, 1, 0, rb->cpp); ++ texImage->TexFormat = &_mesa_texformat_rgba8888_rev; ++ rImage->bo = rb->bo; ++ radeon_bo_ref(rImage->bo); ++ t->bo = rb->bo; ++ radeon_bo_ref(t->bo); ++ t->tile_bits = 0; ++ t->image_override = GL_TRUE; ++ t->override_offset = 0; ++ t->pp_txpitch &= (1 << 13) -1; ++ pitch_val = rb->pitch; ++ switch (rb->cpp) { ++ case 4: ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); ++ t->pp_txfilter |= tx_table[2].filter; ++ pitch_val /= 4; ++ break; ++ case 3: ++ default: ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); ++ t->pp_txfilter |= tx_table[4].filter; ++ pitch_val /= 4; ++ break; ++ case 2: ++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); ++ t->pp_txfilter |= tx_table[5].filter; ++ pitch_val /= 2; ++ break; ++ } ++ pitch_val--; ++ t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) | ++ ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT); ++ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; ++ t->pp_txpitch |= pitch_val; ++ ++ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { ++ 
if (rb->width > 2048) ++ t->pp_txpitch |= R500_TXWIDTH_BIT11; ++ if (rb->height > 2048) ++ t->pp_txpitch |= R500_TXHEIGHT_BIT11; + } ++ t->validated = GL_TRUE; ++ _mesa_unlock_texture(radeon->glCtx, texObj); ++ return; + } +diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c +index 75dae86..926ddd5 100644 +--- a/src/mesa/drivers/dri/r300/r500_fragprog.c ++++ b/src/mesa/drivers/dri/r300/r500_fragprog.c +@@ -31,6 +31,12 @@ + #include "radeon_program_alu.h" + + ++static void reset_srcreg(struct prog_src_register* reg) ++{ ++ _mesa_bzero(reg, sizeof(*reg)); ++ reg->Swizzle = SWIZZLE_NOOP; ++} ++ + static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) + { + gl_state_index fail_value_tokens[STATE_LENGTH] = { +@@ -99,6 +105,19 @@ static GLboolean transform_TEX( + destredirect = GL_TRUE; + } + ++ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { ++ int tmpreg = radeonFindFreeTemporary(t); ++ tgt = radeonAppendInstructions(t->Program, 1); ++ tgt->Opcode = OPCODE_MOV; ++ tgt->DstReg.File = PROGRAM_TEMPORARY; ++ tgt->DstReg.Index = tmpreg; ++ tgt->SrcReg[0] = inst.SrcReg[0]; ++ ++ reset_srcreg(&inst.SrcReg[0]); ++ inst.SrcReg[0].File = PROGRAM_TEMPORARY; ++ inst.SrcReg[0].Index = tmpreg; ++ } ++ + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + +diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c +deleted file mode 100644 +index 5267fe9..0000000 +--- a/src/mesa/drivers/dri/r300/radeon_context.c ++++ /dev/null +@@ -1,330 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/** +- * \file radeon_context.c +- * Common context initialization. +- * +- * \author Keith Whitwell +- */ +- +-#include +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/context.h" +-#include "main/state.h" +-#include "main/matrix.h" +-#include "main/framebuffer.h" +- +-#include "drivers/common/driverfuncs.h" +-#include "swrast/swrast.h" +- +-#include "radeon_screen.h" +-#include "radeon_ioctl.h" +-#include "radeon_macros.h" +-#include "radeon_reg.h" +- +-#include "radeon_state.h" +-#include "r300_state.h" +- +-#include "utils.h" +-#include "vblank.h" +-#include "xmlpool.h" /* for symbolic values of enum-type options */ +- +-#define DRIVER_DATE "20060815" +- +- +-/* Return various strings for glGetString(). 
+- */ +-static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) +-{ +- radeonContextPtr radeon = RADEON_CONTEXT(ctx); +- static char buffer[128]; +- +- switch (name) { +- case GL_VENDOR: +- if (IS_R300_CLASS(radeon->radeonScreen)) +- return (GLubyte *) "DRI R300 Project"; +- else +- return (GLubyte *) "Tungsten Graphics, Inc."; +- +- case GL_RENDERER: +- { +- unsigned offset; +- GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : +- radeon->radeonScreen->AGPMode; +- const char* chipname; +- +- if (IS_R300_CLASS(radeon->radeonScreen)) +- chipname = "R300"; +- else +- chipname = "R200"; +- +- offset = driGetRendererString(buffer, chipname, DRIVER_DATE, +- agp_mode); +- +- if (IS_R300_CLASS(radeon->radeonScreen)) { +- sprintf(&buffer[offset], " %sTCL", +- (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) +- ? "" : "NO-"); +- } else { +- sprintf(&buffer[offset], " %sTCL", +- !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) +- ? "" : "NO-"); +- } +- +- return (GLubyte *) buffer; +- } +- +- default: +- return NULL; +- } +-} +- +-/* Initialize the driver's misc functions. +- */ +-static void radeonInitDriverFuncs(struct dd_function_table *functions) +-{ +- functions->GetString = radeonGetString; +-} +- +- +-/** +- * Create and initialize all common fields of the context, +- * including the Mesa context itself. +- */ +-GLboolean radeonInitContext(radeonContextPtr radeon, +- struct dd_function_table* functions, +- const __GLcontextModes * glVisual, +- __DRIcontextPrivate * driContextPriv, +- void *sharedContextPrivate) +-{ +- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; +- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); +- GLcontext* ctx; +- GLcontext* shareCtx; +- int fthrottle_mode; +- +- /* Fill in additional standard functions. 
*/ +- radeonInitDriverFuncs(functions); +- +- radeon->radeonScreen = screen; +- /* Allocate and initialize the Mesa context */ +- if (sharedContextPrivate) +- shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; +- else +- shareCtx = NULL; +- radeon->glCtx = _mesa_create_context(glVisual, shareCtx, +- functions, (void *)radeon); +- if (!radeon->glCtx) +- return GL_FALSE; +- +- ctx = radeon->glCtx; +- driContextPriv->driverPrivate = radeon; +- +- /* DRI fields */ +- radeon->dri.context = driContextPriv; +- radeon->dri.screen = sPriv; +- radeon->dri.drawable = NULL; +- radeon->dri.readable = NULL; +- radeon->dri.hwContext = driContextPriv->hHWContext; +- radeon->dri.hwLock = &sPriv->pSAREA->lock; +- radeon->dri.fd = sPriv->fd; +- radeon->dri.drmMinor = sPriv->drm_version.minor; +- +- radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + +- screen->sarea_priv_offset); +- +- /* Setup IRQs */ +- fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); +- radeon->iw.irq_seq = -1; +- radeon->irqsEmitted = 0; +- radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && +- radeon->radeonScreen->irq); +- +- radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); +- +- if (!radeon->do_irqs) +- fprintf(stderr, +- "IRQ's not enabled, falling back to %s: %d %d\n", +- radeon->do_usleeps ? "usleeps" : "busy waits", +- fthrottle_mode, radeon->radeonScreen->irq); +- +- (*sPriv->systemTime->getUST) (&radeon->swap_ust); +- +- return GL_TRUE; +-} +- +- +-/** +- * Cleanup common context fields. +- * Called by r200DestroyContext/r300DestroyContext +- */ +-void radeonCleanupContext(radeonContextPtr radeon) +-{ +- /* _mesa_destroy_context() might result in calls to functions that +- * depend on the DriverCtx, so don't set it to NULL before. 
+- * +- * radeon->glCtx->DriverCtx = NULL; +- */ +- +- /* free the Mesa context */ +- _mesa_destroy_context(radeon->glCtx); +- +- if (radeon->state.scissor.pClipRects) { +- FREE(radeon->state.scissor.pClipRects); +- radeon->state.scissor.pClipRects = 0; +- } +-} +- +- +-/** +- * Swap front and back buffer. +- */ +-void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) +-{ +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- radeonContextPtr radeon; +- GLcontext *ctx; +- +- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = radeon->glCtx; +- +- if (ctx->Visual.doubleBufferMode) { +- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ +- if (radeon->doPageFlip) { +- radeonPageFlip(dPriv); +- } else { +- radeonCopyBuffer(dPriv, NULL); +- } +- } +- } else { +- /* XXX this shouldn't be an error but we can't handle it for now */ +- _mesa_problem(NULL, "%s: drawable has no context!", +- __FUNCTION__); +- } +-} +- +-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +- int x, int y, int w, int h ) +-{ +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- radeonContextPtr radeon; +- GLcontext *ctx; +- +- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = radeon->glCtx; +- +- if (ctx->Visual.doubleBufferMode) { +- drm_clip_rect_t rect; +- rect.x1 = x + dPriv->x; +- rect.y1 = (dPriv->h - y - h) + dPriv->y; +- rect.x2 = rect.x1 + w; +- rect.y2 = rect.y1 + h; +- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ +- radeonCopyBuffer(dPriv, &rect); +- } +- } else { +- /* XXX this shouldn't be an error but we can't handle it for now */ +- _mesa_problem(NULL, "%s: drawable has no context!", +- __FUNCTION__); +- } +-} +- +-/* Force the context `c' to be the current context and associate with it +- * buffer `b'. 
+- */ +-GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, +- __DRIdrawablePrivate * driDrawPriv, +- __DRIdrawablePrivate * driReadPriv) +-{ +- if (driContextPriv) { +- radeonContextPtr radeon = +- (radeonContextPtr) driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, +- radeon->glCtx); +- +- if (radeon->dri.drawable != driDrawPriv) { +- if (driDrawPriv->swap_interval == (unsigned)-1) { +- driDrawPriv->vblFlags = +- (radeon->radeonScreen->irq != 0) +- ? driGetDefaultVBlankFlags(&radeon-> +- optionCache) +- : VBLANK_FLAG_NO_IRQ; +- +- driDrawableInitVBlank(driDrawPriv); +- } +- } +- +- radeon->dri.readable = driReadPriv; +- +- if (radeon->dri.drawable != driDrawPriv || +- radeon->lastStamp != driDrawPriv->lastStamp) { +- radeon->dri.drawable = driDrawPriv; +- +- radeonSetCliprects(radeon); +- r300UpdateViewportOffset(radeon->glCtx); +- } +- +- _mesa_make_current(radeon->glCtx, +- (GLframebuffer *) driDrawPriv-> +- driverPrivate, +- (GLframebuffer *) driReadPriv-> +- driverPrivate); +- +- _mesa_update_state(radeon->glCtx); +- +- radeonUpdatePageFlipping(radeon); +- } else { +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx is null\n", __FUNCTION__); +- _mesa_make_current(0, 0, 0); +- } +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "End %s\n", __FUNCTION__); +- return GL_TRUE; +-} +- +-/* Force the context `c' to be unbound from its buffer. 
+- */ +-GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) +-{ +- radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, +- radeon->glCtx); +- +- return GL_TRUE; +-} +- +diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h +index 47cbc22..250570f 100644 +--- a/src/mesa/drivers/dri/r300/radeon_context.h ++++ b/src/mesa/drivers/dri/r300/radeon_context.h +@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "drm.h" + #include "dri_util.h" + +-struct radeon_context; +-typedef struct radeon_context radeonContextRec; +-typedef struct radeon_context *radeonContextPtr; +- +-/* Rasterizing fallbacks */ +-/* See correponding strings in r200_swtcl.c */ +-#define RADEON_FALLBACK_TEXTURE 0x0001 +-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 +-#define RADEON_FALLBACK_STENCIL 0x0004 +-#define RADEON_FALLBACK_RENDER_MODE 0x0008 +-#define RADEON_FALLBACK_BLEND_EQ 0x0010 +-#define RADEON_FALLBACK_BLEND_FUNC 0x0020 +-#define RADEON_FALLBACK_DISABLE 0x0040 +-#define RADEON_FALLBACK_BORDER_MODE 0x0080 ++#include "radeon_screen.h" + + #if R200_MERGED + extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); +@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + /* TCL fallbacks */ + extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +-#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */ +-#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */ +-#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */ +-#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */ +-#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */ +-#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */ +-#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */ 
+-#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */ +-#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */ +-#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */ +-#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */ +-#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */ +-#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */ +- + #if R200_MERGED + #define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) + #else + #define TCL_FALLBACK( ctx, bit, mode ) ; + #endif + +-struct radeon_dri_mirror { +- __DRIcontextPrivate *context; /* DRI context */ +- __DRIscreenPrivate *screen; /* DRI screen */ +- /** +- * DRI drawable bound to this context for drawing. +- */ +- __DRIdrawablePrivate *drawable; +- +- /** +- * DRI drawable bound to this context for reading. +- */ +- __DRIdrawablePrivate *readable; +- +- drm_context_t hwContext; +- drm_hw_lock_t *hwLock; +- int fd; +- int drmMinor; +-}; +- +-/** +- * Derived state for internal purposes. +- */ +-struct radeon_scissor_state { +- drm_clip_rect_t rect; +- GLboolean enabled; +- +- GLuint numClipRects; /* Cliprects active */ +- GLuint numAllocedClipRects; /* Cliprects available */ +- drm_clip_rect_t *pClipRects; +-}; +- +-struct radeon_colorbuffer_state { +- GLuint clear; +- GLint drawOffset, drawPitch; +-}; +- +-struct radeon_state { +- struct radeon_colorbuffer_state color; +- struct radeon_scissor_state scissor; +-}; +- +-/** +- * Common per-context variables shared by R200 and R300. +- * R200- and R300-specific code "derive" their own context from this +- * structure. 
+- */ +-struct radeon_context { +- GLcontext *glCtx; /* Mesa context */ +- radeonScreenPtr radeonScreen; /* Screen private DRI data */ +- +- /* Fallback state */ +- GLuint Fallback; +- GLuint TclFallback; +- +- /* Page flipping */ +- GLuint doPageFlip; +- +- /* Drawable, cliprect and scissor information */ +- GLuint numClipRects; /* Cliprects for the draw buffer */ +- drm_clip_rect_t *pClipRects; +- unsigned int lastStamp; +- GLboolean lost_context; +- drm_radeon_sarea_t *sarea; /* Private SAREA data */ +- +- /* Mirrors of some DRI state */ +- struct radeon_dri_mirror dri; +- +- /* Busy waiting */ +- GLuint do_usleeps; +- GLuint do_irqs; +- GLuint irqsEmitted; +- drm_radeon_irq_wait_t iw; +- +- /* buffer swap */ +- int64_t swap_ust; +- int64_t swap_missed_ust; +- +- GLuint swap_count; +- GLuint swap_missed_count; +- +- /* Derived state */ +- struct radeon_state state; +- +- /* Configuration cache +- */ +- driOptionCache optionCache; +-}; +- +-#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) +- +-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); +-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +- int x, int y, int w, int h); +-extern GLboolean radeonInitContext(radeonContextPtr radeon, +- struct dd_function_table *functions, +- const __GLcontextModes * glVisual, +- __DRIcontextPrivate * driContextPriv, +- void *sharedContextPrivate); +-extern void radeonCleanupContext(radeonContextPtr radeon); +-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, +- __DRIdrawablePrivate * driDrawPriv, +- __DRIdrawablePrivate * driReadPriv); +-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); +- +-/* ================================================================ +- * Debugging: +- */ +-#define DO_DEBUG 1 +- +-#if DO_DEBUG +-extern int RADEON_DEBUG; +-#else +-#define RADEON_DEBUG 0 +-#endif +- +-#define DEBUG_TEXTURE 0x0001 +-#define DEBUG_STATE 0x0002 +-#define DEBUG_IOCTL 0x0004 +-#define 
DEBUG_PRIMS 0x0008 +-#define DEBUG_VERTS 0x0010 +-#define DEBUG_FALLBACKS 0x0020 +-#define DEBUG_VFMT 0x0040 +-#define DEBUG_CODEGEN 0x0080 +-#define DEBUG_VERBOSE 0x0100 +-#define DEBUG_DRI 0x0200 +-#define DEBUG_DMA 0x0400 +-#define DEBUG_SANITY 0x0800 +-#define DEBUG_SYNC 0x1000 +-#define DEBUG_PIXEL 0x2000 +-#define DEBUG_MEMORY 0x4000 + + #endif /* __RADEON_CONTEXT_H__ */ +diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c +deleted file mode 100644 +index 36502eb..0000000 +--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c ++++ /dev/null +@@ -1,396 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#include +-#include +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/macros.h" +-#include "main/context.h" +-#include "swrast/swrast.h" +-#include "r300_context.h" +-#include "radeon_ioctl.h" +-#include "r300_ioctl.h" +-#include "r300_state.h" +-#include "radeon_reg.h" +- +-#include "drirenderbuffer.h" +-#include "vblank.h" +- +-static void radeonWaitForIdle(radeonContextPtr radeon); +- +-/* ================================================================ +- * SwapBuffers with client-side throttling +- */ +- +-static uint32_t radeonGetLastFrame(radeonContextPtr radeon) +-{ +- drm_radeon_getparam_t gp; +- int ret; +- uint32_t frame; +- +- gp.param = RADEON_PARAM_LAST_FRAME; +- gp.value = (int *)&frame; +- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, +- &gp, sizeof(gp)); +- if (ret) { +- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, +- ret); +- exit(1); +- } +- +- return frame; +-} +- +-uint32_t radeonGetAge(radeonContextPtr radeon) +-{ +- drm_radeon_getparam_t gp; +- int ret; +- uint32_t age; +- +- gp.param = RADEON_PARAM_LAST_CLEAR; +- gp.value = (int *)&age; +- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, +- &gp, sizeof(gp)); +- if (ret) { +- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, +- ret); +- exit(1); +- } +- +- return age; +-} +- +-static void radeonEmitIrqLocked(radeonContextPtr radeon) +-{ +- drm_radeon_irq_emit_t ie; +- int ret; +- +- ie.irq_seq = &radeon->iw.irq_seq; +- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT, +- &ie, sizeof(ie)); +- if (ret) { +- fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, +- ret); +- exit(1); +- } +-} +- +-static void radeonWaitIrq(radeonContextPtr radeon) +-{ +- int ret; +- +- do { +- ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT, +- &radeon->iw, 
sizeof(radeon->iw)); +- } while (ret && (errno == EINTR || errno == EBUSY)); +- +- if (ret) { +- fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, +- ret); +- exit(1); +- } +-} +- +-static void radeonWaitForFrameCompletion(radeonContextPtr radeon) +-{ +- drm_radeon_sarea_t *sarea = radeon->sarea; +- +- if (radeon->do_irqs) { +- if (radeonGetLastFrame(radeon) < sarea->last_frame) { +- if (!radeon->irqsEmitted) { +- while (radeonGetLastFrame(radeon) < +- sarea->last_frame) ; +- } else { +- UNLOCK_HARDWARE(radeon); +- radeonWaitIrq(radeon); +- LOCK_HARDWARE(radeon); +- } +- radeon->irqsEmitted = 10; +- } +- +- if (radeon->irqsEmitted) { +- radeonEmitIrqLocked(radeon); +- radeon->irqsEmitted--; +- } +- } else { +- while (radeonGetLastFrame(radeon) < sarea->last_frame) { +- UNLOCK_HARDWARE(radeon); +- if (radeon->do_usleeps) +- DO_USLEEP(1); +- LOCK_HARDWARE(radeon); +- } +- } +-} +- +-/* Copy the back color buffer to the front color buffer. +- */ +-void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, +- const drm_clip_rect_t * rect) +-{ +- radeonContextPtr radeon; +- GLint nbox, i, ret; +- GLboolean missed_target; +- int64_t ust; +- __DRIscreenPrivate *psp = dPriv->driScreenPriv; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); +- +- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) { +- fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, +- (void *)radeon->glCtx); +- } +- +- r300Flush(radeon->glCtx); +- +- LOCK_HARDWARE(radeon); +- +- /* Throttle the frame rate -- only allow one pending swap buffers +- * request at a time. 
+- */ +- radeonWaitForFrameCompletion(radeon); +- if (!rect) +- { +- UNLOCK_HARDWARE(radeon); +- driWaitForVBlank(dPriv, &missed_target); +- LOCK_HARDWARE(radeon); +- } +- +- nbox = dPriv->numClipRects; /* must be in locked region */ +- +- for (i = 0; i < nbox;) { +- GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox); +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = radeon->sarea->boxes; +- GLint n = 0; +- +- for ( ; i < nr ; i++ ) { +- +- *b = box[i]; +- +- if (rect) +- { +- if (rect->x1 > b->x1) +- b->x1 = rect->x1; +- if (rect->y1 > b->y1) +- b->y1 = rect->y1; +- if (rect->x2 < b->x2) +- b->x2 = rect->x2; +- if (rect->y2 < b->y2) +- b->y2 = rect->y2; +- +- if (b->x1 >= b->x2 || b->y1 >= b->y2) +- continue; +- } +- +- b++; +- n++; +- } +- radeon->sarea->nbox = n; +- +- if (!n) +- continue; +- +- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP); +- +- if (ret) { +- fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n", +- ret); +- UNLOCK_HARDWARE(radeon); +- exit(1); +- } +- } +- +- UNLOCK_HARDWARE(radeon); +- if (!rect) +- { +- ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE; +- +- radeon->swap_count++; +- (*psp->systemTime->getUST) (&ust); +- if (missed_target) { +- radeon->swap_missed_count++; +- radeon->swap_missed_ust = ust - radeon->swap_ust; +- } +- +- radeon->swap_ust = ust; +- +- sched_yield(); +- } +-} +- +-void radeonPageFlip(__DRIdrawablePrivate * dPriv) +-{ +- radeonContextPtr radeon; +- GLint ret; +- GLboolean missed_target; +- __DRIscreenPrivate *psp = dPriv->driScreenPriv; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); +- +- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) { +- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, +- radeon->sarea->pfCurrentPage); +- } +- +- r300Flush(radeon->glCtx); +- LOCK_HARDWARE(radeon); +- +- if (!dPriv->numClipRects) { +- UNLOCK_HARDWARE(radeon); +- usleep(10000); /* 
throttle invisible client 10ms */ +- return; +- } +- +- /* Need to do this for the perf box placement: +- */ +- { +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = radeon->sarea->boxes; +- b[0] = box[0]; +- radeon->sarea->nbox = 1; +- } +- +- /* Throttle the frame rate -- only allow a few pending swap buffers +- * request at a time. +- */ +- radeonWaitForFrameCompletion(radeon); +- UNLOCK_HARDWARE(radeon); +- driWaitForVBlank(dPriv, &missed_target); +- if (missed_target) { +- radeon->swap_missed_count++; +- (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust); +- } +- LOCK_HARDWARE(radeon); +- +- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP); +- +- UNLOCK_HARDWARE(radeon); +- +- if (ret) { +- fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret); +- exit(1); +- } +- +- radeon->swap_count++; +- (void)(*psp->systemTime->getUST) (&radeon->swap_ust); +- +- driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, +- radeon->sarea->pfCurrentPage); +- +- if (radeon->sarea->pfCurrentPage == 1) { +- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; +- } else { +- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; +- } +- +- if (IS_R300_CLASS(radeon->radeonScreen)) { +- r300ContextPtr r300 = (r300ContextPtr)radeon; +- R300_STATECHANGE(r300, cb); +- r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + +- r300->radeon.radeonScreen->fbLocation; +- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; +- +- if (r300->radeon.radeonScreen->cpp == 4) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; +- else +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; +- +- if (r300->radeon.sarea->tiling_enabled) +- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +- } +-} +- +-void radeonWaitForIdleLocked(radeonContextPtr 
radeon) +-{ +- int ret; +- int i = 0; +- +- do { +- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE); +- if (ret) +- DO_USLEEP(1); +- } while (ret && ++i < 100); +- +- if (ret < 0) { +- UNLOCK_HARDWARE(radeon); +- fprintf(stderr, "Error: R300 timed out... exiting\n"); +- exit(-1); +- } +-} +- +-static void radeonWaitForIdle(radeonContextPtr radeon) +-{ +- LOCK_HARDWARE(radeon); +- radeonWaitForIdleLocked(radeon); +- UNLOCK_HARDWARE(radeon); +-} +- +-void radeonFlush(GLcontext * ctx) +-{ +- radeonContextPtr radeon = RADEON_CONTEXT(ctx); +- +- if (IS_R300_CLASS(radeon->radeonScreen)) +- r300Flush(ctx); +-} +- +- +-/* Make sure all commands have been sent to the hardware and have +- * completed processing. +- */ +-void radeonFinish(GLcontext * ctx) +-{ +- radeonContextPtr radeon = RADEON_CONTEXT(ctx); +- +- radeonFlush(ctx); +- +- if (radeon->do_irqs) { +- LOCK_HARDWARE(radeon); +- radeonEmitIrqLocked(radeon); +- UNLOCK_HARDWARE(radeon); +- radeonWaitIrq(radeon); +- } else +- radeonWaitForIdle(radeon); +-} +diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h +deleted file mode 100644 +index 3add775..0000000 +--- a/src/mesa/drivers/dri/r300/radeon_ioctl.h ++++ /dev/null +@@ -1,57 +0,0 @@ +-/* +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#ifndef __RADEON_IOCTL_H__ +-#define __RADEON_IOCTL_H__ +- +-#include "main/simple_list.h" +-#include "radeon_dri.h" +-#include "radeon_lock.h" +- +-#include "xf86drm.h" +-#include "drm.h" +-#if 0 +-#include "r200context.h" +-#endif +-#include "radeon_drm.h" +- +-extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable, +- const drm_clip_rect_t * rect); +-extern void radeonPageFlip(__DRIdrawablePrivate * drawable); +-extern void radeonFlush(GLcontext * ctx); +-extern void radeonFinish(GLcontext * ctx); +-extern void radeonWaitForIdleLocked(radeonContextPtr radeon); +-extern uint32_t radeonGetAge(radeonContextPtr radeon); +- +-#endif /* __RADEON_IOCTL_H__ */ +diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c +deleted file mode 100644 +index 4f47afd..af4108a +--- a/src/mesa/drivers/dri/r300/radeon_lock.c ++++ /dev/null +@@ -1,137 +0,0 @@ +-/************************************************************************** +- +-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and +- VA Linux Systems Inc., Fremont, California. +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-All Rights Reserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Gareth Hughes +- * Keith Whitwell +- * Kevin E. Martin +- */ +- +-#include "radeon_lock.h" +-#include "radeon_ioctl.h" +-#include "radeon_state.h" +-#include "r300_context.h" +-#include "r300_state.h" +- +-#include "main/framebuffer.h" +- +-#include "drirenderbuffer.h" +- +-#if DEBUG_LOCKING +-char *prevLockFile = NULL; +-int prevLockLine = 0; +-#endif +- +-/* Turn on/off page flipping according to the flags in the sarea: +- */ +-void radeonUpdatePageFlipping(radeonContextPtr rmesa) +-{ +- int use_back; +- +- rmesa->doPageFlip = rmesa->sarea->pfState; +- if (rmesa->glCtx->WinSysDrawBuffer) { +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); +- r300UpdateDrawBuffer(rmesa->glCtx); +- } +- +- use_back = rmesa->glCtx->DrawBuffer ? 
+- (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == +- BUFFER_BACK_LEFT) : 1; +- use_back ^= (rmesa->sarea->pfCurrentPage == 1); +- +- if (use_back) { +- rmesa->state.color.drawOffset = +- rmesa->radeonScreen->backOffset; +- rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; +- } else { +- rmesa->state.color.drawOffset = +- rmesa->radeonScreen->frontOffset; +- rmesa->state.color.drawPitch = +- rmesa->radeonScreen->frontPitch; +- } +-} +- +-/* Update the hardware state. This is called if another context has +- * grabbed the hardware lock, which includes the X server. This +- * function also updates the driver's window state after the X server +- * moves, resizes or restacks a window -- the change will be reflected +- * in the drawable position and clip rects. Since the X server grabs +- * the hardware lock when it changes the window state, this routine will +- * automatically be called after such a change. +- */ +-void radeonGetLock(radeonContextPtr rmesa, GLuint flags) +-{ +- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; +- __DRIdrawablePrivate *const readable = rmesa->dri.readable; +- __DRIscreenPrivate *sPriv = rmesa->dri.screen; +- drm_radeon_sarea_t *sarea = rmesa->sarea; +- r300ContextPtr r300 = (r300ContextPtr) rmesa; +- +- assert(drawable != NULL); +- +- drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); +- +- /* The window might have moved, so we might need to get new clip +- * rects. +- * +- * NOTE: This releases and regrabs the hw lock to allow the X server +- * to respond to the DRI protocol request for new drawable info. +- * Since the hardware state depends on having the latest drawable +- * clip rects, all state checking must be done _after_ this call. 
+- */ +- DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); +- if (drawable != readable) { +- DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); +- } +- +- if (rmesa->lastStamp != drawable->lastStamp) { +- radeonUpdatePageFlipping(rmesa); +- radeonSetCliprects(rmesa); +- r300UpdateViewportOffset(rmesa->glCtx); +- driUpdateFramebufferSize(rmesa->glCtx, drawable); +- } +- +- if (sarea->ctx_owner != rmesa->dri.hwContext) { +- int i; +- +- sarea->ctx_owner = rmesa->dri.hwContext; +- for (i = 0; i < r300->nr_heaps; i++) { +- DRI_AGE_TEXTURES(r300->texture_heaps[i]); +- } +- } +- +- rmesa->lost_context = GL_TRUE; +-} +diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c +new file mode 120000 +index 4f47afd..af4108a +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/radeon_lock.c +@@ -0,0 +1 @@ ++../radeon/radeon_lock.c +\ No newline at end of file +diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h +deleted file mode 100644 +index a344837..64bdf94 +--- a/src/mesa/drivers/dri/r300/radeon_lock.h ++++ /dev/null +@@ -1,115 +0,0 @@ +-/************************************************************************** +- +-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and +- VA Linux Systems Inc., Fremont, California. +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-All Rights Reserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Gareth Hughes +- * Keith Whitwell +- * Kevin E. Martin +- */ +- +-#ifndef __RADEON_LOCK_H__ +-#define __RADEON_LOCK_H__ +- +-#include "radeon_context.h" +- +-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); +-extern void radeonUpdatePageFlipping(radeonContextPtr rmesa); +- +-/* Turn DEBUG_LOCKING on to find locking conflicts. 
+- */ +-#define DEBUG_LOCKING 0 +- +-#if DEBUG_LOCKING +-extern char *prevLockFile; +-extern int prevLockLine; +- +-#define DEBUG_LOCK() \ +- do { \ +- prevLockFile = (__FILE__); \ +- prevLockLine = (__LINE__); \ +- } while (0) +- +-#define DEBUG_RESET() \ +- do { \ +- prevLockFile = 0; \ +- prevLockLine = 0; \ +- } while (0) +- +-#define DEBUG_CHECK_LOCK() \ +- do { \ +- if (prevLockFile) { \ +- fprintf(stderr, \ +- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ +- prevLockFile, prevLockLine, __FILE__, __LINE__); \ +- exit(1); \ +- } \ +- } while (0) +- +-#else +- +-#define DEBUG_LOCK() +-#define DEBUG_RESET() +-#define DEBUG_CHECK_LOCK() +- +-#endif +- +-/* +- * !!! We may want to separate locks from locks with validation. This +- * could be used to improve performance for those things commands that +- * do not do any drawing !!! +- */ +- +-/* Lock the hardware and validate our state. +- */ +-#define LOCK_HARDWARE( rmesa ) \ +- do { \ +- char __ret = 0; \ +- DEBUG_CHECK_LOCK(); \ +- DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ +- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \ +- if (__ret) \ +- radeonGetLock((rmesa), 0); \ +- DEBUG_LOCK(); \ +- } while (0) +- +-#define UNLOCK_HARDWARE( rmesa ) \ +- do { \ +- DRM_UNLOCK((rmesa)->dri.fd, \ +- (rmesa)->dri.hwLock, \ +- (rmesa)->dri.hwContext); \ +- DEBUG_RESET(); \ +- } while (0) +- +-#endif /* __RADEON_LOCK_H__ */ +diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h +new file mode 120000 +index a344837..64bdf94 +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/radeon_lock.h +@@ -0,0 +1 @@ ++../radeon/radeon_lock.h +\ No newline at end of file +diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c +index 58bc0d5..8a945d8 100644 +--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c ++++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c +@@ -35,7 +35,7 @@ + + #include "radeon_program_pair.h" + 
+-#include "radeon_context.h" ++#include "radeon_common.h" + + #include "shader/prog_print.h" + +diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c +deleted file mode 100644 +index 16f9fb9..232868c +--- a/src/mesa/drivers/dri/r300/radeon_span.c ++++ /dev/null +@@ -1,349 +0,0 @@ +-/************************************************************************** +- +-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and +- VA Linux Systems Inc., Fremont, California. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-All Rights Reserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Kevin E. Martin +- * Gareth Hughes +- * Keith Whitwell +- * +- */ +- +-#include "main/glheader.h" +-#include "swrast/swrast.h" +- +-#include "r300_state.h" +-#include "radeon_ioctl.h" +-#include "r300_ioctl.h" +-#include "radeon_span.h" +- +-#include "drirenderbuffer.h" +- +-#define DBG 0 +- +-/* +- * Note that all information needed to access pixels in a renderbuffer +- * should be obtained through the gl_renderbuffer parameter, not per-context +- * information. +- */ +-#define LOCAL_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ +- const GLuint bottom = dPriv->h - 1; \ +- GLubyte *buf = (GLubyte *) drb->flippedData \ +- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ +- GLuint p; \ +- (void) p; +- +-#define LOCAL_DEPTH_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ +- const GLuint bottom = dPriv->h - 1; \ +- GLuint xo = dPriv->x; \ +- GLuint yo = dPriv->y; \ +- GLubyte *buf = (GLubyte *) drb->Base.Data; +- +-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS +- +-#define Y_FLIP(Y) (bottom - (Y)) +- +-#define HW_LOCK() +- +-#define HW_UNLOCK() +- +-/* ================================================================ +- * Color buffer +- */ +- +-/* 16 bit, RGB565 color spanline and pixel functions +- */ +-#define SPANTMP_PIXEL_FMT GL_RGB +-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 +- +-#define TAG(x) radeon##x##_RGB565 +-#define TAG2(x,y) radeon##x##_RGB565##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +-#include "spantmp2.h" +- +-/* 32 bit, ARGB8888 color spanline and pixel functions +- */ +-#define SPANTMP_PIXEL_FMT GL_BGRA +-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +- +-#define TAG(x) radeon##x##_ARGB8888 +-#define TAG2(x,y) radeon##x##_ARGB8888##y +-#define GET_PTR(X,Y) (buf + 
((Y) * drb->flippedPitch + (X)) * 4) +-#include "spantmp2.h" +- +-/* ================================================================ +- * Depth buffer +- */ +- +-/* The Radeon family has depth tiling on all the time, so we have to convert +- * the x,y coordinates into the memory bus address (mba) in the same +- * manner as the engine. In each case, the linear block address (ba) +- * is calculated, and then wired with x and y to produce the final +- * memory address. +- * The chip will do address translation on its own if the surface registers +- * are set up correctly. It is not quite enough to get it working with hyperz +- * too... +- */ +- +-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 4 * (x + y * pitch); +- } else { +- GLuint ba, address = 0; /* a[0..1] = 0 */ +- +-#ifdef COMPILE_R300 +- ba = (y / 8) * (pitch / 8) + (x / 8); +-#else +- ba = (y / 16) * (pitch / 16) + (x / 16); +-#endif +- +- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ +- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ +- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ +- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ +- +- address |= (y & 0x8) << 7; /* a[10] = y[3] */ +- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ +- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] 
*/ +- +- return address; +- } +-} +- +-static INLINE GLuint +-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 2 * (x + y * pitch); +- } else { +- GLuint ba, address = 0; /* a[0] = 0 */ +- +- ba = (y / 16) * (pitch / 32) + (x / 32); +- +- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ +- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ +- address |= (x & 0x8) << 4; /* a[7] = x[3] */ +- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ +- address |= (y & 0x8) << 7; /* a[10] = y[3] */ +- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ +- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ +- +- return address; +- } +-} +- +-/* 16-bit depth buffer functions +- */ +-#define VALUE_TYPE GLushort +- +-#define WRITE_DEPTH( _x, _y, d ) \ +- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; +- +-#define READ_DEPTH( d, _x, _y ) \ +- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); +- +-#define TAG(x) radeon##x##_z16 +-#include "depthtmp.h" +- +-/* 24 bit depth, 8 bit stencil depthbuffer functions +- * +- * Careful: It looks like the R300 uses ZZZS byte order while the R200 +- * uses SZZZ for 24 bit depth, 8 bit stencil mode. 
+- */ +-#define VALUE_TYPE GLuint +- +-#ifdef COMPILE_R300 +-#define WRITE_DEPTH( _x, _y, d ) \ +-do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0x000000ff; \ +- tmp |= ((d << 8) & 0xffffff00); \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +-#else +-#define WRITE_DEPTH( _x, _y, d ) \ +-do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0xff000000; \ +- tmp |= ((d) & 0x00ffffff); \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +-#endif +- +-#ifdef COMPILE_R300 +-#define READ_DEPTH( d, _x, _y ) \ +- do { \ +- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ +- _y + yo )) & 0xffffff00) >> 8; \ +- }while(0) +-#else +-#define READ_DEPTH( d, _x, _y ) \ +- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ +- _y + yo )) & 0x00ffffff; +-#endif +- +-#define TAG(x) radeon##x##_z24_s8 +-#include "depthtmp.h" +- +-/* ================================================================ +- * Stencil buffer +- */ +- +-/* 24 bit depth, 8 bit stencil depthbuffer functions +- */ +-#ifdef COMPILE_R300 +-#define WRITE_STENCIL( _x, _y, d ) \ +-do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0xffffff00; \ +- tmp |= (d) & 0xff; \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +-#else +-#define WRITE_STENCIL( _x, _y, d ) \ +-do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- tmp &= 0x00ffffff; \ +- tmp |= (((d) & 0xff) << 24); \ +- *(GLuint *)(buf + offset) = tmp; \ +-} while (0) +-#endif +- +-#ifdef COMPILE_R300 +-#define READ_STENCIL( d, _x, _y ) \ +-do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- d = tmp & 0x000000ff; \ +-} while (0) +-#else +-#define READ_STENCIL( d, _x, _y ) \ +-do { \ +- GLuint offset = 
radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ +- d = (tmp & 0xff000000) >> 24; \ +-} while (0) +-#endif +- +-#define TAG(x) radeon##x##_z24_s8 +-#include "stenciltmp.h" +- +-/* Move locking out to get reasonable span performance (10x better +- * than doing this in HW_LOCK above). WaitForIdle() is the main +- * culprit. +- */ +- +-static void radeonSpanRenderStart(GLcontext * ctx) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +-#ifdef COMPILE_R300 +- r300ContextPtr r300 = (r300ContextPtr) rmesa; +- R300_FIREVERTICES(r300); +-#else +- RADEON_FIREVERTICES(rmesa); +-#endif +- LOCK_HARDWARE(rmesa); +- radeonWaitForIdleLocked(rmesa); +- +- /* Read the first pixel in the frame buffer. This should +- * be a noop, right? In fact without this conform fails as reading +- * from the framebuffer sometimes produces old results -- the +- * on-card read cache gets mixed up and doesn't notice that the +- * framebuffer has been updated. +- * +- * Note that we should probably be reading some otherwise unused +- * region of VRAM, otherwise we might get incorrect results when +- * reading pixels from the top left of the screen. +- * +- * I found this problem on an R420 with glean's texCube test. +- * Note that the R200 span code also *writes* the first pixel in the +- * framebuffer, but I've found this to be unnecessary. 
+- * -- Nicolai Hähnle, June 2008 +- */ +- { +- int p; +- driRenderbuffer *drb = +- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; +- volatile int *buf = +- (volatile int *)(rmesa->dri.screen->pFB + drb->offset); +- p = *buf; +- } +-} +- +-static void radeonSpanRenderFinish(GLcontext * ctx) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- _swrast_flush(ctx); +- UNLOCK_HARDWARE(rmesa); +-} +- +-void radeonInitSpanFuncs(GLcontext * ctx) +-{ +- struct swrast_device_driver *swdd = +- _swrast_GetDeviceDriverReference(ctx); +- swdd->SpanRenderStart = radeonSpanRenderStart; +- swdd->SpanRenderFinish = radeonSpanRenderFinish; +-} +- +-/** +- * Plug in the Get/Put routines for the given driRenderbuffer. +- */ +-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +-{ +- if (drb->Base.InternalFormat == GL_RGBA) { +- if (vis->redBits == 5 && vis->greenBits == 6 +- && vis->blueBits == 5) { +- radeonInitPointers_RGB565(&drb->Base); +- } else { +- radeonInitPointers_ARGB8888(&drb->Base); +- } +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { +- radeonInitDepthPointers_z16(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { +- radeonInitDepthPointers_z24_s8(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { +- radeonInitStencilPointers_z24_s8(&drb->Base); +- } +-} +diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c +new file mode 120000 +index 16f9fb9..232868c +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/radeon_span.c +@@ -0,0 +1 @@ ++../radeon/radeon_span.c +\ No newline at end of file +diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c +deleted file mode 100644 +index c401da6..0000000 +--- a/src/mesa/drivers/dri/r300/radeon_state.c ++++ /dev/null +@@ -1,244 +0,0 @@ +-/************************************************************************** +- +-Copyright (C) The Weather 
Channel, Inc. 2002. All Rights Reserved. +- +-The Weather Channel (TM) funded Tungsten Graphics to develop the +-initial release of the Radeon 8500 driver under the XFree86 license. +-This notice must be preserved. +- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- */ +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/api_arrayelt.h" +-#include "main/enums.h" +-#include "main/framebuffer.h" +-#include "main/colormac.h" +-#include "main/light.h" +- +-#include "swrast/swrast.h" +-#include "vbo/vbo.h" +-#include "tnl/tnl.h" +-#include "tnl/t_pipeline.h" +-#include "swrast_setup/swrast_setup.h" +- +-#include "radeon_ioctl.h" +-#include "radeon_state.h" +-#include "r300_ioctl.h" +- +- +-/* ============================================================= +- * Scissoring +- */ +- +-static GLboolean intersect_rect(drm_clip_rect_t * out, +- drm_clip_rect_t * a, drm_clip_rect_t * b) +-{ +- *out = *a; +- if (b->x1 > out->x1) +- out->x1 = b->x1; +- if (b->y1 > out->y1) +- out->y1 = b->y1; +- if (b->x2 < out->x2) +- out->x2 = b->x2; +- if (b->y2 < out->y2) +- out->y2 = b->y2; +- if (out->x1 >= out->x2) +- return GL_FALSE; +- if (out->y1 >= out->y2) +- return GL_FALSE; +- return GL_TRUE; +-} +- +-void radeonRecalcScissorRects(radeonContextPtr radeon) +-{ +- drm_clip_rect_t *out; +- int i; +- +- /* Grow cliprect store? 
+- */ +- if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) { +- while (radeon->state.scissor.numAllocedClipRects < +- radeon->numClipRects) { +- radeon->state.scissor.numAllocedClipRects += 1; /* zero case */ +- radeon->state.scissor.numAllocedClipRects *= 2; +- } +- +- if (radeon->state.scissor.pClipRects) +- FREE(radeon->state.scissor.pClipRects); +- +- radeon->state.scissor.pClipRects = +- MALLOC(radeon->state.scissor.numAllocedClipRects * +- sizeof(drm_clip_rect_t)); +- +- if (radeon->state.scissor.pClipRects == NULL) { +- radeon->state.scissor.numAllocedClipRects = 0; +- return; +- } +- } +- +- out = radeon->state.scissor.pClipRects; +- radeon->state.scissor.numClipRects = 0; +- +- for (i = 0; i < radeon->numClipRects; i++) { +- if (intersect_rect(out, +- &radeon->pClipRects[i], +- &radeon->state.scissor.rect)) { +- radeon->state.scissor.numClipRects++; +- out++; +- } +- } +-} +- +-void radeonUpdateScissor(GLcontext* ctx) +-{ +- radeonContextPtr radeon = RADEON_CONTEXT(ctx); +- +- if (radeon->dri.drawable) { +- __DRIdrawablePrivate *dPriv = radeon->dri.drawable; +- int x1 = dPriv->x + ctx->Scissor.X; +- int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height); +- +- radeon->state.scissor.rect.x1 = x1; +- radeon->state.scissor.rect.y1 = y1; +- radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width; +- radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height; +- +- radeonRecalcScissorRects(radeon); +- } +-} +- +-static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) +-{ +- if (ctx->Scissor.Enabled) { +- /* We don't pipeline cliprect changes */ +- r300Flush(ctx); +- radeonUpdateScissor(ctx); +- } +-} +- +- +-/** +- * Update cliprects and scissors. 
+- */ +-void radeonSetCliprects(radeonContextPtr radeon) +-{ +- __DRIdrawablePrivate *const drawable = radeon->dri.drawable; +- __DRIdrawablePrivate *const readable = radeon->dri.readable; +- GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate; +- GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate; +- +- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { +- /* Can't ignore 2d windows if we are page flipping. */ +- if (drawable->numBackClipRects == 0 || radeon->doPageFlip || +- radeon->sarea->pfCurrentPage == 1) { +- radeon->numClipRects = drawable->numClipRects; +- radeon->pClipRects = drawable->pClipRects; +- } else { +- radeon->numClipRects = drawable->numBackClipRects; +- radeon->pClipRects = drawable->pBackClipRects; +- } +- } else { +- /* front buffer (or none, or multiple buffers */ +- radeon->numClipRects = drawable->numClipRects; +- radeon->pClipRects = drawable->pClipRects; +- } +- +- if ((draw_fb->Width != drawable->w) || +- (draw_fb->Height != drawable->h)) { +- _mesa_resize_framebuffer(radeon->glCtx, draw_fb, +- drawable->w, drawable->h); +- draw_fb->Initialized = GL_TRUE; +- } +- +- if (drawable != readable) { +- if ((read_fb->Width != readable->w) || +- (read_fb->Height != readable->h)) { +- _mesa_resize_framebuffer(radeon->glCtx, read_fb, +- readable->w, readable->h); +- read_fb->Initialized = GL_TRUE; +- } +- } +- +- if (radeon->state.scissor.enabled) +- radeonRecalcScissorRects(radeon); +- +- radeon->lastStamp = drawable->lastStamp; +-} +- +- +-/** +- * Handle common enable bits. +- * Called as a fallback by r200Enable/r300Enable. 
+- */ +-void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) +-{ +- radeonContextPtr radeon = RADEON_CONTEXT(ctx); +- +- switch(cap) { +- case GL_SCISSOR_TEST: +- /* We don't pipeline cliprect & scissor changes */ +- r300Flush(ctx); +- +- radeon->state.scissor.enabled = state; +- radeonUpdateScissor(ctx); +- break; +- +- default: +- return; +- } +-} +- +- +-/** +- * Initialize default state. +- * This function is called once at context init time from +- * r200InitState/r300InitState +- */ +-void radeonInitState(radeonContextPtr radeon) +-{ +- radeon->Fallback = 0; +- +- if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { +- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; +- } else { +- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; +- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; +- } +-} +- +- +-/** +- * Initialize common state functions. +- * Called by r200InitStateFuncs/r300InitStateFuncs +- */ +-void radeonInitStateFuncs(struct dd_function_table *functions) +-{ +- functions->Scissor = radeonScissor; +-} +diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h +deleted file mode 100644 +index 821cb40..0000000 +--- a/src/mesa/drivers/dri/r300/radeon_state.h ++++ /dev/null +@@ -1,43 +0,0 @@ +-/* +-Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation the rights to use, copy, modify, merge, publish, +-distribute, sublicense, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Nicolai Haehnle +- */ +- +-#ifndef __RADEON_STATE_H__ +-#define __RADEON_STATE_H__ +- +-extern void radeonRecalcScissorRects(radeonContextPtr radeon); +-extern void radeonSetCliprects(radeonContextPtr radeon); +-extern void radeonUpdateScissor(GLcontext* ctx); +- +-extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state); +- +-extern void radeonInitState(radeonContextPtr radeon); +-extern void radeonInitStateFuncs(struct dd_function_table* functions); +- +-#endif +diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile +index f223b2d..f469c6f 100644 +--- a/src/mesa/drivers/dri/radeon/Makefile ++++ b/src/mesa/drivers/dri/radeon/Makefile +@@ -4,25 +4,36 @@ + TOP = ../../../../.. 
+ include $(TOP)/configs/current + ++CFLAGS += $(RADEON_CFLAGS) ++ + LIBNAME = radeon_dri.so + + MINIGLX_SOURCES = server/radeon_dri.c + ++RADEON_COMMON_SOURCES = \ ++ radeon_texture.c \ ++ radeon_common_context.c \ ++ radeon_common.c \ ++ radeon_dma.c \ ++ radeon_lock.c \ ++ radeon_bo_legacy.c \ ++ radeon_cs_legacy.c \ ++ radeon_mipmap_tree.c \ ++ radeon_span.c ++ + DRIVER_SOURCES = \ + radeon_context.c \ + radeon_ioctl.c \ +- radeon_lock.c \ + radeon_screen.c \ + radeon_state.c \ + radeon_state_init.c \ + radeon_tex.c \ +- radeon_texmem.c \ + radeon_texstate.c \ + radeon_tcl.c \ + radeon_swtcl.c \ +- radeon_span.c \ + radeon_maos.c \ +- radeon_sanity.c ++ radeon_sanity.c \ ++ $(RADEON_COMMON_SOURCES) + + C_SOURCES = \ + $(COMMON_SOURCES) \ +@@ -30,6 +41,8 @@ C_SOURCES = \ + + DRIVER_DEFINES = -DRADEON_COMMON=0 + ++DRI_LIB_DEPS += $(RADEON_LDFLAGS) ++ + X86_SOURCES = + + include ../Makefile.template +diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +new file mode 100644 +index 0000000..1ed13f1 +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +@@ -0,0 +1,182 @@ ++/* ++ * Copyright © 2008 Jérôme Glisse ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS ++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ */ ++/* ++ * Authors: ++ * Jérôme Glisse ++ */ ++#ifndef RADEON_BO_H ++#define RADEON_BO_H ++ ++#include ++#include ++//#include "radeon_track.h" ++ ++/* bo object */ ++#define RADEON_BO_FLAGS_MACRO_TILE 1 ++#define RADEON_BO_FLAGS_MICRO_TILE 2 ++ ++struct radeon_bo_manager; ++ ++struct radeon_bo { ++ uint32_t alignment; ++ uint32_t handle; ++ uint32_t size; ++ uint32_t domains; ++ uint32_t flags; ++ unsigned cref; ++#ifdef RADEON_BO_TRACK ++ struct radeon_track *track; ++#endif ++ void *ptr; ++ struct radeon_bo_manager *bom; ++ uint32_t space_accounted; ++}; ++ ++/* bo functions */ ++struct radeon_bo_funcs { ++ struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, ++ uint32_t handle, ++ uint32_t size, ++ uint32_t alignment, ++ uint32_t domains, ++ uint32_t flags); ++ void (*bo_ref)(struct radeon_bo *bo); ++ struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); ++ int (*bo_map)(struct radeon_bo *bo, int write); ++ int (*bo_unmap)(struct radeon_bo *bo); ++ int (*bo_wait)(struct radeon_bo *bo); ++}; ++ ++struct radeon_bo_manager { ++ struct radeon_bo_funcs *funcs; ++ int fd; ++ ++#ifdef RADEON_BO_TRACK ++ struct radeon_tracker tracker; ++#endif ++}; ++ ++static inline void _radeon_bo_debug(struct radeon_bo *bo, ++ const char *op, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n", ++ op, bo, bo->handle, bo->size, bo->cref, file, func, line); ++} ++ ++static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager 
*bom, ++ uint32_t handle, ++ uint32_t size, ++ uint32_t alignment, ++ uint32_t domains, ++ uint32_t flags, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ struct radeon_bo *bo; ++ ++ bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); ++#ifdef RADEON_BO_TRACK ++ if (bo) { ++ bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle); ++ radeon_track_add_event(bo->track, file, func, "open", line); ++ } ++#endif ++ return bo; ++} ++ ++static inline void _radeon_bo_ref(struct radeon_bo *bo, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ bo->cref++; ++#ifdef RADEON_BO_TRACK ++ radeon_track_add_event(bo->track, file, func, "ref", line); ++#endif ++ bo->bom->funcs->bo_ref(bo); ++} ++ ++static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ bo->cref--; ++#ifdef RADEON_BO_TRACK ++ radeon_track_add_event(bo->track, file, func, "unref", line); ++ if (bo->cref <= 0) { ++ radeon_tracker_remove_track(&bo->bom->tracker, bo->track); ++ bo->track = NULL; ++ } ++#endif ++ return bo->bom->funcs->bo_unref(bo); ++} ++ ++static inline int _radeon_bo_map(struct radeon_bo *bo, ++ int write, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ return bo->bom->funcs->bo_map(bo, write); ++} ++ ++static inline int _radeon_bo_unmap(struct radeon_bo *bo, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ return bo->bom->funcs->bo_unmap(bo); ++} ++ ++static inline int _radeon_bo_wait(struct radeon_bo *bo, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ return bo->bom->funcs->bo_wait(bo); ++} ++ ++#define radeon_bo_open(bom, h, s, a, d, f)\ ++ _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_ref(bo)\ ++ _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_unref(bo)\ ++ _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_map(bo, w)\ ++ 
_radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_unmap(bo)\ ++ _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_debug(bo, opcode)\ ++ _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) ++#define radeon_bo_wait(bo) \ ++ _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) ++ ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +new file mode 100644 +index 0000000..03a6299 +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +@@ -0,0 +1,825 @@ ++/* ++ * Copyright © 2008 Nicolai Haehnle ++ * Copyright © 2008 Dave Airlie ++ * Copyright © 2008 Jérôme Glisse ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. 
++ */ ++/* ++ * Authors: ++ * Aapo Tahkola ++ * Nicolai Haehnle ++ * Dave Airlie ++ * Jérôme Glisse ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "xf86drm.h" ++#include "texmem.h" ++#include "main/simple_list.h" ++ ++#include "drm.h" ++#include "radeon_drm.h" ++#include "radeon_common.h" ++#include "radeon_bocs_wrapper.h" ++ ++/* no seriously texmem.c is this screwed up */ ++struct bo_legacy_texture_object { ++ driTextureObject base; ++ struct bo_legacy *parent; ++}; ++ ++struct bo_legacy { ++ struct radeon_bo base; ++ int map_count; ++ uint32_t pending; ++ int is_pending; ++ int static_bo; ++ uint32_t offset; ++ struct bo_legacy_texture_object *tobj; ++ int validated; ++ int dirty; ++ void *ptr; ++ struct bo_legacy *next, *prev; ++ struct bo_legacy *pnext, *pprev; ++}; ++ ++struct bo_manager_legacy { ++ struct radeon_bo_manager base; ++ unsigned nhandle; ++ unsigned nfree_handles; ++ unsigned cfree_handles; ++ uint32_t current_age; ++ struct bo_legacy bos; ++ struct bo_legacy pending_bos; ++ uint32_t fb_location; ++ uint32_t texture_offset; ++ unsigned dma_alloc_size; ++ uint32_t dma_buf_count; ++ unsigned cpendings; ++ driTextureObject texture_swapped; ++ driTexHeap *texture_heap; ++ struct radeon_screen *screen; ++ unsigned *free_handles; ++}; ++ ++static void bo_legacy_tobj_destroy(void *data, driTextureObject *t) ++{ ++ struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t; ++ ++ if (tobj->parent) { ++ tobj->parent->tobj = NULL; ++ tobj->parent->validated = 0; ++ } ++} ++ ++static void inline clean_handles(struct bo_manager_legacy *bom) ++{ ++ while (bom->cfree_handles > 0 && ++ !bom->free_handles[bom->cfree_handles - 1]) ++ bom->cfree_handles--; ++ ++} ++static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle) ++{ ++ uint32_t tmp; ++ ++ *handle = 0; ++ if (bom->nhandle == 0xFFFFFFFF) { ++ return -EINVAL; ++ } ++ if (bom->cfree_handles > 0) { ++ 
tmp = bom->free_handles[--bom->cfree_handles]; ++ clean_handles(bom); ++ } else { ++ bom->cfree_handles = 0; ++ tmp = bom->nhandle++; ++ } ++ assert(tmp); ++ *handle = tmp; ++ return 0; ++} ++ ++static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle) ++{ ++ uint32_t *handles; ++ ++ if (!handle) { ++ return 0; ++ } ++ if (handle == (bom->nhandle - 1)) { ++ int i; ++ ++ bom->nhandle--; ++ for (i = bom->cfree_handles - 1; i >= 0; i--) { ++ if (bom->free_handles[i] == (bom->nhandle - 1)) { ++ bom->nhandle--; ++ bom->free_handles[i] = 0; ++ } ++ } ++ clean_handles(bom); ++ return 0; ++ } ++ if (bom->cfree_handles < bom->nfree_handles) { ++ bom->free_handles[bom->cfree_handles++] = handle; ++ return 0; ++ } ++ bom->nfree_handles += 0x100; ++ handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4); ++ if (handles == NULL) { ++ bom->nfree_handles -= 0x100; ++ return -ENOMEM; ++ } ++ bom->free_handles = handles; ++ bom->free_handles[bom->cfree_handles++] = handle; ++ return 0; ++} ++ ++static void legacy_get_current_age(struct bo_manager_legacy *boml) ++{ ++ drm_radeon_getparam_t gp; ++ int r; ++ ++ if (IS_R300_CLASS(boml->screen)) { ++ gp.param = RADEON_PARAM_LAST_CLEAR; ++ gp.value = (int *)&boml->current_age; ++ r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM, ++ &gp, sizeof(gp)); ++ if (r) { ++ fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); ++ exit(1); ++ } ++ } else ++ boml->current_age = boml->screen->scratch[3]; ++} ++ ++static int legacy_is_pending(struct radeon_bo *bo) ++{ ++ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; ++ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ ++ if (bo_legacy->is_pending <= 0) { ++ bo_legacy->is_pending = 0; ++ return 0; ++ } ++ if (boml->current_age >= bo_legacy->pending) { ++ if (boml->pending_bos.pprev == bo_legacy) { ++ boml->pending_bos.pprev = bo_legacy->pprev; ++ } ++ bo_legacy->pprev->pnext = bo_legacy->pnext; ++ if 
(bo_legacy->pnext) { ++ bo_legacy->pnext->pprev = bo_legacy->pprev; ++ } ++ assert(bo_legacy->is_pending <= bo->cref); ++ while (bo_legacy->is_pending--) { ++ bo = radeon_bo_unref(bo); ++ if (!bo) ++ break; ++ } ++ if (bo) ++ bo_legacy->is_pending = 0; ++ boml->cpendings--; ++ return 0; ++ } ++ return 1; ++} ++ ++static int legacy_wait_pending(struct radeon_bo *bo) ++{ ++ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; ++ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ ++ if (!bo_legacy->is_pending) { ++ return 0; ++ } ++ /* FIXME: lockup and userspace busy looping that's all the folks */ ++ legacy_get_current_age(boml); ++ while (legacy_is_pending(bo)) { ++ usleep(10); ++ legacy_get_current_age(boml); ++ } ++ return 0; ++} ++ ++static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) ++{ ++ struct bo_legacy *bo_legacy; ++ struct bo_legacy *next; ++ ++ legacy_get_current_age(boml); ++ bo_legacy = boml->pending_bos.pnext; ++ while (bo_legacy) { ++ if (debug) ++ fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, ++ boml->current_age, bo_legacy->pending); ++ next = bo_legacy->pnext; ++ if (legacy_is_pending(&(bo_legacy->base))) { ++ } ++ bo_legacy = next; ++ } ++} ++ ++static int legacy_wait_any_pending(struct bo_manager_legacy *boml) ++{ ++ struct bo_legacy *bo_legacy; ++ ++ legacy_get_current_age(boml); ++ bo_legacy = boml->pending_bos.pnext; ++ if (!bo_legacy) ++ return -1; ++ legacy_wait_pending(&bo_legacy->base); ++ return 0; ++} ++ ++static void legacy_kick_all_buffers(struct bo_manager_legacy *boml) ++{ ++ struct bo_legacy *legacy; ++ ++ legacy = boml->bos.next; ++ while (legacy != &boml->bos) { ++ if (legacy->tobj) { ++ if (legacy->validated) { ++ driDestroyTextureObject(&legacy->tobj->base); ++ legacy->tobj = 0; ++ legacy->validated = 0; ++ } ++ } ++ legacy = legacy->next; ++ } ++} ++ ++static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, ++ uint32_t size, ++ uint32_t 
alignment, ++ uint32_t domains, ++ uint32_t flags) ++{ ++ struct bo_legacy *bo_legacy; ++ static int pgsize; ++ ++ if (pgsize == 0) ++ pgsize = getpagesize() - 1; ++ ++ size = (size + pgsize) & ~pgsize; ++ ++ bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy)); ++ if (bo_legacy == NULL) { ++ return NULL; ++ } ++ bo_legacy->base.bom = (struct radeon_bo_manager*)boml; ++ bo_legacy->base.handle = 0; ++ bo_legacy->base.size = size; ++ bo_legacy->base.alignment = alignment; ++ bo_legacy->base.domains = domains; ++ bo_legacy->base.flags = flags; ++ bo_legacy->base.ptr = NULL; ++ bo_legacy->map_count = 0; ++ bo_legacy->next = NULL; ++ bo_legacy->prev = NULL; ++ bo_legacy->pnext = NULL; ++ bo_legacy->pprev = NULL; ++ bo_legacy->next = boml->bos.next; ++ bo_legacy->prev = &boml->bos; ++ boml->bos.next = bo_legacy; ++ if (bo_legacy->next) { ++ bo_legacy->next->prev = bo_legacy; ++ } ++ return bo_legacy; ++} ++ ++static int bo_dma_alloc(struct radeon_bo *bo) ++{ ++ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; ++ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ drm_radeon_mem_alloc_t alloc; ++ unsigned size; ++ int base_offset; ++ int r; ++ ++ /* align size on 4Kb */ ++ size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1); ++ alloc.region = RADEON_MEM_REGION_GART; ++ alloc.alignment = bo_legacy->base.alignment; ++ alloc.size = size; ++ alloc.region_offset = &base_offset; ++ r = drmCommandWriteRead(bo->bom->fd, ++ DRM_RADEON_ALLOC, ++ &alloc, ++ sizeof(alloc)); ++ if (r) { ++ /* ptr is set to NULL if dma allocation failed */ ++ bo_legacy->ptr = NULL; ++ return r; ++ } ++ bo_legacy->ptr = boml->screen->gartTextures.map + base_offset; ++ bo_legacy->offset = boml->screen->gart_texture_offset + base_offset; ++ bo->size = size; ++ boml->dma_alloc_size += size; ++ boml->dma_buf_count++; ++ return 0; ++} ++ ++static int bo_dma_free(struct radeon_bo *bo) ++{ ++ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; ++ 
struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ drm_radeon_mem_free_t memfree; ++ int r; ++ ++ if (bo_legacy->ptr == NULL) { ++ /* ptr is set to NULL if dma allocation failed */ ++ return 0; ++ } ++ legacy_get_current_age(boml); ++ memfree.region = RADEON_MEM_REGION_GART; ++ memfree.region_offset = bo_legacy->offset; ++ memfree.region_offset -= boml->screen->gart_texture_offset; ++ r = drmCommandWrite(boml->base.fd, ++ DRM_RADEON_FREE, ++ &memfree, ++ sizeof(memfree)); ++ if (r) { ++ fprintf(stderr, "Failed to free bo[%p] at %08x\n", ++ &bo_legacy->base, memfree.region_offset); ++ fprintf(stderr, "ret = %s\n", strerror(-r)); ++ return r; ++ } ++ boml->dma_alloc_size -= bo_legacy->base.size; ++ boml->dma_buf_count--; ++ return 0; ++} ++ ++static void bo_free(struct bo_legacy *bo_legacy) ++{ ++ struct bo_manager_legacy *boml; ++ ++ if (bo_legacy == NULL) { ++ return; ++ } ++ boml = (struct bo_manager_legacy *)bo_legacy->base.bom; ++ bo_legacy->prev->next = bo_legacy->next; ++ if (bo_legacy->next) { ++ bo_legacy->next->prev = bo_legacy->prev; ++ } ++ if (!bo_legacy->static_bo) { ++ legacy_free_handle(boml, bo_legacy->base.handle); ++ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { ++ /* dma buffers */ ++ bo_dma_free(&bo_legacy->base); ++ } else { ++ driDestroyTextureObject(&bo_legacy->tobj->base); ++ bo_legacy->tobj = NULL; ++ /* free backing store */ ++ free(bo_legacy->ptr); ++ } ++ } ++ memset(bo_legacy, 0 , sizeof(struct bo_legacy)); ++ free(bo_legacy); ++} ++ ++static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, ++ uint32_t handle, ++ uint32_t size, ++ uint32_t alignment, ++ uint32_t domains, ++ uint32_t flags) ++{ ++ struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; ++ struct bo_legacy *bo_legacy; ++ int r; ++ ++ if (handle) { ++ bo_legacy = boml->bos.next; ++ while (bo_legacy) { ++ if (bo_legacy->base.handle == handle) { ++ radeon_bo_ref(&(bo_legacy->base)); ++ return (struct radeon_bo*)bo_legacy; ++ } ++ bo_legacy = 
bo_legacy->next; ++ } ++ return NULL; ++ } ++ ++ bo_legacy = bo_allocate(boml, size, alignment, domains, flags); ++ bo_legacy->static_bo = 0; ++ r = legacy_new_handle(boml, &bo_legacy->base.handle); ++ if (r) { ++ bo_free(bo_legacy); ++ return NULL; ++ } ++ if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { ++ retry: ++ legacy_track_pending(boml, 0); ++ /* dma buffers */ ++ ++ r = bo_dma_alloc(&(bo_legacy->base)); ++ if (r) { ++ if (legacy_wait_any_pending(boml) == -1) { ++ bo_free(bo_legacy); ++ return NULL; ++ } ++ goto retry; ++ return NULL; ++ } ++ } else { ++ bo_legacy->ptr = malloc(bo_legacy->base.size); ++ if (bo_legacy->ptr == NULL) { ++ bo_free(bo_legacy); ++ return NULL; ++ } ++ } ++ radeon_bo_ref(&(bo_legacy->base)); ++ return (struct radeon_bo*)bo_legacy; ++} ++ ++static void bo_ref(struct radeon_bo *bo) ++{ ++} ++ ++static struct radeon_bo *bo_unref(struct radeon_bo *bo) ++{ ++ struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; ++ ++ if (bo->cref <= 0) { ++ bo_legacy->prev->next = bo_legacy->next; ++ if (bo_legacy->next) { ++ bo_legacy->next->prev = bo_legacy->prev; ++ } ++ if (!bo_legacy->is_pending) { ++ bo_free(bo_legacy); + } + return NULL; + } @@ -1187,6 +20653,28 @@ index 0000000..f80f0f7 +#include "radeon_cs_legacy.h" + +#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h +index f6bd1eb..55a73ea 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h ++++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h +@@ -247,9 +247,6 @@ + #define PCI_CHIP_RS350_7835 0x7835 + #define PCI_CHIP_RS690_791E 0x791E + #define PCI_CHIP_RS690_791F 0x791F +-#define PCI_CHIP_RS600_793F 0x793F +-#define PCI_CHIP_RS600_7941 0x7941 +-#define PCI_CHIP_RS600_7942 0x7942 + #define PCI_CHIP_RS740_796C 0x796C + #define PCI_CHIP_RS740_796D 0x796D + #define PCI_CHIP_RS740_796E 0x796E +@@ -273,7 +270,6 @@ enum { + CHIP_FAMILY_R420, + CHIP_FAMILY_RV410, + CHIP_FAMILY_RS400, +- CHIP_FAMILY_RS600, + 
CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, + CHIP_FAMILY_RV515, diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h new file mode 100644 index 0000000..4b5116c @@ -3361,12 +22849,1608 @@ index 0000000..a200e90 +#endif + +#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h +diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c +deleted file mode 100644 +index 46b490d..0000000 +--- a/src/mesa/drivers/dri/radeon/radeon_compat.c ++++ /dev/null +@@ -1,301 +0,0 @@ +-/************************************************************************** +- +-Copyright 2002 ATI Technologies Inc., Ontario, Canada, and +- Tungsten Graphics Inc., Austin, Texas. +- +-All Rights Reserved. +- +-Permission is hereby granted, free of charge, to any person obtaining a +-copy of this software and associated documentation files (the "Software"), +-to deal in the Software without restriction, including without limitation +-on the rights to use, copy, modify, merge, publish, distribute, sub +-license, and/or sell copies of the Software, and to permit persons to whom +-the Software is furnished to do so, subject to the following conditions: +- +-The above copyright notice and this permission notice (including the next +-paragraph) shall be included in all copies or substantial portions of the +-Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +-USE OR OTHER DEALINGS IN THE SOFTWARE. 
+- +-**************************************************************************/ +- +-/* +- * Authors: +- * Keith Whitwell +- * +- */ +- +-#include "main/glheader.h" +-#include "main/imports.h" +- +-#include "radeon_context.h" +-#include "radeon_state.h" +-#include "radeon_ioctl.h" +- +- +-static struct { +- int start; +- int len; +- const char *name; +-} packet[RADEON_MAX_STATE_PACKETS] = { +- { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, +- { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, +- { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, +- { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, +- { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, +- { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, +- { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, +- { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, +- { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, +- { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, +- { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, +- { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, +- { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, +- { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, +- { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, +- { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, +- { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, +- { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, +- { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, +- { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, +- { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, +-}; +- +- +-static void radeonCompatEmitPacket( radeonContextPtr rmesa, +- struct radeon_state_atom *state ) +-{ +- drm_radeon_sarea_t *sarea = rmesa->sarea; +- drm_radeon_context_regs_t *ctx = &sarea->context_state; +- drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0]; +- drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1]; +- int i; +- int *buf = state->cmd; +- +- for ( i = 0 ; i < 
state->cmd_size ; ) { +- drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++]; +- +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id, +- packet[(int)header->packet.packet_id].name); +- +- switch (header->packet.packet_id) { +- case RADEON_EMIT_PP_MISC: +- ctx->pp_misc = buf[i++]; +- ctx->pp_fog_color = buf[i++]; +- ctx->re_solid_color = buf[i++]; +- ctx->rb3d_blendcntl = buf[i++]; +- ctx->rb3d_depthoffset = buf[i++]; +- ctx->rb3d_depthpitch = buf[i++]; +- ctx->rb3d_zstencilcntl = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_CONTEXT; +- break; +- case RADEON_EMIT_PP_CNTL: +- ctx->pp_cntl = buf[i++]; +- ctx->rb3d_cntl = buf[i++]; +- ctx->rb3d_coloroffset = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_CONTEXT; +- break; +- case RADEON_EMIT_RB3D_COLORPITCH: +- ctx->rb3d_colorpitch = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_CONTEXT; +- break; +- case RADEON_EMIT_RE_LINE_PATTERN: +- ctx->re_line_pattern = buf[i++]; +- ctx->re_line_state = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_LINE; +- break; +- case RADEON_EMIT_SE_LINE_WIDTH: +- ctx->se_line_width = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_LINE; +- break; +- case RADEON_EMIT_PP_LUM_MATRIX: +- ctx->pp_lum_matrix = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_BUMPMAP; +- break; +- case RADEON_EMIT_PP_ROT_MATRIX_0: +- ctx->pp_rot_matrix_0 = buf[i++]; +- ctx->pp_rot_matrix_1 = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_BUMPMAP; +- break; +- case RADEON_EMIT_RB3D_STENCILREFMASK: +- ctx->rb3d_stencilrefmask = buf[i++]; +- ctx->rb3d_ropcntl = buf[i++]; +- ctx->rb3d_planemask = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_MASKS; +- break; +- case RADEON_EMIT_SE_VPORT_XSCALE: +- ctx->se_vport_xscale = buf[i++]; +- ctx->se_vport_xoffset = buf[i++]; +- ctx->se_vport_yscale = buf[i++]; +- ctx->se_vport_yoffset = buf[i++]; +- ctx->se_vport_zscale = buf[i++]; +- ctx->se_vport_zoffset = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_VIEWPORT; +- break; +- case 
RADEON_EMIT_SE_CNTL: +- ctx->se_cntl = buf[i++]; +- ctx->se_coord_fmt = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT; +- break; +- case RADEON_EMIT_SE_CNTL_STATUS: +- ctx->se_cntl_status = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_SETUP; +- break; +- case RADEON_EMIT_RE_MISC: +- ctx->re_misc = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_MISC; +- break; +- case RADEON_EMIT_PP_TXFILTER_0: +- tex0->pp_txfilter = buf[i++]; +- tex0->pp_txformat = buf[i++]; +- tex0->pp_txoffset = buf[i++]; +- tex0->pp_txcblend = buf[i++]; +- tex0->pp_txablend = buf[i++]; +- tex0->pp_tfactor = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_TEX0; +- break; +- case RADEON_EMIT_PP_BORDER_COLOR_0: +- tex0->pp_border_color = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_TEX0; +- break; +- case RADEON_EMIT_PP_TXFILTER_1: +- tex1->pp_txfilter = buf[i++]; +- tex1->pp_txformat = buf[i++]; +- tex1->pp_txoffset = buf[i++]; +- tex1->pp_txcblend = buf[i++]; +- tex1->pp_txablend = buf[i++]; +- tex1->pp_tfactor = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_TEX1; +- break; +- case RADEON_EMIT_PP_BORDER_COLOR_1: +- tex1->pp_border_color = buf[i++]; +- sarea->dirty |= RADEON_UPLOAD_TEX1; +- break; +- +- case RADEON_EMIT_SE_ZBIAS_FACTOR: +- i++; +- i++; +- break; +- +- case RADEON_EMIT_PP_TXFILTER_2: +- case RADEON_EMIT_PP_BORDER_COLOR_2: +- case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT: +- case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED: +- default: +- /* These states aren't understood by radeon drm 1.1 */ +- fprintf(stderr, "Tried to emit unsupported state\n"); +- return; +- } +- } +-} +- +- +- +-static void radeonCompatEmitStateLocked( radeonContextPtr rmesa ) +-{ +- struct radeon_state_atom *atom; +- +- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) +- return; +- +- foreach(atom, &rmesa->hw.atomlist) { +- if (rmesa->hw.all_dirty) +- atom->dirty = GL_TRUE; +- if (atom->is_tcl) +- atom->dirty = 
GL_FALSE; +- if (atom->dirty) +- radeonCompatEmitPacket(rmesa, atom); +- } +- +- rmesa->hw.is_dirty = GL_FALSE; +- rmesa->hw.all_dirty = GL_FALSE; +-} +- +- +-static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa, +- GLuint hw_primitive, +- GLuint nverts, +- drm_clip_rect_t *pbox, +- GLuint nbox ) +-{ +- int i; +- +- for ( i = 0 ; i < nbox ; ) { +- int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox ); +- drm_clip_rect_t *b = rmesa->sarea->boxes; +- drm_radeon_vertex_t vtx; +- +- rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS; +- rmesa->sarea->nbox = nr - i; +- +- for ( ; i < nr ; i++) +- *b++ = pbox[i]; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, +- "RadeonFlushVertexBuffer: prim %x buf %d verts %d " +- "disc %d nbox %d\n", +- hw_primitive, +- rmesa->dma.current.buf->buf->idx, +- nverts, +- nr == nbox, +- rmesa->sarea->nbox ); +- +- vtx.prim = hw_primitive; +- vtx.idx = rmesa->dma.current.buf->buf->idx; +- vtx.count = nverts; +- vtx.discard = (nr == nbox); +- +- drmCommandWrite( rmesa->dri.fd, +- DRM_RADEON_VERTEX, +- &vtx, sizeof(vtx)); +- } +-} +- +- +- +-/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer! 
+- */ +-void radeonCompatEmitPrimitive( radeonContextPtr rmesa, +- GLuint vertex_format, +- GLuint hw_primitive, +- GLuint nrverts ) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- LOCK_HARDWARE( rmesa ); +- +- radeonCompatEmitStateLocked( rmesa ); +- rmesa->sarea->vc_format = vertex_format; +- +- if (rmesa->state.scissor.enabled) { +- radeonCompatEmitPrimitiveLocked( rmesa, +- hw_primitive, +- nrverts, +- rmesa->state.scissor.pClipRects, +- rmesa->state.scissor.numClipRects ); +- } +- else { +- radeonCompatEmitPrimitiveLocked( rmesa, +- hw_primitive, +- nrverts, +- rmesa->pClipRects, +- rmesa->numClipRects ); +- } +- +- +- UNLOCK_HARDWARE( rmesa ); +-} +- +diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c +index 1e992c0..e4202c7 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_context.c ++++ b/src/mesa/drivers/dri/radeon/radeon_context.c +@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #include "drivers/common/driverfuncs.h" + ++#include "radeon_common.h" + #include "radeon_context.h" + #include "radeon_ioctl.h" + #include "radeon_state.h" +@@ -62,9 +63,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "radeon_tcl.h" + #include "radeon_maos.h" + +-#define need_GL_ARB_multisample +-#define need_GL_ARB_texture_compression +-#define need_GL_ARB_vertex_buffer_object + #define need_GL_EXT_blend_minmax + #define need_GL_EXT_fog_coord + #define need_GL_EXT_secondary_color +@@ -75,55 +73,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "vblank.h" + #include "utils.h" + #include "xmlpool.h" /* for symbolic values of enum-type options */ +-#ifndef RADEON_DEBUG +-int RADEON_DEBUG = (0); +-#endif +- +- +-/* Return various strings for glGetString(). 
+- */ +-static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- static char buffer[128]; +- unsigned offset; +- GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : +- rmesa->radeonScreen->AGPMode; +- +- switch ( name ) { +- case GL_VENDOR: +- return (GLubyte *)"Tungsten Graphics, Inc."; +- +- case GL_RENDERER: +- offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE, +- agp_mode ); +- +- sprintf( & buffer[ offset ], " %sTCL", +- !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) +- ? "" : "NO-" ); +- +- return (GLubyte *)buffer; +- +- default: +- return NULL; +- } +-} +- + + /* Extension strings exported by the R100 driver. + */ + const struct dri_extension card_extensions[] = + { +- { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_texture_border_clamp", NULL }, +- { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, +- { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_blend_logic_op", NULL }, + { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, +@@ -166,15 +127,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = { + NULL, + }; + +- +- +-/* Initialize the driver's misc functions. 
+- */ +-static void radeonInitDriverFuncs( struct dd_function_table *functions ) +-{ +- functions->GetString = radeonGetString; +-} +- + static const struct dri_debug_control debug_control[] = + { + { "fall", DEBUG_FALLBACKS }, +@@ -194,6 +146,51 @@ static const struct dri_debug_control debug_control[] = + { NULL, 0 } + }; + ++static void r100_get_lock(radeonContextPtr radeon) ++{ ++ r100ContextPtr rmesa = (r100ContextPtr)radeon; ++ drm_radeon_sarea_t *sarea = radeon->sarea; ++ ++ RADEON_STATECHANGE(rmesa, ctx); ++ if (rmesa->radeon.sarea->tiling_enabled) { ++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= ++ RADEON_COLOR_TILE_ENABLE; ++ } else { ++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ++ ~RADEON_COLOR_TILE_ENABLE; ++ } ++ ++ if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) { ++ sarea->ctx_owner = rmesa->radeon.dri.hwContext; ++ ++ if (!radeon->radeonScreen->kernel_mm) ++ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom); ++ } ++} ++ ++static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) ++{ ++} ++ ++static void r100_vtbl_pre_emit_state(radeonContextPtr radeon) ++{ ++ r100ContextPtr rmesa = (r100ContextPtr)radeon; ++ ++ /* r100 always needs to emit ZBS to avoid TCL lockups */ ++ rmesa->hw.zbs.dirty = 1; ++ radeon->hw.is_dirty = 1; ++} ++ ++ ++static void r100_init_vtbl(radeonContextPtr radeon) ++{ ++ radeon->vtbl.get_lock = r100_get_lock; ++ radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset; ++ radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer; ++ radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header; ++ radeon->vtbl.swtcl_flush = r100_swtcl_flush; ++ radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state; ++} + + /* Create the device specific context. 
+ */ +@@ -205,8 +202,8 @@ radeonCreateContext( const __GLcontextModes *glVisual, + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); + struct dd_function_table functions; +- radeonContextPtr rmesa; +- GLcontext *ctx, *shareCtx; ++ r100ContextPtr rmesa; ++ GLcontext *ctx; + int i; + int tcl_mode, fthrottle_mode; + +@@ -215,10 +212,12 @@ radeonCreateContext( const __GLcontextModes *glVisual, + assert(screen); + + /* Allocate the Radeon context */ +- rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) ); ++ rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) ); + if ( !rmesa ) + return GL_FALSE; + ++ r100_init_vtbl(&rmesa->radeon); ++ + /* init exp fog table data */ + radeonInitStaticFogData(); + +@@ -226,12 +225,12 @@ radeonCreateContext( const __GLcontextModes *glVisual, + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ +- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, ++ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "radeon"); +- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, ++ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, + "def_max_anisotropy"); + +- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { ++ if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { + if ( sPriv->drm_version.minor < 13 ) + fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " + "disabling.\n", sPriv->drm_version.minor ); +@@ -246,65 +245,23 @@ radeonCreateContext( const __GLcontextModes *glVisual, + * (the texture functions are especially important) + */ + _mesa_init_driver_functions( &functions ); +- radeonInitDriverFuncs( &functions ); + radeonInitTextureFuncs( &functions ); + +- /* Allocate the Mesa context */ +- if (sharedContextPrivate) +- shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx; +- else +- shareCtx 
= NULL; +- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, +- &functions, (void *) rmesa); +- if (!rmesa->glCtx) { +- FREE(rmesa); +- return GL_FALSE; +- } +- driContextPriv->driverPrivate = rmesa; +- +- /* Init radeon context data */ +- rmesa->dri.context = driContextPriv; +- rmesa->dri.screen = sPriv; +- rmesa->dri.drawable = NULL; +- rmesa->dri.readable = NULL; +- rmesa->dri.hwContext = driContextPriv->hHWContext; +- rmesa->dri.hwLock = &sPriv->pSAREA->lock; +- rmesa->dri.fd = sPriv->fd; +- rmesa->dri.drmMinor = sPriv->drm_version.minor; +- +- rmesa->radeonScreen = screen; +- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + +- screen->sarea_priv_offset); +- +- +- rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address; +- +- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); +- make_empty_list( & rmesa->swapped ); +- +- rmesa->nr_heaps = screen->numTexHeaps; +- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { +- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, +- screen->texSize[i], +- 12, +- RADEON_NR_TEX_REGIONS, +- (drmTextureRegionPtr)rmesa->sarea->tex_list[i], +- & rmesa->sarea->tex_age[i], +- & rmesa->swapped, +- sizeof( radeonTexObj ), +- (destroy_texture_object_t *) radeonDestroyTexObj ); +- +- driSetTextureSwapCounterLocation( rmesa->texture_heaps[i], +- & rmesa->c_textureSwaps ); ++ if (!radeonInitContext(&rmesa->radeon, &functions, ++ glVisual, driContextPriv, ++ sharedContextPrivate)) { ++ FREE(rmesa); ++ return GL_FALSE; + } +- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, ++ ++ rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache, + "texture_depth"); +- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) +- rmesa->texture_depth = ( screen->cpp == 4 ) ? ++ if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) ++ rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ? 
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + +- rmesa->swtcl.RenderIndex = ~0; +- rmesa->hw.all_dirty = GL_TRUE; ++ rmesa->radeon.swtcl.RenderIndex = ~0; ++ rmesa->radeon.hw.all_dirty = GL_TRUE; + + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have all of them in +@@ -312,26 +269,13 @@ radeonCreateContext( const __GLcontextModes *glVisual, + * setting allow larger textures. + */ + +- ctx = rmesa->glCtx; +- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, ++ ctx = rmesa->radeon.glCtx; ++ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache, + "texture_units"); + ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + +- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); +- +- driCalculateMaxTextureLevels( rmesa->texture_heaps, +- rmesa->nr_heaps, +- & ctx->Const, +- 4, +- 11, /* max 2D texture size is 2048x2048 */ +- 8, /* 256^3 */ +- 9, /* \todo: max cube texture size seems to be 512x512(x6) */ +- 11, /* max rect texture size is 2048x2048. */ +- 12, +- GL_FALSE, +- i ); +- ++ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + +@@ -359,6 +303,8 @@ radeonCreateContext( const __GLcontextModes *glVisual, + + rmesa->boxes = 0; + ++ ctx->Const.MaxDrawBuffers = 1; ++ + /* Initialize the software rasterizer and helper modules. 
+ */ + _swrast_CreateContext( ctx ); +@@ -392,38 +338,38 @@ radeonCreateContext( const __GLcontextModes *glVisual, + } + + driInitExtensions( ctx, card_extensions, GL_TRUE ); +- if (rmesa->radeonScreen->drmSupportsCubeMapsR100) ++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) + _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); +- if (rmesa->glCtx->Mesa_DXTn) { ++ if (rmesa->radeon.glCtx->Mesa_DXTn) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + _mesa_enable_extension( ctx, "GL_S3_s3tc" ); + } +- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { ++ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + } + +- if (rmesa->dri.drmMinor >= 9) ++ if (rmesa->radeon.dri.drmMinor >= 9) + _mesa_enable_extension( ctx, "GL_NV_texture_rectangle"); + + /* XXX these should really go right after _mesa_init_driver_functions() */ ++ radeonInitSpanFuncs( ctx ); + radeonInitIoctlFuncs( ctx ); + radeonInitStateFuncs( ctx ); +- radeonInitSpanFuncs( ctx ); + radeonInitState( rmesa ); + radeonInitSwtcl( ctx ); + + _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, + ctx->Const.MaxArrayLockSize, 32 ); + +- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); +- rmesa->iw.irq_seq = -1; +- rmesa->irqsEmitted = 0; +- rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 && +- fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); ++ fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode"); ++ rmesa->radeon.iw.irq_seq = -1; ++ rmesa->radeon.irqsEmitted = 0; ++ rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 && ++ fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); + +- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); ++ rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + +- (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); ++ (*sPriv->systemTime->getUST)( & 
rmesa->radeon.swap_ust ); + + + #if DO_DEBUG +@@ -431,20 +377,20 @@ radeonCreateContext( const __GLcontextModes *glVisual, + debug_control ); + #endif + +- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); +- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { ++ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); ++ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); + FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); + } else if (tcl_mode == DRI_CONF_TCL_SW || +- !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { +- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { +- rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; ++ !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { ++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { ++ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } +- TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); ++ TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + } + +- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { ++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + /* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */ + } + return GL_TRUE; +@@ -458,179 +404,41 @@ radeonCreateContext( const __GLcontextModes *glVisual, + void radeonDestroyContext( __DRIcontextPrivate *driContextPriv ) + { + GET_CURRENT_CONTEXT(ctx); +- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; +- radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; ++ r100ContextPtr rmesa = (r100ContextPtr) driContextPriv->driverPrivate; ++ r100ContextPtr current = ctx ? 
R100_CONTEXT(ctx) : NULL; + + /* check if we're deleting the currently bound context */ + if (rmesa == current) { +- RADEON_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + _mesa_make_current(NULL, NULL, NULL); + } + + /* Free radeon context resources */ + assert(rmesa); /* should never be null */ + if ( rmesa ) { +- GLboolean release_texture_heaps; +- + +- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); +- _swsetup_DestroyContext( rmesa->glCtx ); +- _tnl_DestroyContext( rmesa->glCtx ); +- _vbo_DestroyContext( rmesa->glCtx ); +- _swrast_DestroyContext( rmesa->glCtx ); ++ _swsetup_DestroyContext( rmesa->radeon.glCtx ); ++ _tnl_DestroyContext( rmesa->radeon.glCtx ); ++ _vbo_DestroyContext( rmesa->radeon.glCtx ); ++ _swrast_DestroyContext( rmesa->radeon.glCtx ); + +- radeonDestroySwtcl( rmesa->glCtx ); +- radeonReleaseArrays( rmesa->glCtx, ~0 ); +- if (rmesa->dma.current.buf) { +- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); +- radeonFlushCmdBuf( rmesa, __FUNCTION__ ); ++ radeonDestroySwtcl( rmesa->radeon.glCtx ); ++ radeonReleaseArrays( rmesa->radeon.glCtx, ~0 ); ++ if (rmesa->radeon.dma.current) { ++ radeonReleaseDmaRegion( &rmesa->radeon ); ++ rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ ); + } + + _mesa_vector4f_free( &rmesa->tcl.ObjClean ); + +- if (rmesa->state.scissor.pClipRects) { +- FREE(rmesa->state.scissor.pClipRects); +- rmesa->state.scissor.pClipRects = NULL; +- } +- +- if ( release_texture_heaps ) { +- /* This share group is about to go away, free our private +- * texture object data. 
+- */ +- int i; +- +- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { +- driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); +- rmesa->texture_heaps[ i ] = NULL; +- } +- +- assert( is_empty_list( & rmesa->swapped ) ); ++ if (rmesa->radeon.state.scissor.pClipRects) { ++ FREE(rmesa->radeon.state.scissor.pClipRects); ++ rmesa->radeon.state.scissor.pClipRects = NULL; + } + +- /* free the Mesa context */ +- rmesa->glCtx->DriverCtx = NULL; +- _mesa_destroy_context( rmesa->glCtx ); +- +- /* free the option cache */ +- driDestroyOptionCache (&rmesa->optionCache); ++ radeonCleanupContext(&rmesa->radeon); + + FREE( rmesa ); + } + } + +- +- +- +-void +-radeonSwapBuffers( __DRIdrawablePrivate *dPriv ) +-{ +- +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- radeonContextPtr rmesa; +- GLcontext *ctx; +- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = rmesa->glCtx; +- if (ctx->Visual.doubleBufferMode) { +- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ +- +- if ( rmesa->doPageFlip ) { +- radeonPageFlip( dPriv ); +- } +- else { +- radeonCopyBuffer( dPriv, NULL ); +- } +- } +- } +- else { +- /* XXX this shouldn't be an error but we can't handle it for now */ +- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); +- } +-} +- +-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +- int x, int y, int w, int h ) +-{ +- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { +- radeonContextPtr radeon; +- GLcontext *ctx; +- +- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- ctx = radeon->glCtx; +- +- if (ctx->Visual.doubleBufferMode) { +- drm_clip_rect_t rect; +- rect.x1 = x + dPriv->x; +- rect.y1 = (dPriv->h - y - h) + dPriv->y; +- rect.x2 = rect.x1 + w; +- rect.y2 = rect.y1 + h; +- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ +- radeonCopyBuffer(dPriv, &rect); +- } +- } else { +- /* XXX this shouldn't be an error but we can't handle it for 
now */ +- _mesa_problem(NULL, "%s: drawable has no context!", +- __FUNCTION__); +- } +-} +- +-/* Make context `c' the current context and bind it to the given +- * drawing and reading surfaces. +- */ +-GLboolean +-radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, +- __DRIdrawablePrivate *driDrawPriv, +- __DRIdrawablePrivate *driReadPriv ) +-{ +- if ( driContextPriv ) { +- radeonContextPtr newCtx = +- (radeonContextPtr) driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx); +- +- newCtx->dri.readable = driReadPriv; +- +- if ( (newCtx->dri.drawable != driDrawPriv) || +- newCtx->lastStamp != driDrawPriv->lastStamp ) { +- if (driDrawPriv->swap_interval == (unsigned)-1) { +- driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0) +- ? driGetDefaultVBlankFlags(&newCtx->optionCache) +- : VBLANK_FLAG_NO_IRQ; +- +- driDrawableInitVBlank( driDrawPriv ); +- } +- +- newCtx->dri.drawable = driDrawPriv; +- +- radeonSetCliprects(newCtx); +- radeonUpdateViewportOffset( newCtx->glCtx ); +- } +- +- _mesa_make_current( newCtx->glCtx, +- (GLframebuffer *) driDrawPriv->driverPrivate, +- (GLframebuffer *) driReadPriv->driverPrivate ); +- +- _mesa_update_state( newCtx->glCtx ); +- } else { +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx is null\n", __FUNCTION__); +- _mesa_make_current( NULL, NULL, NULL ); +- } +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "End %s\n", __FUNCTION__); +- return GL_TRUE; +-} +- +-/* Force the context `c' to be unbound from its buffer. 
+- */ +-GLboolean +-radeonUnbindContext( __DRIcontextPrivate *driContextPriv ) +-{ +- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; +- +- if (RADEON_DEBUG & DEBUG_DRI) +- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx); +- +- return GL_TRUE; +-} +diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h +index 53df766..2efabd1 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_context.h ++++ b/src/mesa/drivers/dri/radeon/radeon_context.h +@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "drm.h" + #include "radeon_drm.h" + #include "texmem.h" +- + #include "main/macros.h" + #include "main/mtypes.h" + #include "main/colormac.h" +- +-struct radeon_context; +-typedef struct radeon_context radeonContextRec; +-typedef struct radeon_context *radeonContextPtr; +- +-/* This union is used to avoid warnings/miscompilation +- with float to uint32_t casts due to strict-aliasing */ +-typedef union { +- GLfloat f; +- uint32_t ui32; +-} float_ui32_type; +- +-#include "radeon_lock.h" + #include "radeon_screen.h" +-#include "main/mm.h" +- +-#include "math/m_vector.h" +- +-#define TEX_0 0x1 +-#define TEX_1 0x2 +-#define TEX_2 0x4 +-#define TEX_ALL 0x7 +- +-/* Rasterizing fallbacks */ +-/* See correponding strings in r200_swtcl.c */ +-#define RADEON_FALLBACK_TEXTURE 0x0001 +-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 +-#define RADEON_FALLBACK_STENCIL 0x0004 +-#define RADEON_FALLBACK_RENDER_MODE 0x0008 +-#define RADEON_FALLBACK_BLEND_EQ 0x0010 +-#define RADEON_FALLBACK_BLEND_FUNC 0x0020 +-#define RADEON_FALLBACK_DISABLE 0x0040 +-#define RADEON_FALLBACK_BORDER_MODE 0x0080 +- +-/* The blit width for texture uploads +- */ +-#define BLIT_WIDTH_BYTES 1024 + +-/* Use the templated vertex format: +- */ +-#define COLOR_IS_RGBA +-#define TAG(x) radeon##x +-#include "tnl_dd/t_dd_vertex.h" +-#undef TAG +- +-typedef void (*radeon_tri_func) (radeonContextPtr, 
+- radeonVertex *, +- radeonVertex *, radeonVertex *); +- +-typedef void (*radeon_line_func) (radeonContextPtr, +- radeonVertex *, radeonVertex *); ++#include "radeon_common.h" + +-typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *); +- +-struct radeon_colorbuffer_state { +- GLuint clear; +- int roundEnable; +-}; + +-struct radeon_depthbuffer_state { +- GLuint clear; +- GLfloat scale; +-}; ++struct r100_context; ++typedef struct r100_context r100ContextRec; ++typedef struct r100_context *r100ContextPtr; + +-struct radeon_scissor_state { +- drm_clip_rect_t rect; +- GLboolean enabled; ++#include "radeon_lock.h" + +- GLuint numClipRects; /* Cliprects active */ +- GLuint numAllocedClipRects; /* Cliprects available */ +- drm_clip_rect_t *pClipRects; +-}; + +-struct radeon_stencilbuffer_state { +- GLboolean hwBuffer; +- GLuint clear; /* rb3d_stencilrefmask value */ +-}; + +-struct radeon_stipple_state { +- GLuint mask[32]; +-}; ++#define R100_TEX_ALL 0x7 + + /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */ + #define RADEON_ST_BIT(unit) \ +@@ -141,42 +73,6 @@ struct radeon_stipple_state { + #define RADEON_Q_BIT(unit) \ + (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit)) + +-typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr; +- +-/* Texture object in locally shared texture space. +- */ +-struct radeon_tex_obj { +- driTextureObject base; +- +- GLuint bufAddr; /* Offset to start of locally +- shared texture block */ +- +- GLuint dirty_state; /* Flags (1 per texunit) for +- whether or not this texobj +- has dirty hardware state +- (pp_*) that needs to be +- brought into the +- texunit. */ +- +- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; +- /* Six, for the cube faces */ +- +- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ +- +- GLuint pp_txfilter; /* hardware register values */ +- GLuint pp_txformat; +- GLuint pp_txoffset; /* Image location in texmem. 
+- All cube faces follow. */ +- GLuint pp_txsize; /* npot only */ +- GLuint pp_txpitch; /* npot only */ +- GLuint pp_border_color; +- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ +- +- GLboolean border_fallback; +- +- GLuint tile_bits; /* hw texture tile bits used on this texture */ +-}; +- + struct radeon_texture_env_state { + radeonTexObjPtr texobj; + GLenum format; +@@ -187,17 +83,6 @@ struct radeon_texture_state { + struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS]; + }; + +-struct radeon_state_atom { +- struct radeon_state_atom *next, *prev; +- const char *name; /* for debug */ +- int cmd_size; /* size in bytes */ +- GLuint is_tcl; +- int *cmd; /* one or more cmd's */ +- int *lastcmd; /* one or more cmd's */ +- GLboolean dirty; /* dirty-mark in emit_state_list */ +- GLboolean(*check) (GLcontext *); /* is this state active? */ +-}; +- + /* Trying to keep these relatively short as the variables are becoming + * extravagently long. Drop the driver name prefix off the front of + * everything - I think we know which driver we're in by now, and keep the +@@ -410,10 +295,7 @@ struct radeon_state_atom { + #define SHN_SHININESS 1 + #define SHN_STATE_SIZE 2 + +-struct radeon_hw_state { +- /* Head of the linked list of state atoms. */ +- struct radeon_state_atom atomlist; +- ++struct r100_hw_state { + /* Hardware state, stored as cmdbuf commands: + * -- Need to doublebuffer for + * - eliding noop statechange loops? (except line stipple count) +@@ -438,86 +320,16 @@ struct radeon_hw_state { + struct radeon_state_atom glt; + struct radeon_state_atom txr[3]; /* for NPOT */ + +- int max_state_size; /* Number of bytes necessary for a full state emit. 
*/ +- GLboolean is_dirty, all_dirty; + }; + +-struct radeon_state { +- /* Derived state for internal purposes: +- */ +- struct radeon_colorbuffer_state color; +- struct radeon_depthbuffer_state depth; +- struct radeon_scissor_state scissor; +- struct radeon_stencilbuffer_state stencil; ++ ++struct r100_state { + struct radeon_stipple_state stipple; + struct radeon_texture_state texture; + }; + +-/* Need refcounting on dma buffers: +- */ +-struct radeon_dma_buffer { +- int refcount; /* the number of retained regions in buf */ +- drmBufPtr buf; +-}; +- +-#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset + \ +- (rvb)->address - rmesa->dma.buf0_address + \ +- (rvb)->start) +- +-/* A retained region, eg vertices for indexed vertices. +- */ +-struct radeon_dma_region { +- struct radeon_dma_buffer *buf; +- char *address; /* == buf->address */ +- int start, end, ptr; /* offsets from start of buf */ +- int aos_start; +- int aos_stride; +- int aos_size; +-}; +- +-struct radeon_dma { +- /* Active dma region. Allocations for vertices and retained +- * regions come from here. Also used for emitting random vertices, +- * these may be flushed by calling flush_current(); +- */ +- struct radeon_dma_region current; +- +- void (*flush) (radeonContextPtr); +- +- char *buf0_address; /* start of buf[0], for index calcs */ +- GLuint nr_released_bufs; /* flush after so many buffers released */ +-}; +- +-struct radeon_dri_mirror { +- __DRIcontextPrivate *context; /* DRI context */ +- __DRIscreenPrivate *screen; /* DRI screen */ +- +- /** +- * DRI drawable bound to this context for drawing. +- */ +- __DRIdrawablePrivate *drawable; +- +- /** +- * DRI drawable bound to this context for reading. 
+- */ +- __DRIdrawablePrivate *readable; +- +- drm_context_t hwContext; +- drm_hw_lock_t *hwLock; +- int fd; +- int drmMinor; +-}; +- + #define RADEON_CMD_BUF_SZ (8*1024) +- +-struct radeon_store { +- GLuint statenr; +- GLuint primnr; +- char cmd_buf[RADEON_CMD_BUF_SZ]; +- int cmd_used; +- int elts_start; +-}; +- ++#define R200_ELT_BUF_SZ (8*1024) + /* radeon_tcl.c + */ + struct radeon_tcl_info { +@@ -529,30 +341,23 @@ struct radeon_tcl_info { + */ + GLvector4f ObjClean; + +- struct radeon_dma_region *aos_components[8]; ++ struct radeon_aos aos[8]; + GLuint nr_aos_components; + + GLuint *Elts; + +- struct radeon_dma_region indexed_verts; +- struct radeon_dma_region obj; +- struct radeon_dma_region rgba; +- struct radeon_dma_region spec; +- struct radeon_dma_region fog; +- struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS]; +- struct radeon_dma_region norm; ++ struct radeon_bo *indexed_bo; ++ ++ int elt_cmd_offset; /** Offset into the cmdbuf */ ++ int elt_cmd_start; ++ int elt_used; + }; + + /* radeon_swtcl.c + */ +-struct radeon_swtcl_info { +- GLuint RenderIndex; +- GLuint vertex_size; ++struct r100_swtcl_info { + GLuint vertex_format; + +- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; +- GLuint vertex_attr_count; +- + GLubyte *verts; + + /* Fallback rasterization functions +@@ -561,10 +366,6 @@ struct radeon_swtcl_info { + radeon_line_func draw_line; + radeon_tri_func draw_tri; + +- GLuint hw_primitive; +- GLenum render_primitive; +- GLuint numverts; +- + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. 
+ */ +@@ -576,22 +377,9 @@ struct radeon_swtcl_info { + GLuint specoffset; + + GLboolean needproj; +- +- struct radeon_dma_region indexed_verts; + }; + +-struct radeon_ioctl { +- GLuint vertex_offset; +- GLuint vertex_size; +-}; + +-#define RADEON_MAX_PRIMS 64 +- +-struct radeon_prim { +- GLuint start; +- GLuint end; +- GLuint prim; +-}; + + /* A maximum total of 20 elements per vertex: 3 floats for position, 3 + * floats for normal, 4 floats for color, 4 bytes for secondary color, +@@ -602,59 +390,18 @@ struct radeon_prim { + */ + #define RADEON_MAX_VERTEX_SIZE 20 + +-struct radeon_context { +- GLcontext *glCtx; /* Mesa context */ ++struct r100_context { ++ struct radeon_context radeon; + + /* Driver and hardware state management + */ +- struct radeon_hw_state hw; +- struct radeon_state state; +- +- /* Texture object bookkeeping +- */ +- unsigned nr_heaps; +- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; +- driTextureObject swapped; +- int texture_depth; +- float initialMaxAnisotropy; +- +- /* Rasterization and vertex state: +- */ +- GLuint TclFallback; +- GLuint Fallback; +- GLuint NewGLState; +- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ ++ struct r100_hw_state hw; ++ struct r100_state state; + + /* Vertex buffers + */ + struct radeon_ioctl ioctl; +- struct radeon_dma dma; + struct radeon_store store; +- /* A full state emit as of the first state emit in the main store, in case +- * the context is lost. 
+- */ +- struct radeon_store backup_store; +- +- /* Page flipping +- */ +- GLuint doPageFlip; +- +- /* Busy waiting +- */ +- GLuint do_usleeps; +- GLuint do_irqs; +- GLuint irqsEmitted; +- drm_radeon_irq_wait_t iw; +- +- /* Drawable, cliprect and scissor information +- */ +- GLuint numClipRects; /* Cliprects for the draw buffer */ +- drm_clip_rect_t *pClipRects; +- unsigned int lastStamp; +- GLboolean lost_context; +- GLboolean save_on_next_emit; +- radeonScreenPtr radeonScreen; /* Screen private DRI data */ +- drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* TCL stuff + */ +@@ -667,29 +414,13 @@ struct radeon_context { + GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS]; + GLuint last_ReallyEnabled; + +- /* VBI +- */ +- int64_t swap_ust; +- int64_t swap_missed_ust; +- +- GLuint swap_count; +- GLuint swap_missed_count; +- + /* radeon_tcl.c + */ + struct radeon_tcl_info tcl; + + /* radeon_swtcl.c + */ +- struct radeon_swtcl_info swtcl; +- +- /* Mirrors of some DRI state +- */ +- struct radeon_dri_mirror dri; +- +- /* Configuration cache +- */ +- driOptionCache optionCache; ++ struct r100_swtcl_info swtcl; + + GLboolean using_hyperz; + GLboolean texmicrotile; +@@ -703,23 +434,11 @@ struct radeon_context { + GLuint c_textureSwaps; + GLuint c_textureBytes; + GLuint c_vertexBuffers; ++ + }; + +-#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx)) +- +-static INLINE GLuint radeonPackColor(GLuint cpp, +- GLubyte r, GLubyte g, +- GLubyte b, GLubyte a) +-{ +- switch (cpp) { +- case 2: +- return PACK_COLOR_565(r, g, b); +- case 4: +- return PACK_COLOR_8888(a, r, g, b); +- default: +- return 0; +- } +-} ++#define R100_CONTEXT(ctx) ((r100ContextPtr)(ctx->DriverCtx)) ++ + + #define RADEON_OLD_PACKETS 1 + +@@ -727,37 +446,11 @@ extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); + extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); +-extern void 
radeonSwapBuffers(__DRIdrawablePrivate * dPriv); +-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +- int x, int y, int w, int h); + extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); + extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); + +-/* ================================================================ +- * Debugging: +- */ +-#define DO_DEBUG 1 +- +-#if DO_DEBUG +-extern int RADEON_DEBUG; +-#else +-#define RADEON_DEBUG 0 +-#endif +- +-#define DEBUG_TEXTURE 0x0001 +-#define DEBUG_STATE 0x0002 +-#define DEBUG_IOCTL 0x0004 +-#define DEBUG_PRIMS 0x0008 +-#define DEBUG_VERTS 0x0010 +-#define DEBUG_FALLBACKS 0x0020 +-#define DEBUG_VFMT 0x0040 +-#define DEBUG_CODEGEN 0x0080 +-#define DEBUG_VERBOSE 0x0100 +-#define DEBUG_DRI 0x0200 +-#define DEBUG_DMA 0x0400 +-#define DEBUG_SANITY 0x0800 +-#define DEBUG_SYNC 0x1000 ++ + + #endif /* __RADEON_CONTEXT_H__ */ +diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h +new file mode 100644 +index 0000000..984725a +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h +@@ -0,0 +1,207 @@ ++/* ++ * Copyright © 2008 Nicolai Haehnle ++ * Copyright © 2008 Jérôme Glisse ++ * All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. 
++ */ ++/* ++ * Authors: ++ * Aapo Tahkola ++ * Nicolai Haehnle ++ * Jérôme Glisse ++ */ ++#ifndef RADEON_CS_H ++#define RADEON_CS_H ++ ++#include ++#include ++#include "drm.h" ++#include "radeon_drm.h" ++ ++struct radeon_cs_reloc { ++ struct radeon_bo *bo; ++ uint32_t read_domain; ++ uint32_t write_domain; ++ uint32_t flags; ++}; ++ ++ ++#define RADEON_CS_SPACE_OK 0 ++#define RADEON_CS_SPACE_OP_TO_BIG 1 ++#define RADEON_CS_SPACE_FLUSH 2 ++ ++struct radeon_cs_space_check { ++ struct radeon_bo *bo; ++ uint32_t read_domains; ++ uint32_t write_domain; ++ uint32_t new_accounted; ++}; ++ ++struct radeon_cs_manager; ++ ++struct radeon_cs { ++ struct radeon_cs_manager *csm; ++ void *relocs; ++ uint32_t *packets; ++ unsigned crelocs; ++ unsigned relocs_total_size; ++ unsigned cdw; ++ unsigned ndw; ++ int section; ++ unsigned section_ndw; ++ unsigned section_cdw; ++ const char *section_file; ++ const char *section_func; ++ int section_line; ++ ++}; ++ ++/* cs functions */ ++struct radeon_cs_funcs { ++ struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm, ++ uint32_t ndw); ++ int (*cs_write_reloc)(struct radeon_cs *cs, ++ struct radeon_bo *bo, ++ uint32_t read_domain, ++ uint32_t write_domain, ++ uint32_t flags); ++ int (*cs_begin)(struct radeon_cs *cs, ++ uint32_t ndw, ++ const char *file, ++ const char *func, ++ int line); ++ int (*cs_end)(struct radeon_cs *cs, ++ const char *file, ++ const char *func, ++ int line); ++ int (*cs_emit)(struct radeon_cs *cs); ++ int (*cs_destroy)(struct radeon_cs *cs); ++ int (*cs_erase)(struct radeon_cs *cs); ++ int (*cs_need_flush)(struct radeon_cs *cs); ++ void (*cs_print)(struct radeon_cs *cs, FILE *file); ++ int (*cs_space_check)(struct radeon_cs *cs, struct radeon_cs_space_check *bos, ++ int num_bo); ++}; ++ ++struct radeon_cs_manager { ++ struct radeon_cs_funcs *funcs; ++ int fd; ++ uint32_t vram_limit, gart_limit; ++ uint32_t vram_write_used, gart_write_used; ++ uint32_t read_used; ++}; ++ ++static inline struct radeon_cs 
*radeon_cs_create(struct radeon_cs_manager *csm, ++ uint32_t ndw) ++{ ++ return csm->funcs->cs_create(csm, ndw); ++} ++ ++static inline int radeon_cs_write_reloc(struct radeon_cs *cs, ++ struct radeon_bo *bo, ++ uint32_t read_domain, ++ uint32_t write_domain, ++ uint32_t flags) ++{ ++ return cs->csm->funcs->cs_write_reloc(cs, ++ bo, ++ read_domain, ++ write_domain, ++ flags); ++} ++ ++static inline int radeon_cs_begin(struct radeon_cs *cs, ++ uint32_t ndw, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ return cs->csm->funcs->cs_begin(cs, ndw, file, func, line); ++} ++ ++static inline int radeon_cs_end(struct radeon_cs *cs, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ return cs->csm->funcs->cs_end(cs, file, func, line); ++} ++ ++static inline int radeon_cs_emit(struct radeon_cs *cs) ++{ ++ return cs->csm->funcs->cs_emit(cs); ++} ++ ++static inline int radeon_cs_destroy(struct radeon_cs *cs) ++{ ++ return cs->csm->funcs->cs_destroy(cs); ++} ++ ++static inline int radeon_cs_erase(struct radeon_cs *cs) ++{ ++ return cs->csm->funcs->cs_erase(cs); ++} ++ ++static inline int radeon_cs_need_flush(struct radeon_cs *cs) ++{ ++ return cs->csm->funcs->cs_need_flush(cs); ++} ++ ++static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file) ++{ ++ cs->csm->funcs->cs_print(cs, file); ++} ++ ++static inline int radeon_cs_space_check(struct radeon_cs *cs, ++ struct radeon_cs_space_check *bos, ++ int num_bo) ++{ ++ return cs->csm->funcs->cs_space_check(cs, bos, num_bo); ++} ++ ++static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit) ++{ ++ ++ if (domain == RADEON_GEM_DOMAIN_VRAM) ++ cs->csm->vram_limit = limit; ++ else ++ cs->csm->gart_limit = limit; ++} ++ ++static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword) ++{ ++ cs->packets[cs->cdw++] = dword; ++ if (cs->section) { ++ cs->section_cdw++; ++ } ++} ++ ++static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t 
qword) ++{ ++ ++ memcpy(cs->packets + cs->cdw, &qword, sizeof(qword)); ++ cs->cdw+=2; ++ if (cs->section) { ++ cs->section_cdw+=2; ++ } ++} ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c new file mode 100644 -index 0000000..984725a +index 0000000..b47b095 --- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h -@@ -0,0 +1,207 @@ ++++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +@@ -0,0 +1,504 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse @@ -3398,188 +24482,485 @@ index 0000000..984725a + * Nicolai Haehnle + * Jérôme Glisse + */ -+#ifndef RADEON_CS_H -+#define RADEON_CS_H ++#include + -+#include -+#include -+#include "drm.h" -+#include "radeon_drm.h" ++#include "radeon_bocs_wrapper.h" + -+struct radeon_cs_reloc { -+ struct radeon_bo *bo; -+ uint32_t read_domain; -+ uint32_t write_domain; -+ uint32_t flags; ++struct cs_manager_legacy { ++ struct radeon_cs_manager base; ++ struct radeon_context *ctx; ++ /* hack for scratch stuff */ ++ uint32_t pending_age; ++ uint32_t pending_count; ++ ++ ++}; ++ ++struct cs_reloc_legacy { ++ struct radeon_cs_reloc base; ++ uint32_t cindices; ++ uint32_t *indices; +}; + + -+#define RADEON_CS_SPACE_OK 0 -+#define RADEON_CS_SPACE_OP_TO_BIG 1 -+#define RADEON_CS_SPACE_FLUSH 2 ++static struct radeon_cs *cs_create(struct radeon_cs_manager *csm, ++ uint32_t ndw) ++{ ++ struct radeon_cs *cs; ++ ++ cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); ++ if (cs == NULL) { ++ return NULL; ++ } ++ cs->csm = csm; ++ cs->ndw = (ndw + 0x3FF) & (~0x3FF); ++ cs->packets = (uint32_t*)malloc(4*cs->ndw); ++ if (cs->packets == NULL) { ++ free(cs); ++ return NULL; ++ } ++ cs->relocs_total_size = 0; ++ return cs; ++} ++ ++static int cs_write_reloc(struct radeon_cs *cs, ++ struct radeon_bo *bo, ++ uint32_t read_domain, ++ uint32_t write_domain, ++ uint32_t flags) ++{ ++ struct cs_reloc_legacy *relocs; ++ int i; ++ ++ relocs = 
(struct cs_reloc_legacy *)cs->relocs; ++ /* check domains */ ++ if ((read_domain && write_domain) || (!read_domain && !write_domain)) { ++ /* in one CS a bo can only be in read or write domain but not ++ * in read & write domain at the same sime ++ */ ++ return -EINVAL; ++ } ++ if (read_domain == RADEON_GEM_DOMAIN_CPU) { ++ return -EINVAL; ++ } ++ if (write_domain == RADEON_GEM_DOMAIN_CPU) { ++ return -EINVAL; ++ } ++ /* check if bo is already referenced */ ++ for(i = 0; i < cs->crelocs; i++) { ++ uint32_t *indices; ++ ++ if (relocs[i].base.bo->handle == bo->handle) { ++ /* Check domains must be in read or write. As we check already ++ * checked that in argument one of the read or write domain was ++ * set we only need to check that if previous reloc as the read ++ * domain set then the read_domain should also be set for this ++ * new relocation. ++ */ ++ if (relocs[i].base.read_domain && !read_domain) { ++ return -EINVAL; ++ } ++ if (relocs[i].base.write_domain && !write_domain) { ++ return -EINVAL; ++ } ++ relocs[i].base.read_domain |= read_domain; ++ relocs[i].base.write_domain |= write_domain; ++ /* save indice */ ++ relocs[i].cindices++; ++ indices = (uint32_t*)realloc(relocs[i].indices, ++ relocs[i].cindices * 4); ++ if (indices == NULL) { ++ relocs[i].cindices -= 1; ++ return -ENOMEM; ++ } ++ relocs[i].indices = indices; ++ relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; ++ return 0; ++ } ++ } ++ /* add bo to reloc */ ++ relocs = (struct cs_reloc_legacy*) ++ realloc(cs->relocs, ++ sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1)); ++ if (relocs == NULL) { ++ return -ENOMEM; ++ } ++ cs->relocs = relocs; ++ relocs[cs->crelocs].base.bo = bo; ++ relocs[cs->crelocs].base.read_domain = read_domain; ++ relocs[cs->crelocs].base.write_domain = write_domain; ++ relocs[cs->crelocs].base.flags = flags; ++ relocs[cs->crelocs].indices = (uint32_t*)malloc(4); ++ if (relocs[cs->crelocs].indices == NULL) { ++ return -ENOMEM; ++ } ++ relocs[cs->crelocs].indices[0] 
= cs->cdw - 1; ++ relocs[cs->crelocs].cindices = 1; ++ cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); ++ cs->crelocs++; ++ radeon_bo_ref(bo); ++ return 0; ++} ++ ++static int cs_begin(struct radeon_cs *cs, ++ uint32_t ndw, ++ const char *file, ++ const char *func, ++ int line) ++{ ++ if (cs->section) { ++ fprintf(stderr, "CS already in a section(%s,%s,%d)\n", ++ cs->section_file, cs->section_func, cs->section_line); ++ fprintf(stderr, "CS can't start section(%s,%s,%d)\n", ++ file, func, line); ++ return -EPIPE; ++ } ++ cs->section = 1; ++ cs->section_ndw = ndw; ++ cs->section_cdw = 0; ++ cs->section_file = file; ++ cs->section_func = func; ++ cs->section_line = line; ++ ++ ++ if (cs->cdw + ndw > cs->ndw) { ++ uint32_t tmp, *ptr; ++ int num = (ndw > 0x3FF) ? ndw : 0x3FF; ++ ++ tmp = (cs->cdw + 1 + num) & (~num); ++ ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); ++ if (ptr == NULL) { ++ return -ENOMEM; ++ } ++ cs->packets = ptr; ++ cs->ndw = tmp; ++ } ++ ++ return 0; ++} ++ ++static int cs_end(struct radeon_cs *cs, ++ const char *file, ++ const char *func, ++ int line) ++ ++{ ++ if (!cs->section) { ++ fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", ++ file, func, line); ++ return -EPIPE; ++ } ++ cs->section = 0; ++ if (cs->section_ndw != cs->section_cdw) { ++ fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", ++ cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); ++ fprintf(stderr, "CS section end at (%s,%s,%d)\n", ++ file, func, line); ++ return -EPIPE; ++ } ++ return 0; ++} ++ ++static int cs_process_relocs(struct radeon_cs *cs) ++{ ++ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; ++ struct cs_reloc_legacy *relocs; ++ int i, j, r; ++ ++ csm = (struct cs_manager_legacy*)cs->csm; ++ relocs = (struct cs_reloc_legacy *)cs->relocs; ++ restart: ++ for (i = 0; i < cs->crelocs; i++) { ++ for (j = 0; j < relocs[i].cindices; j++) { ++ uint32_t soffset, eoffset; ++ ++ r = 
radeon_bo_legacy_validate(relocs[i].base.bo, ++ &soffset, &eoffset); ++ if (r == -EAGAIN) ++ goto restart; ++ if (r) { ++ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", ++ relocs[i].base.bo, soffset, eoffset); ++ return r; ++ } ++ cs->packets[relocs[i].indices[j]] += soffset; ++ if (cs->packets[relocs[i].indices[j]] >= eoffset) { ++ /* radeon_bo_debug(relocs[i].base.bo, 12); */ ++ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", ++ relocs[i].base.bo, soffset, eoffset); ++ fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", ++ relocs[i].base.bo, ++ cs->packets[relocs[i].indices[j]], ++ eoffset); ++ exit(0); ++ return -EINVAL; ++ } ++ } ++ } ++ return 0; ++} ++ ++static int cs_set_age(struct radeon_cs *cs) ++{ ++ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; ++ struct cs_reloc_legacy *relocs; ++ int i; ++ ++ relocs = (struct cs_reloc_legacy *)cs->relocs; ++ for (i = 0; i < cs->crelocs; i++) { ++ radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age); ++ radeon_bo_unref(relocs[i].base.bo); ++ } ++ return 0; ++} ++ ++static void dump_cmdbuf(struct radeon_cs *cs) ++{ ++ int i; ++ for (i = 0; i < cs->cdw; i++){ ++ fprintf(stderr,"%x: %08x\n", i, cs->packets[i]); ++ } ++ ++} ++static int cs_emit(struct radeon_cs *cs) ++{ ++ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; ++ drm_radeon_cmd_buffer_t cmd; ++ drm_r300_cmd_header_t age; ++ uint64_t ull; ++ int r; + -+struct radeon_cs_space_check { -+ struct radeon_bo *bo; -+ uint32_t read_domains; -+ uint32_t write_domain; -+ uint32_t new_accounted; -+}; ++ csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); + -+struct radeon_cs_manager; ++ /* append buffer age */ ++ if (IS_R300_CLASS(csm->ctx->radeonScreen)) { ++ age.scratch.cmd_type = R300_CMD_SCRATCH; ++ /* Scratch register 2 corresponds to what radeonGetAge polls */ ++ csm->pending_age = 0; ++ csm->pending_count = 1; ++ ull = (uint64_t) (intptr_t) &csm->pending_age; ++ age.scratch.reg = 2; ++ age.scratch.n_bufs = 1; ++ 
age.scratch.flags = 0; ++ radeon_cs_write_dword(cs, age.u); ++ radeon_cs_write_qword(cs, ull); ++ radeon_cs_write_dword(cs, 0); ++ } + -+struct radeon_cs { -+ struct radeon_cs_manager *csm; -+ void *relocs; -+ uint32_t *packets; -+ unsigned crelocs; -+ unsigned relocs_total_size; -+ unsigned cdw; -+ unsigned ndw; -+ int section; -+ unsigned section_ndw; -+ unsigned section_cdw; -+ const char *section_file; -+ const char *section_func; -+ int section_line; ++ r = cs_process_relocs(cs); ++ if (r) { ++ return 0; ++ } + -+}; ++ cmd.buf = (char *)cs->packets; ++ cmd.bufsz = cs->cdw * 4; ++ if (csm->ctx->state.scissor.enabled) { ++ cmd.nbox = csm->ctx->state.scissor.numClipRects; ++ cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects; ++ } else { ++ cmd.nbox = csm->ctx->numClipRects; ++ cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects; ++ } + -+/* cs functions */ -+struct radeon_cs_funcs { -+ struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm, -+ uint32_t ndw); -+ int (*cs_write_reloc)(struct radeon_cs *cs, -+ struct radeon_bo *bo, -+ uint32_t read_domain, -+ uint32_t write_domain, -+ uint32_t flags); -+ int (*cs_begin)(struct radeon_cs *cs, -+ uint32_t ndw, -+ const char *file, -+ const char *func, -+ int line); -+ int (*cs_end)(struct radeon_cs *cs, -+ const char *file, -+ const char *func, -+ int line); -+ int (*cs_emit)(struct radeon_cs *cs); -+ int (*cs_destroy)(struct radeon_cs *cs); -+ int (*cs_erase)(struct radeon_cs *cs); -+ int (*cs_need_flush)(struct radeon_cs *cs); -+ void (*cs_print)(struct radeon_cs *cs, FILE *file); -+ int (*cs_space_check)(struct radeon_cs *cs, struct radeon_cs_space_check *bos, -+ int num_bo); -+}; ++ //dump_cmdbuf(cs); + -+struct radeon_cs_manager { -+ struct radeon_cs_funcs *funcs; -+ int fd; -+ uint32_t vram_limit, gart_limit; -+ uint32_t vram_write_used, gart_write_used; -+ uint32_t read_used; -+}; ++ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); ++ if (r) { ++ return r; ++ } ++ 
if (!IS_R300_CLASS(csm->ctx->radeonScreen)) { ++ drm_radeon_irq_emit_t emit_cmd; ++ emit_cmd.irq_seq = &csm->pending_age; ++ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd)); ++ if (r) { ++ return r; ++ } ++ } ++ cs_set_age(cs); + -+static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm, -+ uint32_t ndw) -+{ -+ return csm->funcs->cs_create(csm, ndw); ++ cs->csm->read_used = 0; ++ cs->csm->vram_write_used = 0; ++ cs->csm->gart_write_used = 0; ++ return 0; +} + -+static inline int radeon_cs_write_reloc(struct radeon_cs *cs, -+ struct radeon_bo *bo, -+ uint32_t read_domain, -+ uint32_t write_domain, -+ uint32_t flags) ++static void inline cs_free_reloc(void *relocs_p, int crelocs) +{ -+ return cs->csm->funcs->cs_write_reloc(cs, -+ bo, -+ read_domain, -+ write_domain, -+ flags); ++ struct cs_reloc_legacy *relocs = relocs_p; ++ int i; ++ if (!relocs_p) ++ return; ++ for (i = 0; i < crelocs; i++) ++ free(relocs[i].indices); +} + -+static inline int radeon_cs_begin(struct radeon_cs *cs, -+ uint32_t ndw, -+ const char *file, -+ const char *func, -+ int line) ++static int cs_destroy(struct radeon_cs *cs) +{ -+ return cs->csm->funcs->cs_begin(cs, ndw, file, func, line); ++ cs_free_reloc(cs->relocs, cs->crelocs); ++ free(cs->relocs); ++ free(cs->packets); ++ free(cs); ++ return 0; +} + -+static inline int radeon_cs_end(struct radeon_cs *cs, -+ const char *file, -+ const char *func, -+ int line) ++static int cs_erase(struct radeon_cs *cs) +{ -+ return cs->csm->funcs->cs_end(cs, file, func, line); ++ cs_free_reloc(cs->relocs, cs->crelocs); ++ free(cs->relocs); ++ cs->relocs_total_size = 0; ++ cs->relocs = NULL; ++ cs->crelocs = 0; ++ cs->cdw = 0; ++ cs->section = 0; ++ return 0; +} + -+static inline int radeon_cs_emit(struct radeon_cs *cs) ++static int cs_need_flush(struct radeon_cs *cs) +{ -+ return cs->csm->funcs->cs_emit(cs); ++ /* this function used to flush when the BO usage got to ++ * a certain size, now the 
higher levels handle this better */ ++ return 0; +} + -+static inline int radeon_cs_destroy(struct radeon_cs *cs) ++static void cs_print(struct radeon_cs *cs, FILE *file) +{ -+ return cs->csm->funcs->cs_destroy(cs); +} + -+static inline int radeon_cs_erase(struct radeon_cs *cs) ++static int cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo) +{ -+ return cs->csm->funcs->cs_erase(cs); -+} ++ struct radeon_cs_manager *csm = cs->csm; ++ int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0; ++ uint32_t read_domains, write_domain; ++ int i; ++ struct radeon_bo *bo; + -+static inline int radeon_cs_need_flush(struct radeon_cs *cs) -+{ -+ return cs->csm->funcs->cs_need_flush(cs); -+} ++ /* check the totals for this operation */ ++ ++ if (num_bo == 0) ++ return 0; ++ ++ /* prepare */ ++ for (i = 0; i < num_bo; i++) { ++ bo = bos[i].bo; ++ ++ bos[i].new_accounted = 0; ++ read_domains = bos[i].read_domains; ++ write_domain = bos[i].write_domain; ++ ++ /* pinned bos don't count */ ++ if (radeon_legacy_bo_is_static(bo)) ++ continue; ++ ++ /* already accounted this bo */ ++ if (write_domain && (write_domain == bo->space_accounted)) ++ continue; ++ ++ if (read_domains && ((read_domains << 16) == bo->space_accounted)) ++ continue; ++ ++ if (bo->space_accounted == 0) { ++ if (write_domain == RADEON_GEM_DOMAIN_VRAM) ++ this_op_vram_write += bo->size; ++ else if (write_domain == RADEON_GEM_DOMAIN_GTT) ++ this_op_gart_write += bo->size; ++ else ++ this_op_read += bo->size; ++ bos[i].new_accounted = (read_domains << 16) | write_domain; ++ } else { ++ uint16_t old_read, old_write; ++ ++ old_read = bo->space_accounted >> 16; ++ old_write = bo->space_accounted & 0xffff; ++ ++ if (write_domain && (old_read & write_domain)) { ++ bos[i].new_accounted = write_domain; ++ /* moving from read to a write domain */ ++ if (write_domain == RADEON_GEM_DOMAIN_VRAM) { ++ this_op_read -= bo->size; ++ this_op_vram_write += bo->size; ++ } else if 
(write_domain == RADEON_GEM_DOMAIN_VRAM) { ++ this_op_read -= bo->size; ++ this_op_gart_write += bo->size; ++ } ++ } else if (read_domains & old_write) { ++ bos[i].new_accounted = bo->space_accounted & 0xffff; ++ } else { ++ /* rewrite the domains */ ++ if (write_domain != old_write) ++ fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write); ++ if (read_domains != old_read) ++ fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read); ++ return RADEON_CS_SPACE_FLUSH; ++ } ++ } ++ } ++ ++ if (this_op_read < 0) ++ this_op_read = 0; ++ ++ /* check sizes - operation first */ ++ if ((this_op_read + this_op_gart_write > csm->gart_limit) || ++ (this_op_vram_write > csm->vram_limit)) { ++ return RADEON_CS_SPACE_OP_TO_BIG; ++ } ++ ++ if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) || ++ ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) { ++ return RADEON_CS_SPACE_FLUSH; ++ } + -+static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file) -+{ -+ cs->csm->funcs->cs_print(cs, file); -+} ++ csm->gart_write_used += this_op_gart_write; ++ csm->vram_write_used += this_op_vram_write; ++ csm->read_used += this_op_read; ++ /* commit */ ++ for (i = 0; i < num_bo; i++) { ++ bo = bos[i].bo; ++ bo->space_accounted = bos[i].new_accounted; ++ } + -+static inline int radeon_cs_space_check(struct radeon_cs *cs, -+ struct radeon_cs_space_check *bos, -+ int num_bo) -+{ -+ return cs->csm->funcs->cs_space_check(cs, bos, num_bo); ++ return RADEON_CS_SPACE_OK; +} + -+static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit) -+{ -+ -+ if (domain == RADEON_GEM_DOMAIN_VRAM) -+ cs->csm->vram_limit = limit; -+ else -+ cs->csm->gart_limit = limit; -+} ++static struct radeon_cs_funcs radeon_cs_legacy_funcs = { ++ cs_create, ++ cs_write_reloc, ++ cs_begin, ++ cs_end, ++ cs_emit, ++ cs_destroy, ++ cs_erase, ++ 
cs_need_flush, ++ cs_print, ++ cs_check_space ++}; + -+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword) ++struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx) +{ -+ cs->packets[cs->cdw++] = dword; -+ if (cs->section) { -+ cs->section_cdw++; ++ struct cs_manager_legacy *csm; ++ ++ csm = (struct cs_manager_legacy*) ++ calloc(1, sizeof(struct cs_manager_legacy)); ++ if (csm == NULL) { ++ return NULL; + } ++ csm->base.funcs = &radeon_cs_legacy_funcs; ++ csm->base.fd = ctx->dri.fd; ++ csm->ctx = ctx; ++ csm->pending_age = 1; ++ return (struct radeon_cs_manager*)csm; +} + -+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) ++void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm) +{ -+ -+ memcpy(cs->packets + cs->cdw, &qword, sizeof(qword)); -+ cs->cdw+=2; -+ if (cs->section) { -+ cs->section_cdw+=2; -+ } ++ free(csm); +} -+#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c ++ +diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h new file mode 100644 -index 0000000..b47b095 +index 0000000..e177b4b --- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c -@@ -0,0 +1,504 @@ ++++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h +@@ -0,0 +1,40 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse @@ -3611,531 +24992,350 @@ index 0000000..b47b095 + * Nicolai Haehnle + * Jérôme Glisse + */ -+#include -+ -+#include "radeon_bocs_wrapper.h" -+ -+struct cs_manager_legacy { -+ struct radeon_cs_manager base; -+ struct radeon_context *ctx; -+ /* hack for scratch stuff */ -+ uint32_t pending_age; -+ uint32_t pending_count; -+ -+ -+}; -+ -+struct cs_reloc_legacy { -+ struct radeon_cs_reloc base; -+ uint32_t cindices; -+ uint32_t *indices; -+}; -+ -+ -+static struct radeon_cs *cs_create(struct radeon_cs_manager *csm, -+ uint32_t ndw) 
-+{ -+ struct radeon_cs *cs; ++#ifndef RADEON_CS_LEGACY_H ++#define RADEON_CS_LEGACY_H + -+ cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); -+ if (cs == NULL) { -+ return NULL; -+ } -+ cs->csm = csm; -+ cs->ndw = (ndw + 0x3FF) & (~0x3FF); -+ cs->packets = (uint32_t*)malloc(4*cs->ndw); -+ if (cs->packets == NULL) { -+ free(cs); -+ return NULL; -+ } -+ cs->relocs_total_size = 0; -+ return cs; -+} ++#include "radeon_common.h" + -+static int cs_write_reloc(struct radeon_cs *cs, -+ struct radeon_bo *bo, -+ uint32_t read_domain, -+ uint32_t write_domain, -+ uint32_t flags) -+{ -+ struct cs_reloc_legacy *relocs; -+ int i; ++struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); ++void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm); + -+ relocs = (struct cs_reloc_legacy *)cs->relocs; -+ /* check domains */ -+ if ((read_domain && write_domain) || (!read_domain && !write_domain)) { -+ /* in one CS a bo can only be in read or write domain but not -+ * in read & write domain at the same sime -+ */ -+ return -EINVAL; -+ } -+ if (read_domain == RADEON_GEM_DOMAIN_CPU) { -+ return -EINVAL; -+ } -+ if (write_domain == RADEON_GEM_DOMAIN_CPU) { -+ return -EINVAL; -+ } -+ /* check if bo is already referenced */ -+ for(i = 0; i < cs->crelocs; i++) { -+ uint32_t *indices; ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c +new file mode 100644 +index 0000000..393b121 +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_dma.c +@@ -0,0 +1,323 @@ ++/************************************************************************** + -+ if (relocs[i].base.bo->handle == bo->handle) { -+ /* Check domains must be in read or write. As we check already -+ * checked that in argument one of the read or write domain was -+ * set we only need to check that if previous reloc as the read -+ * domain set then the read_domain should also be set for this -+ * new relocation. 
-+ */ -+ if (relocs[i].base.read_domain && !read_domain) { -+ return -EINVAL; -+ } -+ if (relocs[i].base.write_domain && !write_domain) { -+ return -EINVAL; -+ } -+ relocs[i].base.read_domain |= read_domain; -+ relocs[i].base.write_domain |= write_domain; -+ /* save indice */ -+ relocs[i].cindices++; -+ indices = (uint32_t*)realloc(relocs[i].indices, -+ relocs[i].cindices * 4); -+ if (indices == NULL) { -+ relocs[i].cindices -= 1; -+ return -ENOMEM; -+ } -+ relocs[i].indices = indices; -+ relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; -+ return 0; -+ } -+ } -+ /* add bo to reloc */ -+ relocs = (struct cs_reloc_legacy*) -+ realloc(cs->relocs, -+ sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1)); -+ if (relocs == NULL) { -+ return -ENOMEM; -+ } -+ cs->relocs = relocs; -+ relocs[cs->crelocs].base.bo = bo; -+ relocs[cs->crelocs].base.read_domain = read_domain; -+ relocs[cs->crelocs].base.write_domain = write_domain; -+ relocs[cs->crelocs].base.flags = flags; -+ relocs[cs->crelocs].indices = (uint32_t*)malloc(4); -+ if (relocs[cs->crelocs].indices == NULL) { -+ return -ENOMEM; -+ } -+ relocs[cs->crelocs].indices[0] = cs->cdw - 1; -+ relocs[cs->crelocs].cindices = 1; -+ cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); -+ cs->crelocs++; -+ radeon_bo_ref(bo); -+ return 0; -+} ++Copyright (C) 2004 Nicolai Haehnle. ++Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 
+ -+static int cs_begin(struct radeon_cs *cs, -+ uint32_t ndw, -+ const char *file, -+ const char *func, -+ int line) -+{ -+ if (cs->section) { -+ fprintf(stderr, "CS already in a section(%s,%s,%d)\n", -+ cs->section_file, cs->section_func, cs->section_line); -+ fprintf(stderr, "CS can't start section(%s,%s,%d)\n", -+ file, func, line); -+ return -EPIPE; -+ } -+ cs->section = 1; -+ cs->section_ndw = ndw; -+ cs->section_cdw = 0; -+ cs->section_file = file; -+ cs->section_func = func; -+ cs->section_line = line; ++The Weather Channel (TM) funded Tungsten Graphics to develop the ++initial release of the Radeon 8500 driver under the XFree86 license. ++This notice must be preserved. + ++All Rights Reserved. + -+ if (cs->cdw + ndw > cs->ndw) { -+ uint32_t tmp, *ptr; -+ int num = (ndw > 0x3FF) ? ndw : 0x3FF; ++Permission is hereby granted, free of charge, to any person obtaining a ++copy of this software and associated documentation files (the "Software"), ++to deal in the Software without restriction, including without limitation ++on the rights to use, copy, modify, merge, publish, distribute, sub ++license, and/or sell copies of the Software, and to permit persons to whom ++the Software is furnished to do so, subject to the following conditions: + -+ tmp = (cs->cdw + 1 + num) & (~num); -+ ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); -+ if (ptr == NULL) { -+ return -ENOMEM; -+ } -+ cs->packets = ptr; -+ cs->ndw = tmp; -+ } ++The above copyright notice and this permission notice (including the next ++paragraph) shall be included in all copies or substantial portions of the ++Software. + -+ return 0; -+} ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL ++ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, ++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++USE OR OTHER DEALINGS IN THE SOFTWARE. + -+static int cs_end(struct radeon_cs *cs, -+ const char *file, -+ const char *func, -+ int line) ++**************************************************************************/ + -+{ -+ if (!cs->section) { -+ fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", -+ file, func, line); -+ return -EPIPE; -+ } -+ cs->section = 0; -+ if (cs->section_ndw != cs->section_cdw) { -+ fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", -+ cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); -+ fprintf(stderr, "CS section end at (%s,%s,%d)\n", -+ file, func, line); -+ return -EPIPE; -+ } -+ return 0; -+} ++#include "radeon_common.h" + -+static int cs_process_relocs(struct radeon_cs *cs) ++#if defined(USE_X86_ASM) ++#define COPY_DWORDS( dst, src, nr ) \ ++do { \ ++ int __tmp; \ ++ __asm__ __volatile__( "rep ; movsl" \ ++ : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ ++ : "0" (nr), \ ++ "D" ((long)dst), \ ++ "S" ((long)src) ); \ ++} while (0) ++#else ++#define COPY_DWORDS( dst, src, nr ) \ ++do { \ ++ int j; \ ++ for ( j = 0 ; j < nr ; j++ ) \ ++ dst[j] = ((int *)src)[j]; \ ++ dst += nr; \ ++} while (0) ++#endif ++ ++static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) +{ -+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; -+ struct cs_reloc_legacy *relocs; -+ int i, j, r; ++ int i; + -+ csm = (struct cs_manager_legacy*)cs->csm; -+ relocs = (struct cs_reloc_legacy *)cs->relocs; -+ restart: -+ for (i = 0; i < cs->crelocs; i++) { -+ for (j = 0; j < relocs[i].cindices; j++) { -+ uint32_t soffset, eoffset; ++ if (RADEON_DEBUG & DEBUG_VERTS) ++ fprintf(stderr, "%s count %d stride %d out %p data %p\n", ++ 
__FUNCTION__, count, stride, (void *)out, (void *)data); + -+ r = radeon_bo_legacy_validate(relocs[i].base.bo, -+ &soffset, &eoffset); -+ if (r == -EAGAIN) -+ goto restart; -+ if (r) { -+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", -+ relocs[i].base.bo, soffset, eoffset); -+ return r; -+ } -+ cs->packets[relocs[i].indices[j]] += soffset; -+ if (cs->packets[relocs[i].indices[j]] >= eoffset) { -+ /* radeon_bo_debug(relocs[i].base.bo, 12); */ -+ fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", -+ relocs[i].base.bo, soffset, eoffset); -+ fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", -+ relocs[i].base.bo, -+ cs->packets[relocs[i].indices[j]], -+ eoffset); -+ exit(0); -+ return -EINVAL; -+ } -+ } -+ } -+ return 0; ++ if (stride == 4) ++ COPY_DWORDS(out, data, count); ++ else ++ for (i = 0; i < count; i++) { ++ out[0] = *(int *)data; ++ out++; ++ data += stride; ++ } +} + -+static int cs_set_age(struct radeon_cs *cs) ++void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) +{ -+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; -+ struct cs_reloc_legacy *relocs; -+ int i; -+ -+ relocs = (struct cs_reloc_legacy *)cs->relocs; -+ for (i = 0; i < cs->crelocs; i++) { -+ radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age); -+ radeon_bo_unref(relocs[i].base.bo); -+ } -+ return 0; -+} ++ int i; + -+static void dump_cmdbuf(struct radeon_cs *cs) -+{ -+ int i; -+ for (i = 0; i < cs->cdw; i++){ -+ fprintf(stderr,"%x: %08x\n", i, cs->packets[i]); -+ } ++ if (RADEON_DEBUG & DEBUG_VERTS) ++ fprintf(stderr, "%s count %d stride %d out %p data %p\n", ++ __FUNCTION__, count, stride, (void *)out, (void *)data); + ++ if (stride == 8) ++ COPY_DWORDS(out, data, count * 2); ++ else ++ for (i = 0; i < count; i++) { ++ out[0] = *(int *)data; ++ out[1] = *(int *)(data + 4); ++ out += 2; ++ data += stride; ++ } +} -+static int cs_emit(struct radeon_cs *cs) -+{ -+ struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; -+ 
drm_radeon_cmd_buffer_t cmd; -+ drm_r300_cmd_header_t age; -+ uint64_t ull; -+ int r; + -+ csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); -+ -+ /* append buffer age */ -+ if (IS_R300_CLASS(csm->ctx->radeonScreen)) { -+ age.scratch.cmd_type = R300_CMD_SCRATCH; -+ /* Scratch register 2 corresponds to what radeonGetAge polls */ -+ csm->pending_age = 0; -+ csm->pending_count = 1; -+ ull = (uint64_t) (intptr_t) &csm->pending_age; -+ age.scratch.reg = 2; -+ age.scratch.n_bufs = 1; -+ age.scratch.flags = 0; -+ radeon_cs_write_dword(cs, age.u); -+ radeon_cs_write_qword(cs, ull); -+ radeon_cs_write_dword(cs, 0); -+ } ++void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) ++{ ++ int i; + -+ r = cs_process_relocs(cs); -+ if (r) { -+ return 0; -+ } ++ if (RADEON_DEBUG & DEBUG_VERTS) ++ fprintf(stderr, "%s count %d stride %d out %p data %p\n", ++ __FUNCTION__, count, stride, (void *)out, (void *)data); + -+ cmd.buf = (char *)cs->packets; -+ cmd.bufsz = cs->cdw * 4; -+ if (csm->ctx->state.scissor.enabled) { -+ cmd.nbox = csm->ctx->state.scissor.numClipRects; -+ cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects; -+ } else { -+ cmd.nbox = csm->ctx->numClipRects; -+ cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects; ++ if (stride == 12) { ++ COPY_DWORDS(out, data, count * 3); + } ++ else ++ for (i = 0; i < count; i++) { ++ out[0] = *(int *)data; ++ out[1] = *(int *)(data + 4); ++ out[2] = *(int *)(data + 8); ++ out += 3; ++ data += stride; ++ } ++} + -+ //dump_cmdbuf(cs); ++static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) ++{ ++ int i; + -+ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); -+ if (r) { -+ return r; -+ } -+ if (!IS_R300_CLASS(csm->ctx->radeonScreen)) { -+ drm_radeon_irq_emit_t emit_cmd; -+ emit_cmd.irq_seq = &csm->pending_age; -+ r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd)); -+ if (r) { -+ return r; -+ } -+ } -+ cs_set_age(cs); ++ if 
(RADEON_DEBUG & DEBUG_VERTS) ++ fprintf(stderr, "%s count %d stride %d out %p data %p\n", ++ __FUNCTION__, count, stride, (void *)out, (void *)data); + -+ cs->csm->read_used = 0; -+ cs->csm->vram_write_used = 0; -+ cs->csm->gart_write_used = 0; -+ return 0; ++ if (stride == 16) ++ COPY_DWORDS(out, data, count * 4); ++ else ++ for (i = 0; i < count; i++) { ++ out[0] = *(int *)data; ++ out[1] = *(int *)(data + 4); ++ out[2] = *(int *)(data + 8); ++ out[3] = *(int *)(data + 12); ++ out += 4; ++ data += stride; ++ } +} + -+static void inline cs_free_reloc(void *relocs_p, int crelocs) ++void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, ++ GLvoid * data, int size, int stride, int count) +{ -+ struct cs_reloc_legacy *relocs = relocs_p; -+ int i; -+ if (!relocs_p) -+ return; -+ for (i = 0; i < crelocs; i++) -+ free(relocs[i].indices); -+} ++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ uint32_t *out; + -+static int cs_destroy(struct radeon_cs *cs) -+{ -+ cs_free_reloc(cs->relocs, cs->crelocs); -+ free(cs->relocs); -+ free(cs->packets); -+ free(cs); -+ return 0; -+} ++ if (stride == 0) { ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); ++ count = 1; ++ aos->stride = 0; ++ } else { ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); ++ aos->stride = size; ++ } + -+static int cs_erase(struct radeon_cs *cs) -+{ -+ cs_free_reloc(cs->relocs, cs->crelocs); -+ free(cs->relocs); -+ cs->relocs_total_size = 0; -+ cs->relocs = NULL; -+ cs->crelocs = 0; -+ cs->cdw = 0; -+ cs->section = 0; -+ return 0; -+} ++ aos->components = size; ++ aos->count = count; + -+static int cs_need_flush(struct radeon_cs *cs) -+{ -+ /* this function used to flush when the BO usage got to -+ * a certain size, now the higher levels handle this better */ -+ return 0; ++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); ++ switch (size) { ++ case 1: radeonEmitVec4(out, data, stride, count); break; ++ case 2: radeonEmitVec8(out, data, stride, 
count); break; ++ case 3: radeonEmitVec12(out, data, stride, count); break; ++ case 4: radeonEmitVec16(out, data, stride, count); break; ++ default: ++ assert(0); ++ break; ++ } +} + -+static void cs_print(struct radeon_cs *cs, FILE *file) ++void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) +{ -+} ++ struct radeon_cs_space_check bos[1]; ++ int flushed = 0, ret; + -+static int cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo) -+{ -+ struct radeon_cs_manager *csm = cs->csm; -+ int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0; -+ uint32_t read_domains, write_domain; -+ int i; -+ struct radeon_bo *bo; ++ size = MAX2(size, MAX_DMA_BUF_SZ * 16); + -+ /* check the totals for this operation */ ++ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) ++ fprintf(stderr, "%s\n", __FUNCTION__); + -+ if (num_bo == 0) -+ return 0; ++ if (rmesa->dma.flush) { ++ rmesa->dma.flush(rmesa->glCtx); ++ } + -+ /* prepare */ -+ for (i = 0; i < num_bo; i++) { -+ bo = bos[i].bo; ++ if (rmesa->dma.nr_released_bufs > 4) { ++ rcommonFlushCmdBuf(rmesa, __FUNCTION__); ++ rmesa->dma.nr_released_bufs = 0; ++ } + -+ bos[i].new_accounted = 0; -+ read_domains = bos[i].read_domains; -+ write_domain = bos[i].write_domain; -+ -+ /* pinned bos don't count */ -+ if (radeon_legacy_bo_is_static(bo)) -+ continue; -+ -+ /* already accounted this bo */ -+ if (write_domain && (write_domain == bo->space_accounted)) -+ continue; ++ if (rmesa->dma.current) { ++ radeon_bo_unmap(rmesa->dma.current); ++ radeon_bo_unref(rmesa->dma.current); ++ rmesa->dma.current = 0; ++ } + -+ if (read_domains && ((read_domains << 16) == bo->space_accounted)) -+ continue; -+ -+ if (bo->space_accounted == 0) { -+ if (write_domain == RADEON_GEM_DOMAIN_VRAM) -+ this_op_vram_write += bo->size; -+ else if (write_domain == RADEON_GEM_DOMAIN_GTT) -+ this_op_gart_write += bo->size; -+ else -+ this_op_read += bo->size; -+ bos[i].new_accounted = (read_domains << 16) | write_domain; 
-+ } else { -+ uint16_t old_read, old_write; -+ -+ old_read = bo->space_accounted >> 16; -+ old_write = bo->space_accounted & 0xffff; ++again_alloc: ++ rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, ++ 0, size, 4, RADEON_GEM_DOMAIN_GTT, ++ 0); + -+ if (write_domain && (old_read & write_domain)) { -+ bos[i].new_accounted = write_domain; -+ /* moving from read to a write domain */ -+ if (write_domain == RADEON_GEM_DOMAIN_VRAM) { -+ this_op_read -= bo->size; -+ this_op_vram_write += bo->size; -+ } else if (write_domain == RADEON_GEM_DOMAIN_VRAM) { -+ this_op_read -= bo->size; -+ this_op_gart_write += bo->size; -+ } -+ } else if (read_domains & old_write) { -+ bos[i].new_accounted = bo->space_accounted & 0xffff; -+ } else { -+ /* rewrite the domains */ -+ if (write_domain != old_write) -+ fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write); -+ if (read_domains != old_read) -+ fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read); -+ return RADEON_CS_SPACE_FLUSH; -+ } -+ } ++ if (!rmesa->dma.current) { ++ rcommonFlushCmdBuf(rmesa, __FUNCTION__); ++ rmesa->dma.nr_released_bufs = 0; ++ goto again_alloc; + } ++ ++ rmesa->dma.current_used = 0; ++ rmesa->dma.current_vertexptr = 0; + -+ if (this_op_read < 0) -+ this_op_read = 0; ++ bos[0].bo = rmesa->dma.current; ++ bos[0].read_domains = RADEON_GEM_DOMAIN_GTT; ++ bos[0].write_domain =0 ; ++ bos[0].new_accounted = 0; + -+ /* check sizes - operation first */ -+ if ((this_op_read + this_op_gart_write > csm->gart_limit) || -+ (this_op_vram_write > csm->vram_limit)) { -+ return RADEON_CS_SPACE_OP_TO_BIG; ++ ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1); ++ if (ret == RADEON_CS_SPACE_OP_TO_BIG) { ++ fprintf(stderr,"Got OPEARTION TO BIG ILLEGAL - this cannot happen"); ++ assert(0); ++ } else if (ret == RADEON_CS_SPACE_FLUSH) { ++ rcommonFlushCmdBuf(rmesa, __FUNCTION__); ++ if (flushed) { ++ fprintf(stderr,"flushed but still no 
space\n"); ++ assert(0); ++ } ++ flushed = 1; ++ goto again_alloc; + } ++ radeon_bo_map(rmesa->dma.current, 1); ++} + -+ if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) || -+ ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) { -+ return RADEON_CS_SPACE_FLUSH; -+ } ++/* Allocates a region from rmesa->dma.current. If there isn't enough ++ * space in current, grab a new buffer (and discard what was left of current) ++ */ ++void radeonAllocDmaRegion(radeonContextPtr rmesa, ++ struct radeon_bo **pbo, int *poffset, ++ int bytes, int alignment) ++{ ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + -+ csm->gart_write_used += this_op_gart_write; -+ csm->vram_write_used += this_op_vram_write; -+ csm->read_used += this_op_read; -+ /* commit */ -+ for (i = 0; i < num_bo; i++) { -+ bo = bos[i].bo; -+ bo->space_accounted = bos[i].new_accounted; -+ } ++ if (rmesa->dma.flush) ++ rmesa->dma.flush(rmesa->glCtx); + -+ return RADEON_CS_SPACE_OK; -+} ++ assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); + -+static struct radeon_cs_funcs radeon_cs_legacy_funcs = { -+ cs_create, -+ cs_write_reloc, -+ cs_begin, -+ cs_end, -+ cs_emit, -+ cs_destroy, -+ cs_erase, -+ cs_need_flush, -+ cs_print, -+ cs_check_space -+}; ++ alignment--; ++ rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; + -+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx) -+{ -+ struct cs_manager_legacy *csm; ++ if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) ++ radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); + -+ csm = (struct cs_manager_legacy*) -+ calloc(1, sizeof(struct cs_manager_legacy)); -+ if (csm == NULL) { -+ return NULL; -+ } -+ csm->base.funcs = &radeon_cs_legacy_funcs; -+ csm->base.fd = ctx->dri.fd; -+ csm->ctx = ctx; -+ csm->pending_age = 1; -+ return (struct radeon_cs_manager*)csm; ++ 
*poffset = rmesa->dma.current_used; ++ *pbo = rmesa->dma.current; ++ radeon_bo_ref(*pbo); ++ ++ /* Always align to at least 16 bytes */ ++ rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; ++ rmesa->dma.current_vertexptr = rmesa->dma.current_used; ++ ++ assert(rmesa->dma.current_used <= rmesa->dma.current->size); +} + -+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm) ++void radeonReleaseDmaRegion(radeonContextPtr rmesa) +{ -+ free(csm); ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); ++ if (rmesa->dma.current) { ++ rmesa->dma.nr_released_bufs++; ++ radeon_bo_unmap(rmesa->dma.current); ++ radeon_bo_unref(rmesa->dma.current); ++ } ++ rmesa->dma.current = NULL; +} + -diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h -new file mode 100644 -index 0000000..e177b4b ---- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h -@@ -0,0 +1,40 @@ -+/* -+ * Copyright © 2008 Nicolai Haehnle -+ * Copyright © 2008 Jérôme Glisse -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sub license, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, -+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -+ * USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. ++ ++/* Flush vertices in the current dma region. + */ -+/* -+ * Authors: -+ * Aapo Tahkola -+ * Nicolai Haehnle -+ * Jérôme Glisse ++void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) ++{ ++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ struct radeon_dma *dma = &rmesa->dma; ++ ++ ++ if (RADEON_DEBUG & DEBUG_IOCTL) ++ fprintf(stderr, "%s\n", __FUNCTION__); ++ dma->flush = NULL; ++ ++ if (dma->current) { ++ GLuint current_offset = dma->current_used; ++ ++ assert (dma->current_used + ++ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == ++ dma->current_vertexptr); ++ ++ if (dma->current_used != dma->current_vertexptr) { ++ dma->current_used = dma->current_vertexptr; ++ ++ rmesa->vtbl.swtcl_flush(ctx, current_offset); ++ } ++ rmesa->swtcl.numverts = 0; ++ } ++} ++/* Alloc space in the current dma region. 
+ */ -+#ifndef RADEON_CS_LEGACY_H -+#define RADEON_CS_LEGACY_H ++void * ++rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) ++{ ++ GLuint bytes = vsize * nverts; ++ void *head; + -+#include "radeon_common.h" ++ if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { ++ radeonRefillCurrentDmaRegion(rmesa, bytes); ++ } + -+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); -+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm); ++ if (!rmesa->dma.flush) { ++ rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; ++ rmesa->dma.flush = rcommon_flush_last_swtcl_prim; ++ } + -+#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c ++ ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); ++ ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); ++ ASSERT( rmesa->dma.current_used + ++ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == ++ rmesa->dma.current_vertexptr ); ++ ++ head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); ++ rmesa->dma.current_vertexptr += bytes; ++ rmesa->swtcl.numverts += nverts; ++ return head; ++} +diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h new file mode 100644 -index 0000000..393b121 +index 0000000..cee3744 --- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c -@@ -0,0 +1,323 @@ ++++ b/src/mesa/drivers/dri/radeon/radeon_dma.h +@@ -0,0 +1,51 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. 
@@ -4168,360 +25368,2871 @@ index 0000000..393b121 + +**************************************************************************/ + -+#include "radeon_common.h" ++#ifndef RADEON_DMA_H ++#define RADEON_DMA_H + -+#if defined(USE_X86_ASM) -+#define COPY_DWORDS( dst, src, nr ) \ -+do { \ -+ int __tmp; \ -+ __asm__ __volatile__( "rep ; movsl" \ -+ : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ -+ : "0" (nr), \ -+ "D" ((long)dst), \ -+ "S" ((long)src) ); \ -+} while (0) -+#else -+#define COPY_DWORDS( dst, src, nr ) \ -+do { \ -+ int j; \ -+ for ( j = 0 ; j < nr ; j++ ) \ -+ dst[j] = ((int *)src)[j]; \ -+ dst += nr; \ -+} while (0) ++void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count); ++void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count); ++ ++void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, ++ GLvoid * data, int size, int stride, int count); ++ ++void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); ++void radeonAllocDmaRegion(radeonContextPtr rmesa, ++ struct radeon_bo **pbo, int *poffset, ++ int bytes, int alignment); ++void radeonReleaseDmaRegion(radeonContextPtr rmesa); ++ ++void rcommon_flush_last_swtcl_prim(GLcontext *ctx); ++ ++void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize); ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c +index 09acf6b..b5ab923 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c ++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c +@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "swrast/swrast.h" + + #include "radeon_context.h" ++#include "radeon_common.h" + #include "radeon_state.h" + #include "radeon_ioctl.h" + #include "radeon_tcl.h" +@@ -58,75 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define RADEON_IDLE_RETRY 16 + + +-static void radeonWaitForIdle( radeonContextPtr rmesa ); +-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, +- const char * caller ); +- +-static void print_state_atom( struct radeon_state_atom *state ) +-{ +- int i; +- +- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); +- +- if (RADEON_DEBUG & DEBUG_VERBOSE) +- for (i = 0 ; i < state->cmd_size ; i++) +- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); +- +-} +- +-static void radeonSaveHwState( radeonContextPtr rmesa ) +-{ +- struct radeon_state_atom *atom; +- char * dest = rmesa->backup_store.cmd_buf; +- +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- rmesa->backup_store.cmd_used = 0; +- +- foreach( atom, &rmesa->hw.atomlist ) { +- if ( atom->check( rmesa->glCtx ) ) { +- int size = atom->cmd_size * 4; +- memcpy( dest, atom->cmd, size); +- dest += size; +- rmesa->backup_store.cmd_used += size; +- if (RADEON_DEBUG & DEBUG_STATE) +- print_state_atom( atom ); +- } +- } +- +- assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ ); +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Returning to radeonEmitState\n"); +-} +- +-/* At this point we were in FlushCmdBufLocked but we had lost our context, so +- * we need to unwire our current cmdbuf, hook the one with the saved state in +- * it, flush it, and then put the current one back. This is so commands at the +- * start of a cmdbuf can rely on the state being kept from the previous one. 
+- */ +-static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa ) +-{ +- GLuint nr_released_bufs; +- struct radeon_store saved_store; +- +- if (rmesa->backup_store.cmd_used == 0) +- return; +- +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Emitting backup state on lost context\n"); +- +- rmesa->lost_context = GL_FALSE; +- +- nr_released_bufs = rmesa->dma.nr_released_bufs; +- saved_store = rmesa->store; +- rmesa->dma.nr_released_bufs = 0; +- rmesa->store = rmesa->backup_store; +- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); +- rmesa->dma.nr_released_bufs = nr_released_bufs; +- rmesa->store = saved_store; +-} +- + /* ============================================================= + * Kernel command buffer handling + */ +@@ -134,893 +66,340 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa ) + /* The state atoms will be emitted in the order they appear in the atom list, + * so this step is important. + */ +-void radeonSetUpAtomList( radeonContextPtr rmesa ) ++void radeonSetUpAtomList( r100ContextPtr rmesa ) + { +- int i, mtu = rmesa->glCtx->Const.MaxTextureUnits; +- +- make_empty_list(&rmesa->hw.atomlist); +- rmesa->hw.atomlist.name = "atom-list"; +- +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc); ++ int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits; ++ ++ make_empty_list(&rmesa->radeon.hw.atomlist); ++ rmesa->radeon.hw.atomlist.name = "atom-list"; ++ ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin); ++ 
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc); + for (i = 0; i < mtu; ++i) { +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]); + } +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl); + for (i = 0; i < 3 + mtu; ++i) +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]); + for (i = 0; i < 8; ++i) +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]); + for (i = 0; i < 6; ++i) +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog); +- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt); +-} +- +-void radeonEmitState( radeonContextPtr rmesa ) +-{ +- struct radeon_state_atom *atom; +- char *dest; +- +- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->save_on_next_emit) { +- radeonSaveHwState(rmesa); +- rmesa->save_on_next_emit = GL_FALSE; +- } +- +- /* this code used to return here but now it emits zbs */ +- +- /* To avoid going across the entire set of states multiple times, just check +- * for enough space 
for the case of emitting all state, and inline the +- * radeonAllocCmdBuf code here without all the checks. +- */ +- radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size); +- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; +- +- /* We always always emit zbs, this is due to a bug found by keithw in +- the hardware and rediscovered after Erics changes by me. +- if you ever touch this code make sure you emit zbs otherwise +- you get tcl lockups on at least M7/7500 class of chips - airlied */ +- rmesa->hw.zbs.dirty=1; +- +- if (RADEON_DEBUG & DEBUG_STATE) { +- foreach(atom, &rmesa->hw.atomlist) { +- if (atom->dirty || rmesa->hw.all_dirty) { +- if (atom->check(rmesa->glCtx)) +- print_state_atom(atom); +- else +- fprintf(stderr, "skip state %s\n", atom->name); +- } +- } +- } +- +- foreach(atom, &rmesa->hw.atomlist) { +- if (rmesa->hw.all_dirty) +- atom->dirty = GL_TRUE; +- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) && +- atom->is_tcl) +- atom->dirty = GL_FALSE; +- if (atom->dirty) { +- if (atom->check(rmesa->glCtx)) { +- int size = atom->cmd_size * 4; +- memcpy(dest, atom->cmd, size); +- dest += size; +- rmesa->store.cmd_used += size; +- atom->dirty = GL_FALSE; +- } +- } +- } +- +- assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ); +- +- rmesa->hw.is_dirty = GL_FALSE; +- rmesa->hw.all_dirty = GL_FALSE; ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog); ++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt); + } + + /* Fire a section of the retained (indexed_verts) buffer as a regular + * primtive. 
+ */ +-extern void radeonEmitVbufPrim( radeonContextPtr rmesa, ++extern void radeonEmitVbufPrim( r100ContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint vertex_nr ) + { +- drm_radeon_cmd_header_t *cmd; +- ++ BATCH_LOCALS(&rmesa->radeon); + + assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); + +- radeonEmitState( rmesa ); ++ radeonEmitState(&rmesa->radeon); + +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__, +- rmesa->store.cmd_used/4); +- +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ, +- __FUNCTION__ ); + #if RADEON_OLD_PACKETS +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16); +- cmd[2].i = rmesa->ioctl.vertex_offset; +- cmd[3].i = vertex_nr; +- cmd[4].i = vertex_format; +- cmd[5].i = (primitive | +- RADEON_CP_VC_CNTL_PRIM_WALK_LIST | +- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | +- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | +- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); +- +- if (RADEON_DEBUG & DEBUG_PRIMS) +- fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n", +- __FUNCTION__, +- cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i); +-#else +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16); +- cmd[2].i = vertex_format; +- cmd[3].i = (primitive | +- RADEON_CP_VC_CNTL_PRIM_WALK_LIST | +- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | +- RADEON_CP_VC_CNTL_MAOS_ENABLE | +- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | +- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); +- +- +- if (RADEON_DEBUG & DEBUG_PRIMS) +- fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n", +- __FUNCTION__, +- cmd[1].i, cmd[2].i, cmd[3].i); ++ BEGIN_BATCH(8); ++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3); ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, 
rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); ++ } else { ++ OUT_BATCH(rmesa->ioctl.vertex_offset); ++ } ++ ++ OUT_BATCH(vertex_nr); ++ OUT_BATCH(vertex_format); ++ OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST | ++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | ++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | ++ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); ++ ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->ioctl.bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++ ++ END_BATCH(); ++ ++#else ++ BEGIN_BATCH(4); ++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1); ++ OUT_BATCH(vertex_format); ++ OUT_BATCH(primitive | ++ RADEON_CP_VC_CNTL_PRIM_WALK_LIST | ++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | ++ RADEON_CP_VC_CNTL_MAOS_ENABLE | ++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | ++ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); ++ END_BATCH(); + #endif + } + +- +-void radeonFlushElts( radeonContextPtr rmesa ) ++void radeonFlushElts( GLcontext *ctx ) + { +- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); +- int dwords; +-#if RADEON_OLD_PACKETS +- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2; +-#else +- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2; +-#endif +- ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&rmesa->radeon); ++ int nr; ++ uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start); ++ int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw); ++ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + +- assert( rmesa->dma.flush == radeonFlushElts ); +- rmesa->dma.flush = NULL; ++ assert( rmesa->radeon.dma.flush == radeonFlushElts ); ++ rmesa->radeon.dma.flush = NULL; + +- /* Cope with odd number of elts: +- */ +- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; +- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 
4; ++ nr = rmesa->tcl.elt_used; + + #if RADEON_OLD_PACKETS +- cmd[1] |= (dwords - 3) << 16; ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ dwords -= 2; ++ } +#endif + -+static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) -+{ -+ int i; ++#if RADEON_OLD_PACKETS ++ cmd[1] |= (dwords + 3) << 16; + cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; + #else +- cmd[1] |= (dwords - 3) << 16; ++ cmd[1] |= (dwords + 2) << 16; + cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; + #endif + ++ rmesa->radeon.cmdbuf.cs->cdw += dwords; ++ rmesa->radeon.cmdbuf.cs->section_cdw += dwords; ++ ++#if RADEON_OLD_PACKETS ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->ioctl.bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ } ++#endif + -+ if (RADEON_DEBUG & DEBUG_VERTS) -+ fprintf(stderr, "%s count %d stride %d out %p data %p\n", -+ __FUNCTION__, count, stride, (void *)out, (void *)data); ++ END_BATCH(); + -+ if (stride == 4) -+ COPY_DWORDS(out, data, count); -+ else -+ for (i = 0; i < count; i++) { -+ out[0] = *(int *)data; -+ out++; -+ data += stride; -+ } + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); +- radeonFinish( rmesa->glCtx ); ++ radeonFinish( rmesa->radeon.glCtx ); + } +-} + +} + +-GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, ++GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint min_nr ) + { +- drm_radeon_cmd_header_t *cmd; + GLushort *retval; ++ int align_min_nr; ++ BATCH_LOCALS(&rmesa->radeon); + + if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr); ++ fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); + + assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); + +- radeonEmitState( rmesa ); ++ radeonEmitState(&rmesa->radeon); + +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, +- ELTS_BUFSZ(min_nr), +- __FUNCTION__ ); ++ 
rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw; ++ ++ /* round up min_nr to align the state */ ++ align_min_nr = (min_nr + 1) & ~1; ++ + #if RADEON_OLD_PACKETS +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM; +- cmd[2].i = rmesa->ioctl.vertex_offset; +- cmd[3].i = 0xffff; +- cmd[4].i = vertex_format; +- cmd[5].i = (primitive | +- RADEON_CP_VC_CNTL_PRIM_WALK_IND | +- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | +- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); +- +- retval = (GLushort *)(cmd+6); +-#else +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; +- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX; +- cmd[2].i = vertex_format; +- cmd[3].i = (primitive | +- RADEON_CP_VC_CNTL_PRIM_WALK_IND | +- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | +- RADEON_CP_VC_CNTL_MAOS_ENABLE | +- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); +- +- retval = (GLushort *)(cmd+4); ++ BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4); ++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0); ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); ++ } else { ++ OUT_BATCH(rmesa->ioctl.vertex_offset); ++ } ++ OUT_BATCH(0xffff); ++ OUT_BATCH(vertex_format); ++ OUT_BATCH(primitive | ++ RADEON_CP_VC_CNTL_PRIM_WALK_IND | ++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | ++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); + -+void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) -+{ -+ int i; ++#else ++ BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4); ++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0); ++ OUT_BATCH(vertex_format); ++ OUT_BATCH(primitive | ++ RADEON_CP_VC_CNTL_PRIM_WALK_IND | ++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | ++ RADEON_CP_VC_CNTL_MAOS_ENABLE | ++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); + #endif + +- if (RADEON_DEBUG & DEBUG_PRIMS) +- fprintf(stderr, "%s: header 
0x%x vfmt 0x%x prim %x \n", +- __FUNCTION__, +- cmd[1].i, vertex_format, primitive); + +- assert(!rmesa->dma.flush); +- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +- rmesa->dma.flush = radeonFlushElts; ++ rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw; ++ rmesa->tcl.elt_used = min_nr; + +- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; ++ retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset); ++ ++ if (RADEON_DEBUG & DEBUG_PRIMS) ++ fprintf(stderr, "%s: header prim %x \n", ++ __FUNCTION__, primitive); ++ ++ assert(!rmesa->radeon.dma.flush); ++ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; ++ rmesa->radeon.dma.flush = radeonFlushElts; + + return retval; + } + +- +- +-void radeonEmitVertexAOS( radeonContextPtr rmesa, ++void radeonEmitVertexAOS( r100ContextPtr rmesa, + GLuint vertex_size, ++ struct radeon_bo *bo, + GLuint offset ) + { + #if RADEON_OLD_PACKETS +- rmesa->ioctl.vertex_size = vertex_size; + rmesa->ioctl.vertex_offset = offset; ++ rmesa->ioctl.bo = bo; + #else +- drm_radeon_cmd_header_t *cmd; ++ BATCH_LOCALS(&rmesa->radeon); + + if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) + fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ, +- __FUNCTION__ ); ++ BEGIN_BATCH(7); ++ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2); ++ OUT_BATCH(1); ++ OUT_BATCH(vertex_size | (vertex_size << 8)); ++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); ++ END_BATCH(); + +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16); +- cmd[2].i = 1; +- cmd[3].i = vertex_size | (vertex_size << 8); +- cmd[4].i = offset; + #endif + } + + +-void radeonEmitAOS( radeonContextPtr rmesa, +- struct radeon_dma_region **component, ++void radeonEmitAOS( r100ContextPtr rmesa, + GLuint nr, + GLuint 
offset ) + { + #if RADEON_OLD_PACKETS + assert( nr == 1 ); +- assert( component[0]->aos_size == component[0]->aos_stride ); +- rmesa->ioctl.vertex_size = component[0]->aos_size; ++ rmesa->ioctl.bo = rmesa->tcl.aos[0].bo; + rmesa->ioctl.vertex_offset = +- (component[0]->aos_start + offset * component[0]->aos_stride * 4); ++ (rmesa->tcl.aos[0].offset + offset * rmesa->tcl.aos[0].stride * 4); + #else +- drm_radeon_cmd_header_t *cmd; +- int sz = AOS_BUFSZ(nr); ++ BATCH_LOCALS(&rmesa->radeon); ++ uint32_t voffset; ++ // int sz = AOS_BUFSZ(nr); ++ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; +- int *tmp; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + +- +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz, +- __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16); +- cmd[2].i = nr; +- tmp = &cmd[0].i; +- cmd += 3; +- +- for (i = 0 ; i < nr ; i++) { +- if (i & 1) { +- cmd[0].i |= ((component[i]->aos_stride << 24) | +- (component[i]->aos_size << 16)); +- cmd[2].i = (component[i]->aos_start + +- offset * component[i]->aos_stride * 4); +- cmd += 3; +- } +- else { +- cmd[0].i = ((component[i]->aos_stride << 8) | +- (component[i]->aos_size << 0)); +- cmd[1].i = (component[i]->aos_start + +- offset * component[i]->aos_stride * 4); +- } +- } +- +- if (RADEON_DEBUG & DEBUG_VERTS) { +- fprintf(stderr, "%s:\n", __FUNCTION__); +- for (i = 0 ; i < sz ; i++) +- fprintf(stderr, " %d: %x\n", i, tmp[i]); +- } +-#endif +-} +- +-/* using already shifted color_fmt! */ +-void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? 
*/ +- GLuint color_fmt, +- GLuint src_pitch, +- GLuint src_offset, +- GLuint dst_pitch, +- GLuint dst_offset, +- GLint srcx, GLint srcy, +- GLint dstx, GLint dsty, +- GLuint w, GLuint h ) +-{ +- drm_radeon_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", +- __FUNCTION__, +- src_pitch, src_offset, srcx, srcy, +- dst_pitch, dst_offset, dstx, dsty, +- w, h); +- +- assert( (src_pitch & 63) == 0 ); +- assert( (dst_pitch & 63) == 0 ); +- assert( (src_offset & 1023) == 0 ); +- assert( (dst_offset & 1023) == 0 ); +- assert( w < (1<<16) ); +- assert( h < (1<<16) ); +- +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int), +- __FUNCTION__ ); +- +- +- cmd[0].i = 0; +- cmd[0].header.cmd_type = RADEON_CMD_PACKET3; +- cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16); +- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | +- RADEON_GMC_DST_PITCH_OFFSET_CNTL | +- RADEON_GMC_BRUSH_NONE | +- color_fmt | +- RADEON_GMC_SRC_DATATYPE_COLOR | +- RADEON_ROP3_S | +- RADEON_DP_SRC_SOURCE_MEMORY | +- RADEON_GMC_CLR_CMP_CNTL_DIS | +- RADEON_GMC_WR_MSK_DIS ); +- +- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); +- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); +- cmd[5].i = (srcx << 16) | srcy; +- cmd[6].i = (dstx << 16) | dsty; /* dst */ +- cmd[7].i = (w << 16) | h; +-} +- +- +-void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ) +-{ +- drm_radeon_cmd_header_t *cmd; +- +- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); +- +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int), +- __FUNCTION__ ); +- cmd[0].i = 0; +- cmd[0].wait.cmd_type = RADEON_CMD_WAIT; +- cmd[0].wait.flags = flags; +-} +- +- +-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, +- const char * caller ) +-{ +- int ret, i; +- drm_radeon_cmd_buffer_t cmd; +- +- if (rmesa->lost_context) +- radeonBackUpAndEmitLostStateLocked(rmesa); +- +- if (RADEON_DEBUG & 
DEBUG_IOCTL) { +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (RADEON_DEBUG & DEBUG_VERBOSE) +- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) +- fprintf(stderr, "%d: %x\n", i/4, +- *(int *)(&rmesa->store.cmd_buf[i])); +- } +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, +- rmesa->dma.nr_released_bufs); +- +- +- if (RADEON_DEBUG & DEBUG_SANITY) { +- if (rmesa->state.scissor.enabled) +- ret = radeonSanityCmdBuffer( rmesa, +- rmesa->state.scissor.numClipRects, +- rmesa->state.scissor.pClipRects); +- else +- ret = radeonSanityCmdBuffer( rmesa, +- rmesa->numClipRects, +- rmesa->pClipRects); +- if (ret) { +- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); +- goto out; ++ BEGIN_BATCH(sz+2+(nr * 2)); ++ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); ++ OUT_BATCH(nr); ++ ++ if (!rmesa->radeon.radeonScreen->kernel_mm) { ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) | ++ (rmesa->tcl.aos[i].stride << 8) | ++ (rmesa->tcl.aos[i + 1].components << 16) | ++ (rmesa->tcl.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->tcl.aos[i + 0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[i].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[i+1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } +- } +- +- +- cmd.bufsz = rmesa->store.cmd_used; +- cmd.buf = rmesa->store.cmd_buf; +- +- if (rmesa->state.scissor.enabled) { +- cmd.nbox = rmesa->state.scissor.numClipRects; +- cmd.boxes = rmesa->state.scissor.pClipRects; +- } else { +- cmd.nbox = rmesa->numClipRects; +- cmd.boxes = rmesa->pClipRects; +- } +- +- ret = drmCommandWrite( rmesa->dri.fd, +- DRM_RADEON_CMDBUF, +- &cmd, sizeof(cmd) ); +- +- if (ret) +- fprintf(stderr, "drmCommandWrite: %d\n", ret); +- +- if 
(RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); +- radeonWaitForIdleLocked( rmesa ); +- } +- +- out: +- rmesa->store.primnr = 0; +- rmesa->store.statenr = 0; +- rmesa->store.cmd_used = 0; +- rmesa->dma.nr_released_bufs = 0; +- rmesa->save_on_next_emit = 1; +- +- return ret; +-} +- +- +-/* Note: does not emit any commands to avoid recursion on +- * radeonAllocCmdBuf. +- */ +-void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller ) +-{ +- int ret; +- +- +- LOCK_HARDWARE( rmesa ); +- +- ret = radeonFlushCmdBufLocked( rmesa, caller ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if (ret) { +- fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret); +- exit(ret); +- } +-} +- +-/* ============================================================= +- * Hardware vertex buffer handling +- */ +- +- +-void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ) +-{ +- struct radeon_dma_buffer *dmabuf; +- int fd = rmesa->dri.fd; +- int index = 0; +- int size = 0; +- drmDMAReq dma; +- int ret; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->dma.flush) { +- rmesa->dma.flush( rmesa ); +- } +- +- if (rmesa->dma.current.buf) +- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); +- +- if (rmesa->dma.nr_released_bufs > 4) +- radeonFlushCmdBuf( rmesa, __FUNCTION__ ); +- +- dma.context = rmesa->dri.hwContext; +- dma.send_count = 0; +- dma.send_list = NULL; +- dma.send_sizes = NULL; +- dma.flags = 0; +- dma.request_count = 1; +- dma.request_size = RADEON_BUFFER_SIZE; +- dma.request_list = &index; +- dma.request_sizes = &size; +- dma.granted_count = 0; +- +- LOCK_HARDWARE(rmesa); /* no need to validate */ +- +- ret = drmDMA( fd, &dma ); + +- if (ret != 0) { +- /* Free some up this way? 
+- */ +- if (rmesa->dma.nr_released_bufs) { +- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ if (nr & 1) { ++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | ++ (rmesa->tcl.aos[nr - 1].stride << 8)); ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ OUT_BATCH_RELOC(voffset, ++ rmesa->tcl.aos[nr - 1].bo, ++ voffset, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Waiting for buffers\n"); +- +- radeonWaitForIdleLocked( rmesa ); +- ret = drmDMA( fd, &dma ); +- +- if ( ret != 0 ) { +- UNLOCK_HARDWARE( rmesa ); +- fprintf( stderr, "Error: Could not get dma buffer... exiting\n" ); +- exit( -1 ); ++ } else { ++ for (i = 0; i + 1 < nr; i += 2) { ++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) | ++ (rmesa->tcl.aos[i].stride << 8) | ++ (rmesa->tcl.aos[i + 1].components << 16) | ++ (rmesa->tcl.aos[i + 1].stride << 24)); ++ ++ voffset = rmesa->tcl.aos[i + 0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ OUT_BATCH(voffset); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ OUT_BATCH(voffset); + } +- } +- +- UNLOCK_HARDWARE(rmesa); +- +- if (RADEON_DEBUG & DEBUG_DMA) +- fprintf(stderr, "Allocated buffer %d\n", index); +- +- dmabuf = CALLOC_STRUCT( radeon_dma_buffer ); +- dmabuf->buf = &rmesa->radeonScreen->buffers->list[index]; +- dmabuf->refcount = 1; +- +- rmesa->dma.current.buf = dmabuf; +- rmesa->dma.current.address = dmabuf->buf->address; +- rmesa->dma.current.end = dmabuf->buf->total; +- rmesa->dma.current.start = 0; +- rmesa->dma.current.ptr = 0; +- +- rmesa->c_vertexBuffers++; +-} +- +-void radeonReleaseDmaRegion( radeonContextPtr rmesa, +- struct radeon_dma_region *region, +- const char *caller ) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); +- +- if (!region->buf) +- return; +- +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- if 
(--region->buf->refcount == 0) { +- drm_radeon_cmd_header_t *cmd; +- +- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) +- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, +- region->buf->buf->idx); + +- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), +- __FUNCTION__ ); +- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; +- cmd->dma.buf_idx = region->buf->buf->idx; +- FREE(region->buf); +- rmesa->dma.nr_released_bufs++; +- } +- +- region->buf = NULL; +- region->start = 0; +-} +- +-/* Allocates a region from rmesa->dma.current. If there isn't enough +- * space in current, grab a new buffer (and discard what was left of current) +- */ +-void radeonAllocDmaRegion( radeonContextPtr rmesa, +- struct radeon_dma_region *region, +- int bytes, +- int alignment ) +-{ +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- if (region->buf) +- radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ ); +- +- alignment--; +- rmesa->dma.current.start = rmesa->dma.current.ptr = +- (rmesa->dma.current.ptr + alignment) & ~alignment; +- +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) +- radeonRefillCurrentDmaRegion( rmesa ); +- +- region->start = rmesa->dma.current.start; +- region->ptr = rmesa->dma.current.start; +- region->end = rmesa->dma.current.start + bytes; +- region->address = rmesa->dma.current.address; +- region->buf = rmesa->dma.current.buf; +- region->buf->refcount++; +- +- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ +- rmesa->dma.current.start = +- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +-} +- +-/* ================================================================ +- * SwapBuffers with client-side throttling +- */ +- +-static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) +-{ +- drm_radeon_getparam_t gp; +- int ret; +- uint32_t frame; +- +- gp.param = RADEON_PARAM_LAST_FRAME; +- gp.value = (int 
*)&frame; +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, +- &gp, sizeof(gp) ); +- +- if ( ret ) { +- fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +- +- return frame; +-} +- +-static void radeonEmitIrqLocked( radeonContextPtr rmesa ) +-{ +- drm_radeon_irq_emit_t ie; +- int ret; +- +- ie.irq_seq = &rmesa->iw.irq_seq; +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, +- &ie, sizeof(ie) ); +- if ( ret ) { +- fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +-} +- +- +-static void radeonWaitIrq( radeonContextPtr rmesa ) +-{ +- int ret; +- +- do { +- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, +- &rmesa->iw, sizeof(rmesa->iw) ); +- } while (ret && (errno == EINTR || errno == EBUSY)); +- +- if ( ret ) { +- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); +- exit(1); +- } +-} +- +- +-static void radeonWaitForFrameCompletion( radeonContextPtr rmesa ) +-{ +- drm_radeon_sarea_t *sarea = rmesa->sarea; +- +- if (rmesa->do_irqs) { +- if (radeonGetLastFrame(rmesa) < sarea->last_frame) { +- if (!rmesa->irqsEmitted) { +- while (radeonGetLastFrame (rmesa) < sarea->last_frame) +- ; +- } +- else { +- UNLOCK_HARDWARE( rmesa ); +- radeonWaitIrq( rmesa ); +- LOCK_HARDWARE( rmesa ); +- } +- rmesa->irqsEmitted = 10; +- } +- +- if (rmesa->irqsEmitted) { +- radeonEmitIrqLocked( rmesa ); +- rmesa->irqsEmitted--; ++ if (nr & 1) { ++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | ++ (rmesa->tcl.aos[nr - 1].stride << 8)); ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ OUT_BATCH(voffset); + } +- } +- else { +- while (radeonGetLastFrame (rmesa) < sarea->last_frame) { +- UNLOCK_HARDWARE( rmesa ); +- if (rmesa->do_usleeps) +- DO_USLEEP( 1 ); +- LOCK_HARDWARE( rmesa ); +- } +- } +-} +- +-/* Copy the back color buffer to the front color buffer. 
+- */ +-void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, +- const drm_clip_rect_t *rect) +-{ +- radeonContextPtr rmesa; +- GLint nbox, i, ret; +- GLboolean missed_target; +- int64_t ust; +- __DRIscreenPrivate *psp; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); +- +- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- +- if ( RADEON_DEBUG & DEBUG_IOCTL ) { +- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); +- } +- +- RADEON_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); +- +- /* Throttle the frame rate -- only allow one pending swap buffers +- * request at a time. +- */ +- radeonWaitForFrameCompletion( rmesa ); +- if (!rect) +- { +- UNLOCK_HARDWARE( rmesa ); +- driWaitForVBlank( dPriv, & missed_target ); +- LOCK_HARDWARE( rmesa ); +- } +- +- nbox = dPriv->numClipRects; /* must be in locked region */ +- +- for ( i = 0 ; i < nbox ; ) { +- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; +- GLint n = 0; +- +- for ( ; i < nr ; i++ ) { +- +- *b = box[i]; +- +- if (rect) +- { +- if (rect->x1 > b->x1) +- b->x1 = rect->x1; +- if (rect->y1 > b->y1) +- b->y1 = rect->y1; +- if (rect->x2 < b->x2) +- b->x2 = rect->x2; +- if (rect->y2 < b->y2) +- b->y2 = rect->y2; +- +- if (b->x1 >= b->x2 || b->y1 >= b->y2) +- continue; +- } +- +- b++; +- n++; ++ for (i = 0; i + 1 < nr; i += 2) { ++ voffset = rmesa->tcl.aos[i + 0].offset + ++ offset * 4 * rmesa->tcl.aos[i + 0].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[i+0].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); ++ voffset = rmesa->tcl.aos[i + 1].offset + ++ offset * 4 * rmesa->tcl.aos[i + 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[i+1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } +- rmesa->sarea->nbox = n; +- +- if (!n) +- continue; +- +- ret = drmCommandNone( rmesa->dri.fd, 
DRM_RADEON_SWAP ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret ); +- UNLOCK_HARDWARE( rmesa ); +- exit( 1 ); ++ if (nr & 1) { ++ voffset = rmesa->tcl.aos[nr - 1].offset + ++ offset * 4 * rmesa->tcl.aos[nr - 1].stride; ++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, ++ rmesa->tcl.aos[nr-1].bo, ++ RADEON_GEM_DOMAIN_GTT, ++ 0, 0); + } + } ++ END_BATCH(); + +- UNLOCK_HARDWARE( rmesa ); +- if (!rect) +- { +- psp = dPriv->driScreenPriv; +- rmesa->swap_count++; +- (*psp->systemTime->getUST)( & ust ); +- if ( missed_target ) { +- rmesa->swap_missed_count++; +- rmesa->swap_missed_ust = ust - rmesa->swap_ust; +- } +- +- rmesa->swap_ust = ust; +- rmesa->hw.all_dirty = GL_TRUE; +- } +-} +- +-void radeonPageFlip( __DRIdrawablePrivate *dPriv ) +-{ +- radeonContextPtr rmesa; +- GLint ret; +- GLboolean missed_target; +- __DRIscreenPrivate *psp; +- +- assert(dPriv); +- assert(dPriv->driContextPriv); +- assert(dPriv->driContextPriv->driverPrivate); +- +- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; +- psp = dPriv->driScreenPriv; +- +- if ( RADEON_DEBUG & DEBUG_IOCTL ) { +- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, +- rmesa->sarea->pfCurrentPage); +- } +- +- RADEON_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); +- +- /* Need to do this for the perf box placement: +- */ +- if (dPriv->numClipRects) +- { +- drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; +- b[0] = box[0]; +- rmesa->sarea->nbox = 1; +- } +- +- /* Throttle the frame rate -- only allow a few pending swap buffers +- * request at a time. 
+- */ +- radeonWaitForFrameCompletion( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- driWaitForVBlank( dPriv, & missed_target ); +- if ( missed_target ) { +- rmesa->swap_missed_count++; +- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); +- } +- LOCK_HARDWARE( rmesa ); +- +- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); +- exit( 1 ); +- } +- +- rmesa->swap_count++; +- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); +- +- /* Get ready for drawing next frame. Update the renderbuffers' +- * flippedOffset/Pitch fields so we draw into the right place. +- */ +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); +- +- radeonUpdateDrawBuffer(rmesa->glCtx); ++#endif + } + +- + /* ================================================================ + * Buffer clear + */ +@@ -1028,9 +407,9 @@ void radeonPageFlip( __DRIdrawablePrivate *dPriv ) + + static void radeonClear( GLcontext *ctx, GLbitfield mask ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- drm_radeon_sarea_t *sarea = rmesa->sarea; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; ++ drm_radeon_sarea_t *sarea = rmesa->radeon.sarea; + uint32_t clear; + GLuint flags = 0; + GLuint color_mask = 0; +@@ -1042,8 +421,8 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + } + + { +- LOCK_HARDWARE( rmesa ); +- UNLOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + if ( dPriv->numClipRects == 0 ) + return; + } +@@ -1067,7 +446,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + mask &= ~BUFFER_BIT_DEPTH; + } + +- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { ++ if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) { + flags |= 
RADEON_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } +@@ -1083,16 +462,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + + if (rmesa->using_hyperz) { + flags |= RADEON_USE_COMP_ZBUF; +-/* if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) ++/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) + flags |= RADEON_USE_HIERZ; */ +- if (!(rmesa->state.stencil.hwBuffer) || ++ if (!(rmesa->radeon.state.stencil.hwBuffer) || + ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && +- ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { ++ ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { + flags |= RADEON_CLEAR_FASTZ; + } + } + +- LOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); + + /* compute region after locking: */ + cx = ctx->DrawBuffer->_Xmin; +@@ -1112,7 +491,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&clear; +- ret = drmCommandWriteRead( rmesa->dri.fd, ++ ret = drmCommandWriteRead( rmesa->radeon.dri.fd, + DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); + + if ( ret ) { +@@ -1124,20 +503,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + break; + } + +- if ( rmesa->do_usleeps ) { +- UNLOCK_HARDWARE( rmesa ); ++ if ( rmesa->radeon.do_usleeps ) { ++ UNLOCK_HARDWARE( &rmesa->radeon ); + DO_USLEEP( 1 ); +- LOCK_HARDWARE( rmesa ); ++ LOCK_HARDWARE( &rmesa->radeon ); + } + } + + /* Send current state to the hardware */ +- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); ++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + + for ( i = 0 ; i < dPriv->numClipRects ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); + drm_clip_rect_t *box = dPriv->pClipRects; +- drm_clip_rect_t *b = rmesa->sarea->boxes; ++ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes; + drm_radeon_clear_t clear; + drm_radeon_clear_rect_t 
depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; + GLint n = 0; +@@ -1172,106 +551,40 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) + } + } + +- rmesa->sarea->nbox = n; ++ rmesa->radeon.sarea->nbox = n; + + clear.flags = flags; +- clear.clear_color = rmesa->state.color.clear; +- clear.clear_depth = rmesa->state.depth.clear; ++ clear.clear_color = rmesa->radeon.state.color.clear; ++ clear.clear_depth = rmesa->radeon.state.depth.clear; + clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; +- clear.depth_mask = rmesa->state.stencil.clear; ++ clear.depth_mask = rmesa->radeon.state.stencil.clear; + clear.depth_boxes = depth_boxes; + + n--; +- b = rmesa->sarea->boxes; ++ b = rmesa->radeon.sarea->boxes; + for ( ; n >= 0 ; n-- ) { + depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; + depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; + depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2; + depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2; + depth_boxes[n].f[CLEAR_DEPTH] = +- (float)rmesa->state.depth.clear; ++ (float)rmesa->radeon.state.depth.clear; + } + +- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, ++ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR, + &clear, sizeof(drm_radeon_clear_t)); + + if ( ret ) { +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); + exit( 1 ); + } + } + +- UNLOCK_HARDWARE( rmesa ); +- rmesa->hw.all_dirty = GL_TRUE; ++ UNLOCK_HARDWARE( &rmesa->radeon ); ++ rmesa->radeon.hw.all_dirty = GL_TRUE; + } + +- +-void radeonWaitForIdleLocked( radeonContextPtr rmesa ) +-{ +- int fd = rmesa->dri.fd; +- int to = 0; +- int ret, i = 0; +- +- rmesa->c_drawWaits++; +- +- do { +- do { +- ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE); +- } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY ); +- } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) ); +- +- if ( ret < 0 ) { +- UNLOCK_HARDWARE( rmesa ); +- fprintf( stderr, "Error: Radeon timed out... 
exiting\n" ); +- exit( -1 ); +- } +-} +- +- +-static void radeonWaitForIdle( radeonContextPtr rmesa ) +-{ +- LOCK_HARDWARE(rmesa); +- radeonWaitForIdleLocked( rmesa ); +- UNLOCK_HARDWARE(rmesa); +-} +- +- +-void radeonFlush( GLcontext *ctx ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); +- +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); +- +- radeonEmitState( rmesa ); +- +- if (rmesa->store.cmd_used) +- radeonFlushCmdBuf( rmesa, __FUNCTION__ ); +-} +- +-/* Make sure all commands have been sent to the hardware and have +- * completed processing. +- */ +-void radeonFinish( GLcontext *ctx ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- radeonFlush( ctx ); +- +- if (rmesa->do_irqs) { +- LOCK_HARDWARE( rmesa ); +- radeonEmitIrqLocked( rmesa ); +- UNLOCK_HARDWARE( rmesa ); +- radeonWaitIrq( rmesa ); +- } +- else +- radeonWaitForIdle( rmesa ); +-} +- +- + void radeonInitIoctlFuncs( GLcontext *ctx ) + { + ctx->Driver.Clear = radeonClear; +diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h +index 4e3a44d..18805d4 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h ++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h +@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "main/simple_list.h" + #include "radeon_lock.h" ++#include "radeon_bocs_wrapper.h" + +- +-extern void radeonEmitState( radeonContextPtr rmesa ); +-extern void radeonEmitVertexAOS( radeonContextPtr rmesa, ++extern void radeonEmitVertexAOS( r100ContextPtr rmesa, + GLuint vertex_size, ++ struct radeon_bo *bo, + GLuint offset ); + +-extern void radeonEmitVbufPrim( radeonContextPtr rmesa, ++extern void radeonEmitVbufPrim( r100ContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint vertex_nr ); + +-extern void radeonFlushElts( radeonContextPtr rmesa ); ++extern void radeonFlushElts( GLcontext *ctx ); ++ + +-extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, ++extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint min_nr ); + +-extern void radeonEmitAOS( radeonContextPtr rmesa, +- struct radeon_dma_region **regions, ++ ++extern void radeonEmitAOS( r100ContextPtr rmesa, + GLuint n, + GLuint offset ); + +-extern void radeonEmitBlit( radeonContextPtr rmesa, ++extern void radeonEmitBlit( r100ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, +@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa, + GLint dstx, GLint dsty, + GLuint w, GLuint h ); + +-extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ); +- +-extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * ); +-extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ); ++extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags ); + +-extern void radeonAllocDmaRegion( radeonContextPtr rmesa, +- struct radeon_dma_region *region, +- int bytes, +- int alignment ); ++extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * ); + +-extern void radeonReleaseDmaRegion( radeonContextPtr rmesa, +- struct radeon_dma_region *region, +- const char *caller ); +- +-extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable, +- const 
drm_clip_rect_t *rect); +-extern void radeonPageFlip( __DRIdrawablePrivate *drawable ); + extern void radeonFlush( GLcontext *ctx ); + extern void radeonFinish( GLcontext *ctx ); +-extern void radeonWaitForIdleLocked( radeonContextPtr rmesa ); +-extern void radeonWaitForVBlank( radeonContextPtr rmesa ); + extern void radeonInitIoctlFuncs( GLcontext *ctx ); +-extern void radeonGetAllParams( radeonContextPtr rmesa ); +-extern void radeonSetUpAtomList( radeonContextPtr rmesa ); ++extern void radeonGetAllParams( r100ContextPtr rmesa ); ++extern void radeonSetUpAtomList( r100ContextPtr rmesa ); + + /* ================================================================ + * Helper macros: +@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa ); + */ + #define RADEON_NEWPRIM( rmesa ) \ + do { \ +- if ( rmesa->dma.flush ) \ +- rmesa->dma.flush( rmesa ); \ ++ if ( rmesa->radeon.dma.flush ) \ ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ + } while (0) + + /* Can accomodate several state changes and primitive changes without + * actually firing the buffer. 
+ */ ++ + #define RADEON_STATECHANGE( rmesa, ATOM ) \ + do { \ + RADEON_NEWPRIM( rmesa ); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ +- rmesa->hw.is_dirty = GL_TRUE; \ ++ rmesa->radeon.hw.is_dirty = GL_TRUE; \ + } while (0) + +-#define RADEON_DB_STATE( ATOM ) \ ++#define RADEON_DB_STATE( ATOM ) \ + memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ + rmesa->hw.ATOM.cmd_size * 4) + +-static INLINE int RADEON_DB_STATECHANGE( +- radeonContextPtr rmesa, +- struct radeon_state_atom *atom ) ++static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa, ++ struct radeon_state_atom *atom ) + { + if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { +- int *tmp; ++ GLuint *tmp; + RADEON_NEWPRIM( rmesa ); + atom->dirty = GL_TRUE; +- rmesa->hw.is_dirty = GL_TRUE; ++ rmesa->radeon.hw.is_dirty = GL_TRUE; + tmp = atom->cmd; + atom->cmd = atom->lastcmd; + atom->lastcmd = tmp; +@@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE( + return 0; + } + +- +-/* Fire the buffered vertices no matter what. +- */ +-#define RADEON_FIREVERTICES( rmesa ) \ +-do { \ +- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ +- radeonFlush( rmesa->glCtx ); \ +- } \ +-} while (0) +- + /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ + * are available, you will also be adding an rmesa->state.max_state_size because + * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. +@@ -167,36 +143,37 @@ do { \ + #define VBUF_BUFSZ (4 * sizeof(int)) + #endif + +-/* Ensure that a minimum amount of space is available in the command buffer. +- * This is used to ensure atomicity of state updates with the rendering requests +- * that rely on them. +- * +- * An alternative would be to implement a "soft lock" such that when the buffer +- * wraps at an inopportune time, we grab the lock, flush the current buffer, +- * and hang on to the lock until the critical section is finished and we flush +- * the buffer again and unlock. 
+- */ +-static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, +- int bytes ) +-{ +- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) +- radeonFlushCmdBuf( rmesa, __FUNCTION__ ); +- assert( bytes <= RADEON_CMD_BUF_SZ ); +-} + +-/* Alloc space in the command buffer +- */ +-static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa, +- int bytes, const char *where ) ++static inline uint32_t cmdpacket3(int cmd_type) + { +- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) +- radeonFlushCmdBuf( rmesa, __FUNCTION__ ); ++ drm_radeon_cmd_header_t cmd; ++ ++ cmd.i = 0; ++ cmd.header.cmd_type = cmd_type; ++ ++ return (uint32_t)cmd.i; + +- { +- char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used; +- rmesa->store.cmd_used += bytes; +- return head; +- } + } + ++#define OUT_BATCH_PACKET3(packet, num_extra) do { \ ++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \ ++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } else { \ ++ OUT_BATCH(CP_PACKET2); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } \ ++ } while(0) ++ ++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \ ++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \ ++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } else { \ ++ OUT_BATCH(CP_PACKET2); \ ++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ ++ } \ ++ } while(0) ++ ++ + #endif /* __RADEON_IOCTL_H__ */ +diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c +index 64bb3ca..9a7e76b 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_lock.c ++++ b/src/mesa/drivers/dri/radeon/radeon_lock.c +@@ -41,12 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "main/glheader.h" + #include "main/mtypes.h" +-#include "radeon_context.h" ++#include "main/colormac.h" ++#include "dri_util.h" ++#include "radeon_screen.h" ++#include "radeon_common.h" + #include "radeon_lock.h" +-#include "radeon_tex.h" +-#include "radeon_state.h" +-#include "radeon_ioctl.h" +- + #include "drirenderbuffer.h" + + #if DEBUG_LOCKING +@@ -56,13 +55,28 @@ int prevLockLine = 0; + + /* Turn on/off page flipping according to the flags in the sarea: + */ +-static void radeonUpdatePageFlipping(radeonContextPtr rmesa) ++void radeonUpdatePageFlipping(radeonContextPtr rmesa) + { ++ int use_back; ++ __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; ++ GLframebuffer *fb = drawable->driverPrivate; ++ + rmesa->doPageFlip = rmesa->sarea->pfState; + if (rmesa->glCtx->WinSysDrawBuffer) { +- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, +- rmesa->sarea->pfCurrentPage); ++ rmesa->vtbl.update_draw_buffer(rmesa->glCtx); + } + -+ if (RADEON_DEBUG & DEBUG_VERTS) -+ fprintf(stderr, "%s count %d stride %d out %p data %p\n", -+ __FUNCTION__, count, stride, (void *)out, (void *)data); ++ use_back = rmesa->glCtx->DrawBuffer ? ++ (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == ++ BUFFER_BACK_LEFT) : 1; ++ use_back ^= (rmesa->sarea->pfCurrentPage == 1); + -+ if (stride == 8) -+ COPY_DWORDS(out, data, count * 2); ++ if (use_back) ++ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + else -+ for (i = 0; i < count; i++) { -+ out[0] = *(int *)data; -+ out[1] = *(int *)(data + 4); -+ out += 2; -+ data += stride; -+ } -+} ++ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ ++ rmesa->state.depth.rrb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; + } + + /* Update the hardware state. 
This is called if another context has +@@ -80,6 +94,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) + __DRIscreenPrivate *sPriv = rmesa->dri.screen; + drm_radeon_sarea_t *sarea = rmesa->sarea; + ++ assert(drawable != NULL); ++ + drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); + + /* The window might have moved, so we might need to get new clip +@@ -98,27 +114,11 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) + if (rmesa->lastStamp != drawable->lastStamp) { + radeonUpdatePageFlipping(rmesa); + radeonSetCliprects(rmesa); +- radeonUpdateViewportOffset(rmesa->glCtx); ++ rmesa->vtbl.update_viewport_offset(rmesa->glCtx); + driUpdateFramebufferSize(rmesa->glCtx, drawable); + } + +- RADEON_STATECHANGE(rmesa, ctx); +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= +- RADEON_COLOR_TILE_ENABLE; +- } else { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= +- ~RADEON_COLOR_TILE_ENABLE; +- } +- +- if (sarea->ctx_owner != rmesa->dri.hwContext) { +- int i; +- sarea->ctx_owner = rmesa->dri.hwContext; +- +- for (i = 0; i < rmesa->nr_heaps; i++) { +- DRI_AGE_TEXTURES(rmesa->texture_heaps[i]); +- } +- } ++ rmesa->vtbl.get_lock(rmesa); + + rmesa->lost_context = GL_TRUE; + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h +index 86e96aa..f5ebb8d 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_lock.h ++++ b/src/mesa/drivers/dri/radeon/radeon_lock.h +@@ -39,8 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * Kevin E. 
Martin + */ + +-#ifndef __RADEON_LOCK_H__ +-#define __RADEON_LOCK_H__ ++#ifndef COMMON_LOCK_H ++#define COMMON_LOCK_H + -+void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) -+{ -+ int i; ++#include "main/colormac.h" ++#include "radeon_screen.h" ++#include "radeon_common.h" + + extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); + +@@ -94,19 +98,23 @@ extern int prevLockLine; + do { \ + char __ret = 0; \ + DEBUG_CHECK_LOCK(); \ +- DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ +- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \ +- if ( __ret ) \ +- radeonGetLock( (rmesa), 0 ); \ +- DEBUG_LOCK(); \ ++ if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \ ++ DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ ++ (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \ ++ if ( __ret ) \ ++ radeonGetLock( (rmesa), 0 ); \ ++ } \ ++ DEBUG_LOCK(); \ + } while (0) + + #define UNLOCK_HARDWARE( rmesa ) \ + do { \ +- DRM_UNLOCK( (rmesa)->dri.fd, \ +- (rmesa)->dri.hwLock, \ +- (rmesa)->dri.hwContext ); \ +- DEBUG_RESET(); \ ++ if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \ ++ DRM_UNLOCK( (rmesa)->dri.fd, \ ++ (rmesa)->dri.hwLock, \ ++ (rmesa)->dri.hwContext ); \ ++ DEBUG_RESET(); \ ++ } \ + } while (0) + +-#endif /* __RADEON_LOCK_H__ */ ++#endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +index de3c3a1..7f5da16 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c ++++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +@@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "swrast_setup/swrast_setup.h" + #include "math/m_translate.h" + #include "tnl/tnl.h" +-#include "tnl/tcontext.h" + + #include "radeon_context.h" + #include "radeon_ioctl.h" +@@ -49,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "radeon_maos.h" + #include "radeon_tcl.h" + +-#if 0 +-/* Usage: +- * - from radeon_tcl_render +- * - call radeonEmitArrays to ensure uptodate arrays in dma +- * - emit primitives (new type?) which reference the data +- * -- need to use elts for lineloop, quads, quadstrip/flat +- * -- other primitives are all well-formed (need tristrip-1,fake-poly) +- * +- */ +-static void emit_ubyte_rgba3( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) ++static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, ++ GLvoid *data, int stride, int count) + { + int i; +- radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p\n", +- __FUNCTION__, count, stride, (void *)out); +- +- for (i = 0; i < count; i++) { +- out->red = *data; +- out->green = *(data+1); +- out->blue = *(data+2); +- out->alpha = 0xFF; +- out++; +- data += stride; +- } +-} +- +-static void emit_ubyte_rgba4( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); ++ uint32_t *out; ++ int size = 1; ++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + +- if (stride == 4) +- COPY_DWORDS( out, data, count ); +- else +- for (i = 0; i < count; i++) { +- *out++ = LE32_TO_CPU(*(int *)data); +- data += stride; +- } +-} +- +- +-static void emit_ubyte_rgba( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int size, +- int stride, +- int count ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); +- +- assert (!rvb->buf); +- + if (stride == 0) { +- radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); ++ radeonAllocDmaRegion( rmesa, &aos->bo, 
&aos->offset, size * 4, 32 ); + count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = 1; ++ aos->stride = 0; + } + else { +- radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 1; +- rvb->aos_size = 1; ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); ++ aos->stride = size; + } + +- /* Emit the data +- */ +- switch (size) { +- case 3: +- emit_ubyte_rgba3( ctx, rvb, data, stride, count ); +- break; +- case 4: +- emit_ubyte_rgba4( ctx, rvb, data, stride, count ); +- break; +- default: +- assert(0); +- exit(1); +- break; +- } +-} +-#endif +- +-#if defined(USE_X86_ASM) +-#define COPY_DWORDS( dst, src, nr ) \ +-do { \ +- int __tmp; \ +- __asm__ __volatile__( "rep ; movsl" \ +- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ +- : "0" (nr), \ +- "D" ((long)dst), \ +- "S" ((long)src) ); \ +-} while (0) +-#else +-#define COPY_DWORDS( dst, src, nr ) \ +-do { \ +- int j; \ +- for ( j = 0 ; j < nr ; j++ ) \ +- dst[j] = ((int *)src)[j]; \ +- dst += nr; \ +-} while (0) +-#endif +- +-static void emit_vecfog( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- GLfloat *out; +- +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ aos->components = size; ++ aos->count = count; + +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- assert (!rvb->buf); +- +- if (stride == 0) { +- radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); +- count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = 1; +- } +- else { +- radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? 
*/ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 1; +- rvb->aos_size = 1; +- } + + /* Emit the data + */ +- out = (GLfloat *)(rvb->address + rvb->start); ++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + for (i = 0; i < count; i++) { + out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data ); + out++; +@@ -210,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx, + } + } + +-static void emit_vec4( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 4) +- COPY_DWORDS( out, data, count ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out++; +- data += stride; +- } +-} +- +- +-static void emit_vec8( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 8) +- COPY_DWORDS( out, data, count*2 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out += 2; +- data += stride; +- } +-} +- +-static void emit_vec12( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d out %p data %p\n", +- __FUNCTION__, count, stride, (void *)out, (void *)data); +- +- if (stride == 12) +- COPY_DWORDS( out, data, count*3 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out[2] = *(int *)(data+8); +- out += 3; +- data += stride; +- } +-} +- +-static void emit_vec16( GLcontext *ctx, +- 
struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) +-{ +- int i; +- int *out = (int *)(rvb->address + rvb->start); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d stride %d\n", +- __FUNCTION__, count, stride); +- +- if (stride == 16) +- COPY_DWORDS( out, data, count*4 ); +- else +- for (i = 0; i < count; i++) { +- out[0] = *(int *)data; +- out[1] = *(int *)(data+4); +- out[2] = *(int *)(data+8); +- out[3] = *(int *)(data+12); +- out += 4; +- data += stride; +- } +-} +- +- +-static void emit_vector( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int size, +- int stride, +- int count ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- +- if (RADEON_DEBUG & DEBUG_VERTS) +- fprintf(stderr, "%s count %d size %d stride %d\n", +- __FUNCTION__, count, size, stride); +- +- assert (!rvb->buf); +- +- if (stride == 0) { +- radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 ); +- count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = size; +- } +- else { +- radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? 
*/ +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = size; +- rvb->aos_size = size; +- } +- +- /* Emit the data +- */ +- switch (size) { +- case 1: +- emit_vec4( ctx, rvb, data, stride, count ); +- break; +- case 2: +- emit_vec8( ctx, rvb, data, stride, count ); +- break; +- case 3: +- emit_vec12( ctx, rvb, data, stride, count ); +- break; +- case 4: +- emit_vec16( ctx, rvb, data, stride, count ); +- break; +- default: +- assert(0); +- exit(1); +- break; +- } +- +-} +- +- +- +-static void emit_s0_vec( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) ++static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); +- + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); +@@ -385,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx, + } + } + +-static void emit_stq_vec( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int stride, +- int count ) ++static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count) + { + int i; +- int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", +@@ -410,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx, + + + +-static void emit_tex_vector( GLcontext *ctx, +- struct radeon_dma_region *rvb, +- char *data, +- int size, +- int stride, +- int count ) ++static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, ++ GLvoid *data, int size, int stride, int count) + { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int emitsize; ++ uint32_t *out; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); + +- assert (!rvb->buf); +- + switch (size) { + case 4: emitsize = 3; break; + case 3: emitsize = 3; break; +@@ -433,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx, + + + if (stride == 0) { +- 
radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 ); ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32); + count = 1; +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = 0; +- rvb->aos_size = emitsize; ++ aos->stride = 0; + } + else { +- radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 ); +- rvb->aos_start = GET_START(rvb); +- rvb->aos_stride = emitsize; +- rvb->aos_size = emitsize; ++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32); ++ aos->stride = emitsize; + } + ++ aos->components = emitsize; ++ aos->count = count; + + /* Emit the data + */ ++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + switch (size) { + case 1: +- emit_s0_vec( ctx, rvb, data, stride, count ); ++ emit_s0_vec( out, data, stride, count ); + break; + case 2: +- emit_vec8( ctx, rvb, data, stride, count ); ++ radeonEmitVec8( out, data, stride, count ); + break; + case 3: +- emit_vec12( ctx, rvb, data, stride, count ); ++ radeonEmitVec12( out, data, stride, count ); + break; + case 4: +- emit_stq_vec( ctx, rvb, data, stride, count ); ++ emit_stq_vec( out, data, stride, count ); + break; + default: + assert(0); +@@ -477,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx, + */ + void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; +- struct radeon_dma_region **component = rmesa->tcl.aos_components; + GLuint nr = 0; + GLuint vfmt = 0; + GLuint count = VB->Count; +@@ -492,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + + if (1) { + if (!rmesa->tcl.obj.buf) +- emit_vector( ctx, +- &rmesa->tcl.obj, +- (char *)VB->ObjPtr->data, +- VB->ObjPtr->size, +- VB->ObjPtr->stride, +- count); ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->ObjPtr->data, ++ VB->ObjPtr->size, ++ VB->ObjPtr->stride, ++ count); + + switch( VB->ObjPtr->size ) 
{ + case 4: vfmt |= RADEON_CP_VC_FRMT_W0; +@@ -506,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + default: + break; + } +- component[nr++] = &rmesa->tcl.obj; ++ nr++; + } + + + if (inputs & VERT_BIT_NORMAL) { + if (!rmesa->tcl.norm.buf) +- emit_vector( ctx, +- &(rmesa->tcl.norm), +- (char *)VB->NormalPtr->data, +- 3, +- VB->NormalPtr->stride, +- count); ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->NormalPtr->data, ++ 3, ++ VB->NormalPtr->stride, ++ count); + + vfmt |= RADEON_CP_VC_FRMT_N0; +- component[nr++] = &rmesa->tcl.norm; ++ nr++; + } + + if (inputs & VERT_BIT_COLOR0) { +@@ -538,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + } + + if (!rmesa->tcl.rgba.buf) +- emit_vector( ctx, +- &(rmesa->tcl.rgba), +- (char *)VB->ColorPtr[0]->data, +- emitsize, +- VB->ColorPtr[0]->stride, +- count); +- +- +- component[nr++] = &rmesa->tcl.rgba; ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->ColorPtr[0]->data, ++ emitsize, ++ VB->ColorPtr[0]->stride, ++ count); ++ ++ nr++; + } + + + if (inputs & VERT_BIT_COLOR1) { + if (!rmesa->tcl.spec.buf) { + +- emit_vector( ctx, +- &rmesa->tcl.spec, +- (char *)VB->SecondaryColorPtr[0]->data, +- 3, +- VB->SecondaryColorPtr[0]->stride, +- count); ++ rcommon_emit_vector( ctx, ++ &(rmesa->tcl.aos[nr]), ++ (char *)VB->SecondaryColorPtr[0]->data, ++ 3, ++ VB->SecondaryColorPtr[0]->stride, ++ count); + } + + vfmt |= RADEON_CP_VC_FRMT_FPSPEC; +- component[nr++] = &rmesa->tcl.spec; ++ nr++; + } + + /* FIXME: not sure if this is correct. 
May need to stitch this together with +@@ -571,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + if (inputs & VERT_BIT_FOG) { + if (!rmesa->tcl.fog.buf) + emit_vecfog( ctx, +- &(rmesa->tcl.fog), ++ &(rmesa->tcl.aos[nr]), + (char *)VB->FogCoordPtr->data, + VB->FogCoordPtr->stride, + count); + + vfmt |= RADEON_CP_VC_FRMT_FPFOG; +- component[nr++] = &rmesa->tcl.fog; ++ nr++; + } + + +@@ -588,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + if (inputs & VERT_BIT_TEX(unit)) { + if (!rmesa->tcl.tex[unit].buf) + emit_tex_vector( ctx, +- &(rmesa->tcl.tex[unit]), ++ &(rmesa->tcl.aos[nr]), + (char *)VB->TexCoordPtr[unit]->data, + VB->TexCoordPtr[unit]->size, + VB->TexCoordPtr[unit]->stride, + count ); ++ nr++; + + vfmt |= RADEON_ST_BIT(unit); + /* assume we need the 3rd coord if texgen is active for r/q OR at least +@@ -610,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) + radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; + } +- component[nr++] = &rmesa->tcl.tex[unit]; + } + } + +@@ -626,31 +327,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + + void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); +- GLuint unit; +- +-#if 0 +- if (RADEON_DEBUG & DEBUG_VERTS) +- _tnl_print_vert_flags( __FUNCTION__, newinputs ); +-#endif +- +- if (newinputs & VERT_BIT_POS) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ ); +- +- if (newinputs & VERT_BIT_NORMAL) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ ); +- +- if (newinputs & VERT_BIT_COLOR0) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ ); +- +- if (newinputs & VERT_BIT_COLOR1) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ ); +- +- if (newinputs & VERT_BIT_FOG) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ ); ++ r100ContextPtr rmesa = R100_CONTEXT( 
ctx ); ++ int i; + +- for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { +- if (newinputs & VERT_BIT_TEX(unit)) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ ); ++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) { ++ if (rmesa->tcl.aos[i].bo) { ++ radeon_bo_unref(rmesa->tcl.aos[i].bo); ++ rmesa->tcl.aos[i].bo = NULL; ++ } + } + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c +index 126d072..d468a97 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c ++++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c +@@ -310,7 +310,7 @@ static void init_tcl_verts( void ) + + void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + GLuint req = 0; + GLuint unit; +@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + break; + + if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format && +- rmesa->tcl.indexed_verts.buf) ++ rmesa->tcl.aos[0].bo) + return; + +- if (rmesa->tcl.indexed_verts.buf) ++ if (rmesa->tcl.aos[0].bo) + radeonReleaseArrays( ctx, ~0 ); + +- radeonAllocDmaRegion( rmesa, +- &rmesa->tcl.indexed_verts, ++ radeonAllocDmaRegion( &rmesa->radeon, ++ &rmesa->tcl.aos[0].bo, ++ &rmesa->tcl.aos[0].offset, + VB->Count * setup_tab[i].vertex_size * 4, + 4); + +@@ -421,15 +422,11 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + + + setup_tab[i].emit( ctx, 0, VB->Count, +- rmesa->tcl.indexed_verts.address + +- rmesa->tcl.indexed_verts.start ); ++ rmesa->tcl.aos[0].bo->ptr + rmesa->tcl.aos[0].offset); + ++ // rmesa->tcl.aos[0].size = setup_tab[i].vertex_size; ++ rmesa->tcl.aos[0].stride = setup_tab[i].vertex_size; + rmesa->tcl.vertex_format = setup_tab[i].vertex_format; +- rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts ); +- rmesa->tcl.indexed_verts.aos_size 
= setup_tab[i].vertex_size; +- rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size; +- +- rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts; + rmesa->tcl.nr_aos_components = 1; + } + +@@ -437,13 +434,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) + + void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); +- +-#if 0 +- if (RADEON_DEBUG & DEBUG_VERTS) +- _tnl_print_vert_flags( __FUNCTION__, newinputs ); +-#endif ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); ++ int i; + +- if (newinputs) +- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ ); ++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) { ++ if (rmesa->tcl.aos[i].bo) { ++ radeon_bo_unref(rmesa->tcl.aos[i].bo); ++ rmesa->tcl.aos[i].bo = NULL; ++ } ++ } + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +new file mode 100644 +index 0000000..3203ee1 +--- /dev/null ++++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +@@ -0,0 +1,360 @@ ++/* ++ * Copyright (C) 2008 Nicolai Haehnle. ++ * ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ */ + -+ if (RADEON_DEBUG & DEBUG_VERTS) -+ fprintf(stderr, "%s count %d stride %d out %p data %p\n", -+ __FUNCTION__, count, stride, (void *)out, (void *)data); ++#include "radeon_mipmap_tree.h" + -+ if (stride == 12) { -+ COPY_DWORDS(out, data, count * 3); -+ } -+ else -+ for (i = 0; i < count; i++) { -+ out[0] = *(int *)data; -+ out[1] = *(int *)(data + 4); -+ out[2] = *(int *)(data + 8); -+ out += 3; -+ data += stride; -+ } -+} ++#include ++#include + -+static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) ++#include "main/simple_list.h" ++#include "main/texcompress.h" ++#include "main/texformat.h" ++ ++static GLuint radeon_compressed_texture_size(GLcontext *ctx, ++ GLsizei width, GLsizei height, GLsizei depth, ++ GLuint mesaFormat) +{ -+ int i; ++ GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); + -+ if (RADEON_DEBUG & DEBUG_VERTS) -+ fprintf(stderr, "%s count %d stride %d out %p data %p\n", -+ __FUNCTION__, count, stride, (void *)out, (void *)data); ++ if (mesaFormat == MESA_FORMAT_RGB_DXT1 || ++ mesaFormat == MESA_FORMAT_RGBA_DXT1) { ++ if (width + 3 < 8) /* width one block */ ++ size = size * 4; ++ else if (width + 3 < 16) ++ size = size * 2; ++ } else { ++ /* DXT3/5, 16 bytes per block */ ++ // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); ++ if (width + 3 < 8) ++ size = size * 2; ++ } + -+ if (stride == 16) -+ COPY_DWORDS(out, data, count * 4); -+ else -+ for (i = 0; i 
< count; i++) { -+ out[0] = *(int *)data; -+ out[1] = *(int *)(data + 4); -+ out[2] = *(int *)(data + 8); -+ out[3] = *(int *)(data + 12); -+ out += 4; -+ data += stride; -+ } ++ return size; +} + -+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, -+ GLvoid * data, int size, int stride, int count) ++ ++static int radeon_compressed_num_bytes(GLuint mesaFormat) ++{ ++ int bytes = 0; ++ switch(mesaFormat) { ++ ++ case MESA_FORMAT_RGB_FXT1: ++ case MESA_FORMAT_RGBA_FXT1: ++ case MESA_FORMAT_RGB_DXT1: ++ case MESA_FORMAT_RGBA_DXT1: ++ bytes = 2; ++ break; ++ ++ case MESA_FORMAT_RGBA_DXT3: ++ case MESA_FORMAT_RGBA_DXT5: ++ bytes = 4; ++ default: ++ break; ++ } ++ ++ return bytes; ++} ++ ++/** ++ * Compute sizes and fill in offset and blit information for the given ++ * image (determined by \p face and \p level). ++ * ++ * \param curOffset points to the offset at which the image is to be stored ++ * and is updated by this function according to the size of the image. ++ */ ++static void compute_tex_image_offset(radeon_mipmap_tree *mt, ++ GLuint face, GLuint level, GLuint* curOffset) +{ -+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); -+ uint32_t *out; ++ radeon_mipmap_level *lvl = &mt->levels[level]; + -+ if (stride == 0) { -+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); -+ count = 1; -+ aos->stride = 0; ++ /* Find image size in bytes */ ++ if (mt->compressed) { ++ /* TODO: Is this correct? Need test cases for compressed textures! */ ++ GLuint align; ++ ++ lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; ++ lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, ++ lvl->width, lvl->height, lvl->depth, mt->compressed); ++ } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { ++ lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; ++ lvl->size = lvl->rowstride * lvl->height; ++ } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { ++ /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, ++ * though the actual offset may be different (if texture is less than ++ * 32 bytes width) to the untiled case */ ++ lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; ++ lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; + } else { -+ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); -+ aos->stride = size; ++ lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31; ++ lvl->size = lvl->rowstride * lvl->height * lvl->depth; + } ++ assert(lvl->size > 0); + -+ aos->components = size; -+ aos->count = count; ++ /* All images are aligned to a 32-byte offset */ ++ *curOffset = (*curOffset + 0x1f) & ~0x1f; ++ lvl->faces[face].offset = *curOffset; ++ *curOffset += lvl->size; + -+ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); -+ switch (size) { -+ case 1: radeonEmitVec4(out, data, stride, count); break; -+ case 2: radeonEmitVec8(out, data, stride, count); break; -+ case 3: radeonEmitVec12(out, data, stride, count); break; -+ case 4: radeonEmitVec16(out, data, stride, count); break; -+ default: -+ assert(0); -+ break; -+ } ++ if (RADEON_DEBUG & DEBUG_TEXTURE) ++ fprintf(stderr, ++ "level %d, face %d: rs:%d %dx%d at %d\n", ++ level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset); +} + -+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) ++static GLuint minify(GLuint size, GLuint levels) +{ -+ struct radeon_cs_space_check bos[1]; -+ int flushed = 0, ret; -+ -+ size = MAX2(size, MAX_DMA_BUF_SZ * 16); -+ -+ if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) -+ fprintf(stderr, "%s\n", __FUNCTION__); ++ size = size >> levels; ++ if (size < 1) ++ size = 1; ++ return size; ++} + -+ if (rmesa->dma.flush) { -+ rmesa->dma.flush(rmesa->glCtx); -+ } ++static void calculate_miptree_layout(radeon_mipmap_tree *mt) ++{ ++ GLuint curOffset; ++ GLuint numLevels; ++ GLuint i; + -+ if (rmesa->dma.nr_released_bufs > 4) { -+ rcommonFlushCmdBuf(rmesa, __FUNCTION__); -+ 
rmesa->dma.nr_released_bufs = 0; -+ } ++ numLevels = mt->lastLevel - mt->firstLevel + 1; ++ assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + -+ if (rmesa->dma.current) { -+ radeon_bo_unmap(rmesa->dma.current); -+ radeon_bo_unref(rmesa->dma.current); -+ rmesa->dma.current = 0; -+ } ++ curOffset = 0; ++ for(i = 0; i < numLevels; i++) { ++ GLuint face; + -+again_alloc: -+ rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, -+ 0, size, 4, RADEON_GEM_DOMAIN_GTT, -+ 0); ++ mt->levels[i].width = minify(mt->width0, i); ++ mt->levels[i].height = minify(mt->height0, i); ++ mt->levels[i].depth = minify(mt->depth0, i); + -+ if (!rmesa->dma.current) { -+ rcommonFlushCmdBuf(rmesa, __FUNCTION__); -+ rmesa->dma.nr_released_bufs = 0; -+ goto again_alloc; ++ for(face = 0; face < mt->faces; face++) ++ compute_tex_image_offset(mt, face, i, &curOffset); + } + -+ rmesa->dma.current_used = 0; -+ rmesa->dma.current_vertexptr = 0; -+ -+ bos[0].bo = rmesa->dma.current; -+ bos[0].read_domains = RADEON_GEM_DOMAIN_GTT; -+ bos[0].write_domain =0 ; -+ bos[0].new_accounted = 0; -+ -+ ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1); -+ if (ret == RADEON_CS_SPACE_OP_TO_BIG) { -+ fprintf(stderr,"Got OPEARTION TO BIG ILLEGAL - this cannot happen"); -+ assert(0); -+ } else if (ret == RADEON_CS_SPACE_FLUSH) { -+ rcommonFlushCmdBuf(rmesa, __FUNCTION__); -+ if (flushed) { -+ fprintf(stderr,"flushed but still no space\n"); -+ assert(0); -+ } -+ flushed = 1; -+ goto again_alloc; -+ } -+ radeon_bo_map(rmesa->dma.current, 1); ++ /* Note the required size in memory */ ++ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +} + -+/* Allocates a region from rmesa->dma.current. If there isn't enough -+ * space in current, grab a new buffer (and discard what was left of current) ++ ++/** ++ * Create a new mipmap tree, calculate its layout and allocate memory. 
+ */ -+void radeonAllocDmaRegion(radeonContextPtr rmesa, -+ struct radeon_bo **pbo, int *poffset, -+ int bytes, int alignment) ++radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, ++ GLenum target, GLuint firstLevel, GLuint lastLevel, ++ GLuint width0, GLuint height0, GLuint depth0, ++ GLuint bpp, GLuint tilebits, GLuint compressed) +{ -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); ++ radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); + -+ if (rmesa->dma.flush) -+ rmesa->dma.flush(rmesa->glCtx); ++ mt->radeon = rmesa; ++ mt->refcount = 1; ++ mt->t = t; ++ mt->target = target; ++ mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; ++ mt->firstLevel = firstLevel; ++ mt->lastLevel = lastLevel; ++ mt->width0 = width0; ++ mt->height0 = height0; ++ mt->depth0 = depth0; ++ mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp; ++ mt->tilebits = tilebits; ++ mt->compressed = compressed; + -+ assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); ++ calculate_miptree_layout(mt); + -+ alignment--; -+ rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; ++ mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, ++ 0, mt->totalsize, 1024, ++ RADEON_GEM_DOMAIN_VRAM, ++ 0); + -+ if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) -+ radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); ++ return mt; ++} + -+ *poffset = rmesa->dma.current_used; -+ *pbo = rmesa->dma.current; -+ radeon_bo_ref(*pbo); ++void radeon_miptree_reference(radeon_mipmap_tree *mt) ++{ ++ mt->refcount++; ++ assert(mt->refcount > 0); ++} + -+ /* Always align to at least 16 bytes */ -+ rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; -+ rmesa->dma.current_vertexptr = rmesa->dma.current_used; ++void radeon_miptree_unreference(radeon_mipmap_tree *mt) ++{ ++ if (!mt) ++ return; + -+ assert(rmesa->dma.current_used <= 
rmesa->dma.current->size); ++ assert(mt->refcount > 0); ++ mt->refcount--; ++ if (!mt->refcount) { ++ radeon_bo_unref(mt->bo); ++ free(mt); ++ } +} + -+void radeonReleaseDmaRegion(radeonContextPtr rmesa) ++ ++/** ++ * Calculate first and last mip levels for the given texture object, ++ * where the dimensions are taken from the given texture image at ++ * the given level. ++ * ++ * Note: level is the OpenGL level number, which is not necessarily the same ++ * as the first level that is actually present. ++ * ++ * The base level image of the given texture face must be non-null, ++ * or this will fail. ++ */ ++static void calculate_first_last_level(struct gl_texture_object *tObj, ++ GLuint *pfirstLevel, GLuint *plastLevel, ++ GLuint face, GLuint level) +{ -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); -+ if (rmesa->dma.current) { -+ rmesa->dma.nr_released_bufs++; -+ radeon_bo_unmap(rmesa->dma.current); -+ radeon_bo_unref(rmesa->dma.current); ++ const struct gl_texture_image * const baseImage = ++ tObj->Image[face][level]; ++ ++ assert(baseImage); ++ ++ /* These must be signed values. MinLod and MaxLod can be negative numbers, ++ * and having firstLevel and lastLevel as signed prevents the need for ++ * extra sign checks. ++ */ ++ int firstLevel; ++ int lastLevel; ++ ++ /* Yes, this looks overly complicated, but it's all needed. ++ */ ++ switch (tObj->Target) { ++ case GL_TEXTURE_1D: ++ case GL_TEXTURE_2D: ++ case GL_TEXTURE_3D: ++ case GL_TEXTURE_CUBE_MAP: ++ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { ++ /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. 
++ */ ++ firstLevel = lastLevel = tObj->BaseLevel; ++ } else { ++ firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); ++ firstLevel = MAX2(firstLevel, tObj->BaseLevel); ++ firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2); ++ lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); ++ lastLevel = MAX2(lastLevel, tObj->BaseLevel); ++ lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2); ++ lastLevel = MIN2(lastLevel, tObj->MaxLevel); ++ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ ++ } ++ break; ++ case GL_TEXTURE_RECTANGLE_NV: ++ case GL_TEXTURE_4D_SGIS: ++ firstLevel = lastLevel = 0; ++ break; ++ default: ++ return; + } -+ rmesa->dma.current = NULL; ++ ++ /* save these values */ ++ *pfirstLevel = firstLevel; ++ *plastLevel = lastLevel; +} + + -+/* Flush vertices in the current dma region. ++/** ++ * Checks whether the given miptree can hold the given texture image at the ++ * given face and level. + */ -+void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) ++GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, ++ struct gl_texture_image *texImage, GLuint face, GLuint level) +{ -+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx); -+ struct radeon_dma *dma = &rmesa->dma; -+ ++ radeon_mipmap_level *lvl; + -+ if (RADEON_DEBUG & DEBUG_IOCTL) -+ fprintf(stderr, "%s\n", __FUNCTION__); -+ dma->flush = NULL; ++ if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) ++ return GL_FALSE; + -+ if (dma->current) { -+ GLuint current_offset = dma->current_used; ++ if (texImage->IsCompressed != mt->compressed) ++ return GL_FALSE; + -+ assert (dma->current_used + -+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == -+ dma->current_vertexptr); ++ if (!texImage->IsCompressed && ++ !mt->compressed && ++ texImage->TexFormat->TexelBytes != mt->bpp) ++ return GL_FALSE; + -+ if (dma->current_used != dma->current_vertexptr) { -+ dma->current_used = dma->current_vertexptr; ++ lvl = &mt->levels[level - 
mt->firstLevel]; ++ if (lvl->width != texImage->Width || ++ lvl->height != texImage->Height || ++ lvl->depth != texImage->Depth) ++ return GL_FALSE; + -+ rmesa->vtbl.swtcl_flush(ctx, current_offset); -+ } -+ rmesa->swtcl.numverts = 0; -+ } ++ return GL_TRUE; +} -+/* Alloc space in the current dma region. ++ ++ ++/** ++ * Checks whether the given miptree has the right format to store the given texture object. + */ -+void * -+rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) ++GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj) +{ -+ GLuint bytes = vsize * nverts; -+ void *head; -+ -+ if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { -+ radeonRefillCurrentDmaRegion(rmesa, bytes); -+ } ++ struct gl_texture_image *firstImage; ++ GLuint compressed; ++ GLuint numfaces = 1; ++ GLuint firstLevel, lastLevel; + -+ if (!rmesa->dma.flush) { -+ rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; -+ rmesa->dma.flush = rcommon_flush_last_swtcl_prim; -+ } ++ calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel); ++ if (texObj->Target == GL_TEXTURE_CUBE_MAP) ++ numfaces = 6; + -+ ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); -+ ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); -+ ASSERT( rmesa->dma.current_used + -+ rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == -+ rmesa->dma.current_vertexptr ); ++ firstImage = texObj->Image[0][firstLevel]; ++ compressed = firstImage->IsCompressed ? 
firstImage->TexFormat->MesaFormat : 0; + -+ head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); -+ rmesa->dma.current_vertexptr += bytes; -+ rmesa->swtcl.numverts += nverts; -+ return head; ++ return (mt->firstLevel == firstLevel && ++ mt->lastLevel == lastLevel && ++ mt->width0 == firstImage->Width && ++ mt->height0 == firstImage->Height && ++ mt->depth0 == firstImage->Depth && ++ mt->bpp == firstImage->TexFormat->TexelBytes && ++ mt->compressed == compressed); +} -diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h -new file mode 100644 -index 0000000..cee3744 ---- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h -@@ -0,0 +1,51 @@ -+/************************************************************************** -+ -+Copyright (C) 2004 Nicolai Haehnle. -+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. -+ -+The Weather Channel (TM) funded Tungsten Graphics to develop the -+initial release of the Radeon 8500 driver under the XFree86 license. -+This notice must be preserved. -+ -+All Rights Reserved. -+ -+Permission is hereby granted, free of charge, to any person obtaining a -+copy of this software and associated documentation files (the "Software"), -+to deal in the Software without restriction, including without limitation -+on the rights to use, copy, modify, merge, publish, distribute, sub -+license, and/or sell copies of the Software, and to permit persons to whom -+the Software is furnished to do so, subject to the following conditions: -+ -+The above copyright notice and this permission notice (including the next -+paragraph) shall be included in all copies or substantial portions of the -+Software. -+ -+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL -+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -+USE OR OTHER DEALINGS IN THE SOFTWARE. -+ -+**************************************************************************/ + -+#ifndef RADEON_DMA_H -+#define RADEON_DMA_H + -+void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count); -+void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count); ++/** ++ * Try to allocate a mipmap tree for the given texture that will fit the ++ * given image in the given position. ++ */ ++void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, ++ struct gl_texture_image *texImage, GLuint face, GLuint level) ++{ ++ GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0; ++ GLuint numfaces = 1; ++ GLuint firstLevel, lastLevel; + -+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, -+ GLvoid * data, int size, int stride, int count); ++ assert(!t->mt); + -+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); -+void radeonAllocDmaRegion(radeonContextPtr rmesa, -+ struct radeon_bo **pbo, int *poffset, -+ int bytes, int alignment); -+void radeonReleaseDmaRegion(radeonContextPtr rmesa); ++ calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level); ++ if (t->base.Target == GL_TEXTURE_CUBE_MAP) ++ numfaces = 6; + -+void rcommon_flush_last_swtcl_prim(GLcontext *ctx); ++ if (level != firstLevel || face >= numfaces) ++ return; + -+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize); -+#endif -diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c ++ t->mt = radeon_miptree_create(rmesa, t, t->base.Target, ++ firstLevel, lastLevel, ++ texImage->Width, texImage->Height, texImage->Depth, ++ 
texImage->TexFormat->TexelBytes, t->tile_bits, compressed); ++} +diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h new file mode 100644 -index 0000000..3203ee1 +index 0000000..43dfa48 --- /dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c -@@ -0,0 +1,360 @@ ++++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +@@ -0,0 +1,97 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * @@ -4549,442 +28260,5955 @@ index 0000000..3203ee1 + * + */ + -+#include "radeon_mipmap_tree.h" ++#ifndef __RADEON_MIPMAP_TREE_H_ ++#define __RADEON_MIPMAP_TREE_H_ ++ ++#include "radeon_common.h" ++ ++typedef struct _radeon_mipmap_tree radeon_mipmap_tree; ++typedef struct _radeon_mipmap_level radeon_mipmap_level; ++typedef struct _radeon_mipmap_image radeon_mipmap_image; ++ ++struct _radeon_mipmap_image { ++ GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */ ++}; ++ ++struct _radeon_mipmap_level { ++ GLuint width; ++ GLuint height; ++ GLuint depth; ++ GLuint size; /** Size of each image, in bytes */ ++ GLuint rowstride; /** in bytes */ ++ radeon_mipmap_image faces[6]; ++}; ++ ++ ++/** ++ * A mipmap tree contains texture images in the layout that the hardware ++ * expects. ++ * ++ * The meta-data of mipmap trees is immutable, i.e. you cannot change the ++ * layout on-the-fly; however, the texture contents (i.e. texels) can be ++ * changed. 
++ */ ++struct _radeon_mipmap_tree { ++ radeonContextPtr radeon; ++ radeonTexObj *t; ++ struct radeon_bo *bo; ++ GLuint refcount; ++ ++ GLuint totalsize; /** total size of the miptree, in bytes */ ++ ++ GLenum target; /** GL_TEXTURE_xxx */ ++ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ ++ GLuint firstLevel; /** First mip level stored in this mipmap tree */ ++ GLuint lastLevel; /** Last mip level stored in this mipmap tree */ ++ ++ GLuint width0; /** Width of firstLevel image */ ++ GLuint height0; /** Height of firstLevel image */ ++ GLuint depth0; /** Depth of firstLevel image */ ++ ++ GLuint bpp; /** Bytes per texel */ ++ GLuint tilebits; /** RADEON_TXO_xxx_TILE */ ++ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ ++ ++ radeon_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; ++}; ++ ++radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, ++ GLenum target, GLuint firstLevel, GLuint lastLevel, ++ GLuint width0, GLuint height0, GLuint depth0, ++ GLuint bpp, GLuint tilebits, GLuint compressed); ++void radeon_miptree_reference(radeon_mipmap_tree *mt); ++void radeon_miptree_unreference(radeon_mipmap_tree *mt); ++ ++GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, ++ struct gl_texture_image *texImage, GLuint face, GLuint level); ++GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj); ++void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, ++ struct gl_texture_image *texImage, GLuint face, GLuint level); ++ ++ ++#endif /* __RADEON_MIPMAP_TREE_H_ */ +diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c +index 6613757..bbed838 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_sanity.c ++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c +@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf ) + } + + +-int 
radeonSanityCmdBuffer( radeonContextPtr rmesa, ++int radeonSanityCmdBuffer( r100ContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ) + { +diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h +index 1ec06bc..f30eb1c 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_sanity.h ++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h +@@ -1,7 +1,7 @@ + #ifndef RADEON_SANITY_H + #define RADEON_SANITY_H + +-extern int radeonSanityCmdBuffer( radeonContextPtr rmesa, ++extern int radeonSanityCmdBuffer( r100ContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ); + +diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c +index e3afaa9..c591e9f 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_screen.c ++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c +@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * \author Gareth Hughes + */ + ++#include + #include "main/glheader.h" + #include "main/imports.h" + #include "main/mtypes.h" +@@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "radeon_chipset.h" + #include "radeon_macros.h" + #include "radeon_screen.h" ++#include "radeon_common.h" ++#include "radeon_span.h" + #if !RADEON_COMMON + #include "radeon_context.h" +-#include "radeon_span.h" + #include "radeon_tex.h" + #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + #include "r200_context.h" + #include "r200_ioctl.h" +-#include "r200_span.h" + #include "r200_tex.h" + #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + #include "r300_context.h" + #include "r300_fragprog.h" + #include "r300_tex.h" +-#include "radeon_span.h" + #endif + + #include "utils.h" + #include "vblank.h" + #include "drirenderbuffer.h" + ++#include "radeon_bocs_wrapper.h" ++ + #include "GL/internal/dri_interface.h" + + /* Radeon configuration + */ + #include "xmlpool.h" + ++#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \ ++DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ ++ DRI_CONF_DESC(en,"Size of command buffer (in KB)") \ ++ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ ++DRI_CONF_OPT_END ++ + #if !RADEON_COMMON /* R100 */ + PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN +@@ -80,6 +88,7 @@ DRI_CONF_BEGIN + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_MAX_TEXTURE_UNITS(3,2,3) + DRI_CONF_HYPERZ(false) ++ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) +@@ -95,7 +104,7 @@ DRI_CONF_BEGIN + DRI_CONF_NO_RAST(false) + DRI_CONF_SECTION_END + DRI_CONF_END; +-static const GLuint __driNConfigOptions = 14; ++static const GLuint __driNConfigOptions = 15; + + #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + +@@ -107,6 +116,7 @@ DRI_CONF_BEGIN + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_MAX_TEXTURE_UNITS(6,2,6) + DRI_CONF_HYPERZ(false) ++ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) 
+@@ -126,7 +136,7 @@ DRI_CONF_BEGIN + DRI_CONF_NV_VERTEX_PROGRAM(false) + DRI_CONF_SECTION_END + DRI_CONF_END; +-static const GLuint __driNConfigOptions = 16; ++static const GLuint __driNConfigOptions = 17; + + extern const struct dri_extension blend_extensions[]; + extern const struct dri_extension ARB_vp_extension[]; +@@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \ + DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \ + DRI_CONF_OPT_END + +-#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \ +-DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ +- DRI_CONF_DESC(en,"Size of command buffer (in KB)") \ +- DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ +-DRI_CONF_OPT_END ++ + + #define DRI_CONF_DISABLE_S3TC(def) \ + DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \ +@@ -206,8 +212,9 @@ DRI_CONF_BEGIN + DRI_CONF_END; + static const GLuint __driNConfigOptions = 17; + ++extern const struct dri_extension gl_20_extension[]; ++ + #ifndef RADEON_DEBUG +-int RADEON_DEBUG = 0; + + static const struct dri_debug_control debug_control[] = { + {"fall", DEBUG_FALLBACKS}, +@@ -349,137 +356,17 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + r300SetTexOffset, + }; +-#endif +- +-/* Create the device specific screen private data struct. +- */ +-static radeonScreenPtr +-radeonCreateScreen( __DRIscreenPrivate *sPriv ) +-{ +- radeonScreenPtr screen; +- RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; +- unsigned char *RADEONMMIO; +- int i; +- int ret; +- uint32_t temp; +- +- if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { +- fprintf(stderr,"\nERROR! 
sizeof(RADEONDRIRec) does not match passed size from device driver\n"); +- return GL_FALSE; +- } + +- /* Allocate the private area */ +- screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); +- if ( !screen ) { +- __driUtilMessage("%s: Could not allocate memory for screen structure", +- __FUNCTION__); +- return NULL; +- } +- +-#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +- RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); ++static const __DRItexBufferExtension r300TexBufferExtension = { ++ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, ++ r300SetTexBuffer, ++}; + #endif + +- /* parse information in __driConfigOptions */ +- driParseOptionInfo (&screen->optionCache, +- __driConfigOptions, __driNConfigOptions); +- +- /* This is first since which regions we map depends on whether or +- * not we are using a PCI card. +- */ +- screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); +- { +- int ret; +- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, +- &screen->gart_buffer_offset); +- +- if (ret) { +- FREE( screen ); +- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); +- return NULL; +- } +- +- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE, +- &screen->gart_base); +- if (ret) { +- FREE( screen ); +- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret); +- return NULL; +- } +- +- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR, +- &screen->irq); +- if (ret) { +- FREE( screen ); +- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); +- return NULL; +- } +- screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7); +- screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11); +- screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16); +- screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18); +- screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); 
+- screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); +- screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); +- } +- +- screen->mmio.handle = dri_priv->registerHandle; +- screen->mmio.size = dri_priv->registerSize; +- if ( drmMap( sPriv->fd, +- screen->mmio.handle, +- screen->mmio.size, +- &screen->mmio.map ) ) { +- FREE( screen ); +- __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); +- return NULL; +- } +- +- RADEONMMIO = screen->mmio.map; +- +- screen->status.handle = dri_priv->statusHandle; +- screen->status.size = dri_priv->statusSize; +- if ( drmMap( sPriv->fd, +- screen->status.handle, +- screen->status.size, +- &screen->status.map ) ) { +- drmUnmap( screen->mmio.map, screen->mmio.size ); +- FREE( screen ); +- __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); +- return NULL; +- } +- screen->scratch = (__volatile__ uint32_t *) +- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); +- +- screen->buffers = drmMapBufs( sPriv->fd ); +- if ( !screen->buffers ) { +- drmUnmap( screen->status.map, screen->status.size ); +- drmUnmap( screen->mmio.map, screen->mmio.size ); +- FREE( screen ); +- __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); +- return NULL; +- } +- +- if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { +- screen->gartTextures.handle = dri_priv->gartTexHandle; +- screen->gartTextures.size = dri_priv->gartTexMapSize; +- if ( drmMap( sPriv->fd, +- screen->gartTextures.handle, +- screen->gartTextures.size, +- (drmAddressPtr)&screen->gartTextures.map ) ) { +- drmUnmapBufs( screen->buffers ); +- drmUnmap( screen->status.map, screen->status.size ); +- drmUnmap( screen->mmio.map, screen->mmio.size ); +- FREE( screen ); +- __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); +- return NULL; +- } +- +- screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; +- } +- ++static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) 
++{ + screen->chip_flags = 0; +- /* XXX: add more chipsets */ +- switch ( dri_priv->deviceID ) { ++ switch ( device_id ) { + case PCI_CHIP_RADEON_LY: + case PCI_CHIP_RADEON_LZ: + case PCI_CHIP_RADEON_QY: +@@ -683,12 +570,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->chip_family = CHIP_FAMILY_RS400; + break; + +- case PCI_CHIP_RS600_793F: +- case PCI_CHIP_RS600_7941: +- case PCI_CHIP_RS600_7942: +- screen->chip_family = CHIP_FAMILY_RS600; +- break; +- + case PCI_CHIP_RS690_791E: + case PCI_CHIP_RS690_791F: + screen->chip_family = CHIP_FAMILY_RS690; +@@ -817,9 +698,162 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + + default: + fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", +- dri_priv->deviceID); ++ device_id); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++ ++/* Create the device specific screen private data struct. ++ */ ++static radeonScreenPtr ++radeonCreateScreen( __DRIscreenPrivate *sPriv ) ++{ ++ radeonScreenPtr screen; ++ RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; ++ unsigned char *RADEONMMIO = NULL; ++ int i; ++ int ret; ++ uint32_t temp; ++ ++ if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { ++ fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); ++ return GL_FALSE; ++ } ++ ++ /* Allocate the private area */ ++ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); ++ if ( !screen ) { ++ __driUtilMessage("%s: Could not allocate memory for screen structure", ++ __FUNCTION__); + return NULL; + } ++ ++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) ++ RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); ++#endif ++ ++ /* parse information in __driConfigOptions */ ++ driParseOptionInfo (&screen->optionCache, ++ __driConfigOptions, __driNConfigOptions); ++ ++ /* This is first since which regions we map depends on whether or ++ * not we are using a PCI card. ++ */ ++ screen->card_type = (dri_priv->IsPCI ? 
RADEON_CARD_PCI : RADEON_CARD_AGP); ++ { ++ int ret; ++ ++#ifdef RADEON_PARAM_KERNEL_MM ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM, ++ &screen->kernel_mm); ++ ++ if (ret && ret != -EINVAL) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret); ++ return NULL; ++ } ++ ++ if (ret == -EINVAL) ++ screen->kernel_mm = 0; ++#endif + -+#include -+#include ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, ++ &screen->gart_buffer_offset); + -+#include "main/simple_list.h" -+#include "main/texcompress.h" -+#include "main/texformat.h" ++ if (ret) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); ++ return NULL; ++ } + -+static GLuint radeon_compressed_texture_size(GLcontext *ctx, -+ GLsizei width, GLsizei height, GLsizei depth, -+ GLuint mesaFormat) -+{ -+ GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE, ++ &screen->gart_base); ++ if (ret) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret); ++ return NULL; ++ } + -+ if (mesaFormat == MESA_FORMAT_RGB_DXT1 || -+ mesaFormat == MESA_FORMAT_RGBA_DXT1) { -+ if (width + 3 < 8) /* width one block */ -+ size = size * 4; -+ else if (width + 3 < 16) -+ size = size * 2; -+ } else { -+ /* DXT3/5, 16 bytes per block */ -+ // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); -+ if (width + 3 < 8) -+ size = size * 2; -+ } ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR, ++ &screen->irq); ++ if (ret) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); ++ return NULL; ++ } ++ screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7); ++ screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11); ++ screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16); ++ 
screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18); ++ screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); ++ screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); ++ screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); ++ } + -+ return size; -+} ++ if (!screen->kernel_mm) { ++ screen->mmio.handle = dri_priv->registerHandle; ++ screen->mmio.size = dri_priv->registerSize; ++ if ( drmMap( sPriv->fd, ++ screen->mmio.handle, ++ screen->mmio.size, ++ &screen->mmio.map ) ) { ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ ++ RADEONMMIO = screen->mmio.map; ++ ++ screen->status.handle = dri_priv->statusHandle; ++ screen->status.size = dri_priv->statusSize; ++ if ( drmMap( sPriv->fd, ++ screen->status.handle, ++ screen->status.size, ++ &screen->status.map ) ) { ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ screen->scratch = (__volatile__ uint32_t *) ++ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); ++ ++ screen->buffers = drmMapBufs( sPriv->fd ); ++ if ( !screen->buffers ) { ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ ++ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { ++ screen->gartTextures.handle = dri_priv->gartTexHandle; ++ screen->gartTextures.size = dri_priv->gartTexMapSize; ++ if ( drmMap( sPriv->fd, ++ screen->gartTextures.handle, ++ screen->gartTextures.size, ++ (drmAddressPtr)&screen->gartTextures.map ) ) { ++ drmUnmapBufs( screen->buffers ); ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); ++ 
return NULL; ++ } ++ ++ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; ++ } ++ } + + -+static int radeon_compressed_num_bytes(GLuint mesaFormat) -+{ -+ int bytes = 0; -+ switch(mesaFormat) { -+ -+ case MESA_FORMAT_RGB_FXT1: -+ case MESA_FORMAT_RGBA_FXT1: -+ case MESA_FORMAT_RGB_DXT1: -+ case MESA_FORMAT_RGBA_DXT1: -+ bytes = 2; -+ break; -+ -+ case MESA_FORMAT_RGBA_DXT3: -+ case MESA_FORMAT_RGBA_DXT5: -+ bytes = 4; -+ default: -+ break; ++ ret = radeon_set_screen_flags(screen, dri_priv->deviceID); ++ if (ret == -1) ++ return NULL; ++ + if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && + sPriv->ddx_version.minor < 2) { + fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); +@@ -847,7 +881,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, + &temp); + if (ret) { +- if (screen->chip_family < CHIP_FAMILY_RS600) ++ if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm) + screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; + else { + FREE( screen ); +@@ -858,7 +892,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->fbLocation = (temp & 0xffff) << 16; + } + +- if (screen->chip_family >= CHIP_FAMILY_R300) { ++ if (screen->chip_family >= CHIP_FAMILY_RV515) { + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES, + &temp); + if (ret) { +@@ -949,6 +983,103 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->extensions[i++] = &driMediaStreamCounterExtension.base; + } + ++ if (!screen->kernel_mm) { ++#if !RADEON_COMMON ++ screen->extensions[i++] = &radeonTexOffsetExtension.base; ++#endif ++ ++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) ++ if (IS_R200_CLASS(screen)) ++ screen->extensions[i++] = &r200AllocateExtension.base; ++ ++ screen->extensions[i++] = &r200texOffsetExtension.base; ++#endif ++ ++#if RADEON_COMMON && 
defined(RADEON_COMMON_FOR_R300) ++ screen->extensions[i++] = &r300texOffsetExtension.base; ++#endif + } -+ -+ return bytes; ++ ++ screen->extensions[i++] = NULL; ++ sPriv->extensions = screen->extensions; ++ ++ screen->driScreen = sPriv; ++ screen->sarea_priv_offset = dri_priv->sarea_priv_offset; ++ screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + ++ screen->sarea_priv_offset); ++ ++ if (screen->kernel_mm) ++ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); ++ else ++ screen->bom = radeon_bo_manager_legacy_ctor(screen); ++ if (screen->bom == NULL) { ++ free(screen); ++ return NULL; ++ } ++ ++ return screen; +} + -+/** -+ * Compute sizes and fill in offset and blit information for the given -+ * image (determined by \p face and \p level). -+ * -+ * \param curOffset points to the offset at which the image is to be stored -+ * and is updated by this function according to the size of the image. -+ */ -+static void compute_tex_image_offset(radeon_mipmap_tree *mt, -+ GLuint face, GLuint level, GLuint* curOffset) ++static radeonScreenPtr ++radeonCreateScreen2(__DRIscreenPrivate *sPriv) +{ -+ radeon_mipmap_level *lvl = &mt->levels[level]; ++ radeonScreenPtr screen; ++ int i; ++ int ret; ++ uint32_t device_id; ++ ++ /* Allocate the private area */ ++ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); ++ if ( !screen ) { ++ __driUtilMessage("%s: Could not allocate memory for screen structure", ++ __FUNCTION__); ++ fprintf(stderr, "leaving here\n"); ++ return NULL; ++ } + -+ /* Find image size in bytes */ -+ if (mt->compressed) { -+ /* TODO: Is this correct? Need test cases for compressed textures! 
*/ -+ GLuint align; ++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) ++ RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); ++#endif + -+ lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; -+ lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, -+ lvl->width, lvl->height, lvl->depth, mt->compressed); -+ } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { -+ lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; -+ lvl->size = lvl->rowstride * lvl->height; -+ } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { -+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, -+ * though the actual offset may be different (if texture is less than -+ * 32 bytes width) to the untiled case */ -+ lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; -+ lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; -+ } else { -+ lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31; -+ lvl->size = lvl->rowstride * lvl->height * lvl->depth; -+ } -+ assert(lvl->size > 0); ++ /* parse information in __driConfigOptions */ ++ driParseOptionInfo (&screen->optionCache, ++ __driConfigOptions, __driNConfigOptions); + -+ /* All images are aligned to a 32-byte offset */ -+ *curOffset = (*curOffset + 0x1f) & ~0x1f; -+ lvl->faces[face].offset = *curOffset; -+ *curOffset += lvl->size; ++ screen->kernel_mm = 1; ++ screen->chip_flags = 0; + -+ if (RADEON_DEBUG & DEBUG_TEXTURE) -+ fprintf(stderr, -+ "level %d, face %d: rs:%d %dx%d at %d\n", -+ level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset); -+} ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR, ++ &screen->irq); + -+static GLuint minify(GLuint size, GLuint levels) ++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_DEVICE_ID, ++ &device_id); ++ if (ret) { ++ FREE( screen ); ++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret); ++ return NULL; ++ } ++ ++ ret = radeon_set_screen_flags(screen, device_id); ++ if (ret == -1) ++ 
return NULL; ++ ++ if (screen->chip_family <= CHIP_FAMILY_RS200) ++ screen->chip_flags |= RADEON_CLASS_R100; ++ else if (screen->chip_family <= CHIP_FAMILY_RV280) ++ screen->chip_flags |= RADEON_CLASS_R200; ++ else ++ screen->chip_flags |= RADEON_CLASS_R300; ++ ++ i = 0; ++ screen->extensions[i++] = &driCopySubBufferExtension.base; ++ screen->extensions[i++] = &driFrameTrackingExtension.base; ++ screen->extensions[i++] = &driReadDrawableExtension; ++ ++ if ( screen->irq != 0 ) { ++ screen->extensions[i++] = &driSwapControlExtension.base; ++ screen->extensions[i++] = &driMediaStreamCounterExtension.base; ++ } ++ + #if !RADEON_COMMON + screen->extensions[i++] = &radeonTexOffsetExtension.base; + #endif +@@ -961,14 +1092,19 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + #endif + + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +- screen->extensions[i++] = &r300texOffsetExtension.base; ++ //screen->extensions[i++] = &r300texOffsetExtension.base; ++ screen->extensions[i++] = &r300TexBufferExtension.base; + #endif + + screen->extensions[i++] = NULL; + sPriv->extensions = screen->extensions; + + screen->driScreen = sPriv; +- screen->sarea_priv_offset = dri_priv->sarea_priv_offset; ++ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); ++ if (screen->bom == NULL) { ++ free(screen); ++ return NULL; ++ } + return screen; + } + +@@ -977,23 +1113,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + static void + radeonDestroyScreen( __DRIscreenPrivate *sPriv ) + { +- radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; ++ radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; + +- if (!screen) +- return; ++ if (!screen) ++ return; + +- if ( screen->gartTextures.map ) { +- drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); +- } +- drmUnmapBufs( screen->buffers ); +- drmUnmap( screen->status.map, screen->status.size ); +- drmUnmap( screen->mmio.map, screen->mmio.size ); ++ if (screen->kernel_mm) { ++#ifdef RADEON_BO_TRACK ++ 
radeon_tracker_print(&screen->bom->tracker, stderr); ++#endif ++ radeon_bo_manager_gem_dtor(screen->bom); ++ } else { ++ radeon_bo_manager_legacy_dtor(screen->bom); ++ ++ if ( screen->gartTextures.map ) { ++ drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); ++ } ++ drmUnmapBufs( screen->buffers ); ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ } + +- /* free all option information */ +- driDestroyOptionInfo (&screen->optionCache); ++ /* free all option information */ ++ driDestroyOptionInfo (&screen->optionCache); + +- FREE( screen ); +- sPriv->private = NULL; ++ FREE( screen ); ++ sPriv->private = NULL; + } + + +@@ -1002,15 +1147,102 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) + static GLboolean + radeonInitDriver( __DRIscreenPrivate *sPriv ) + { +- sPriv->private = (void *) radeonCreateScreen( sPriv ); +- if ( !sPriv->private ) { +- radeonDestroyScreen( sPriv ); +- return GL_FALSE; +- } ++ if (sPriv->dri2.enabled) { ++ sPriv->private = (void *) radeonCreateScreen2( sPriv ); ++ } else { ++ sPriv->private = (void *) radeonCreateScreen( sPriv ); ++ } ++ if ( !sPriv->private ) { ++ radeonDestroyScreen( sPriv ); ++ return GL_FALSE; ++ } + +- return GL_TRUE; ++ return GL_TRUE; + } + ++static GLboolean ++radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb, ++ GLenum intFormat, GLuint w, GLuint h) +{ -+ size = size >> levels; -+ if (size < 1) -+ size = 1; -+ return size; ++ rb->Width = w; ++ rb->Height = h; ++ rb->_ActualFormat = intFormat; ++ ++ return GL_TRUE; +} + -+static void calculate_miptree_layout(radeon_mipmap_tree *mt) ++ ++static struct radeon_renderbuffer * ++radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) +{ -+ GLuint curOffset; -+ GLuint numLevels; -+ GLuint i; ++ struct radeon_renderbuffer *ret; + -+ numLevels = mt->lastLevel - mt->firstLevel + 1; -+ assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); ++ ret = 
CALLOC_STRUCT(radeon_renderbuffer); ++ if (!ret) ++ return NULL; + -+ curOffset = 0; -+ for(i = 0; i < numLevels; i++) { -+ GLuint face; ++ _mesa_init_renderbuffer(&ret->base, 0); + -+ mt->levels[i].width = minify(mt->width0, i); -+ mt->levels[i].height = minify(mt->height0, i); -+ mt->levels[i].depth = minify(mt->depth0, i); ++ /* XXX format junk */ ++ switch (format) { ++ case GL_RGB5: ++ ret->base._ActualFormat = GL_RGB5; ++ ret->base._BaseFormat = GL_RGBA; ++ ret->base.RedBits = 5; ++ ret->base.GreenBits = 6; ++ ret->base.BlueBits = 5; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_RGBA8: ++ ret->base._ActualFormat = GL_RGBA8; ++ ret->base._BaseFormat = GL_RGBA; ++ ret->base.RedBits = 8; ++ ret->base.GreenBits = 8; ++ ret->base.BlueBits = 8; ++ ret->base.AlphaBits = 8; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_STENCIL_INDEX8_EXT: ++ ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT; ++ ret->base._BaseFormat = GL_STENCIL_INDEX; ++ ret->base.StencilBits = 8; ++ ret->base.DataType = GL_UNSIGNED_BYTE; ++ break; ++ case GL_DEPTH_COMPONENT16: ++ ret->base._ActualFormat = GL_DEPTH_COMPONENT16; ++ ret->base._BaseFormat = GL_DEPTH_COMPONENT; ++ ret->base.DepthBits = 16; ++ ret->base.DataType = GL_UNSIGNED_SHORT; ++ break; ++ case GL_DEPTH_COMPONENT24: ++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; ++ ret->base._BaseFormat = GL_DEPTH_COMPONENT; ++ ret->base.DepthBits = 24; ++ ret->base.DataType = GL_UNSIGNED_INT; ++ break; ++ case GL_DEPTH24_STENCIL8_EXT: ++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; ++ ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT; ++ ret->base.DepthBits = 24; ++ ret->base.StencilBits = 8; ++ ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT; ++ break; ++ default: ++ fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format); ++ _mesa_delete_renderbuffer(&ret->base); ++ return NULL; ++ } + -+ for(face = 0; face < mt->faces; face++) -+ compute_tex_image_offset(mt, face, i, &curOffset); -+ } ++ 
ret->dPriv = driDrawPriv; ++ ret->base.InternalFormat = format; + -+ /* Note the required size in memory */ -+ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; -+} ++ ret->base.AllocStorage = radeon_alloc_window_storage; + ++ radeonSetSpanFunctions(ret); + ++ ret->bo = NULL; ++ return ret; ++} + + /** + * Create the Mesa framebuffer and renderbuffers for a given window/drawable. +@@ -1026,95 +1258,86 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, + { + radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; + +- if (isPixmap) { +- return GL_FALSE; /* not implemented */ +- } +- else { +- const GLboolean swDepth = GL_FALSE; +- const GLboolean swAlpha = GL_FALSE; +- const GLboolean swAccum = mesaVis->accumRedBits > 0; +- const GLboolean swStencil = mesaVis->stencilBits > 0 && +- mesaVis->depthBits != 24; +- struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); +- +- /* front color renderbuffer */ +- { +- driRenderbuffer *frontRb +- = driNewRenderbuffer(GL_RGBA, +- driScrnPriv->pFB + screen->frontOffset, +- screen->cpp, +- screen->frontOffset, screen->frontPitch, +- driDrawPriv); +- radeonSetSpanFunctions(frontRb, mesaVis); +- _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); +- } ++ const GLboolean swDepth = GL_FALSE; ++ const GLboolean swAlpha = GL_FALSE; ++ const GLboolean swAccum = mesaVis->accumRedBits > 0; ++ const GLboolean swStencil = mesaVis->stencilBits > 0 && ++ mesaVis->depthBits != 24; ++ GLenum rgbFormat = (mesaVis->redBits == 5 ? 
GL_RGB5 : GL_RGBA8); ++ GLenum depthFormat = GL_NONE; ++ struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); ++ ++ if (mesaVis->depthBits == 16) ++ depthFormat = GL_DEPTH_COMPONENT16; ++ else if (mesaVis->depthBits == 24) ++ depthFormat = GL_DEPTH_COMPONENT24; ++ ++ /* front color renderbuffer */ ++ { ++ struct radeon_renderbuffer *front = ++ radeon_create_renderbuffer(rgbFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base); ++ front->has_surface = 1; ++ } + +- /* back color renderbuffer */ +- if (mesaVis->doubleBufferMode) { +- driRenderbuffer *backRb +- = driNewRenderbuffer(GL_RGBA, +- driScrnPriv->pFB + screen->backOffset, +- screen->cpp, +- screen->backOffset, screen->backPitch, +- driDrawPriv); +- radeonSetSpanFunctions(backRb, mesaVis); +- _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); +- } ++ /* back color renderbuffer */ ++ if (mesaVis->doubleBufferMode) { ++ struct radeon_renderbuffer *back = ++ radeon_create_renderbuffer(rgbFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base); ++ back->has_surface = 1; ++ } + +- /* depth renderbuffer */ +- if (mesaVis->depthBits == 16) { +- driRenderbuffer *depthRb +- = driNewRenderbuffer(GL_DEPTH_COMPONENT16, +- driScrnPriv->pFB + screen->depthOffset, +- screen->cpp, +- screen->depthOffset, screen->depthPitch, +- driDrawPriv); +- radeonSetSpanFunctions(depthRb, mesaVis); +- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); +- depthRb->depthHasSurface = screen->depthHasSurface; +- } +- else if (mesaVis->depthBits == 24) { +- driRenderbuffer *depthRb +- = driNewRenderbuffer(GL_DEPTH_COMPONENT24, +- driScrnPriv->pFB + screen->depthOffset, +- screen->cpp, +- screen->depthOffset, screen->depthPitch, +- driDrawPriv); +- radeonSetSpanFunctions(depthRb, mesaVis); +- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); +- depthRb->depthHasSurface = screen->depthHasSurface; +- } ++ /* depth renderbuffer */ ++ if (depthFormat != 
GL_NONE) { ++ struct radeon_renderbuffer *depth = ++ radeon_create_renderbuffer(depthFormat, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base); ++ depth->has_surface = screen->depthHasSurface; ++ } + +- /* stencil renderbuffer */ +- if (mesaVis->stencilBits > 0 && !swStencil) { +- driRenderbuffer *stencilRb +- = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, +- driScrnPriv->pFB + screen->depthOffset, +- screen->cpp, +- screen->depthOffset, screen->depthPitch, +- driDrawPriv); +- radeonSetSpanFunctions(stencilRb, mesaVis); +- _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); +- stencilRb->depthHasSurface = screen->depthHasSurface; +- } ++ /* stencil renderbuffer */ ++ if (mesaVis->stencilBits > 0 && !swStencil) { ++ struct radeon_renderbuffer *stencil = ++ radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv); ++ _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base); ++ stencil->has_surface = screen->depthHasSurface; ++ } + +- _mesa_add_soft_renderbuffers(fb, +- GL_FALSE, /* color */ +- swDepth, +- swStencil, +- swAccum, +- swAlpha, +- GL_FALSE /* aux */); +- driDrawPriv->driverPrivate = (void *) fb; ++ _mesa_add_soft_renderbuffers(fb, ++ GL_FALSE, /* color */ ++ swDepth, ++ swStencil, ++ swAccum, ++ swAlpha, ++ GL_FALSE /* aux */); ++ driDrawPriv->driverPrivate = (void *) fb; + +- return (driDrawPriv->driverPrivate != NULL); +- } ++ return (driDrawPriv->driverPrivate != NULL); + } + +- + static void + radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) + { ++ struct radeon_renderbuffer *rb; ++ GLframebuffer *fb; ++ ++ fb = (void*)driDrawPriv->driverPrivate; ++ rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ if (rb && rb->bo) { ++ radeon_bo_unref(rb->bo); ++ rb->bo = NULL; ++ } ++ rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ if (rb && rb->bo) { ++ radeon_bo_unref(rb->bo); ++ rb->bo = NULL; ++ } ++ rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; ++ if (rb && rb->bo) { ++ 
radeon_bo_unref(rb->bo); ++ rb->bo = NULL; ++ } + _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + } + +@@ -1149,6 +1372,7 @@ static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv) + + #endif + ++ + /** + * This is the driver specific part of the createNewScreen entry point. + * +@@ -1201,18 +1425,109 @@ radeonInitScreen(__DRIscreenPrivate *psp) + driInitSingleExtension( NULL, NV_vp_extension ); + driInitSingleExtension( NULL, ATI_fs_extension ); + driInitExtensions( NULL, point_extensions, GL_FALSE ); ++#elif defined(RADEON_COMMON_FOR_R300) ++ driInitSingleExtension( NULL, gl_20_extension ); + #endif + + if (!radeonInitDriver(psp)) + return NULL; + ++ /* for now fill in all modes */ + return radeonFillInModes( psp, + dri_priv->bpp, + (dri_priv->bpp == 16) ? 16 : 24, +- (dri_priv->bpp == 16) ? 0 : 8, +- (dri_priv->backOffset != dri_priv->depthOffset) ); ++ (dri_priv->bpp == 16) ? 0 : 8, 1); + } ++#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +/** -+ * Create a new mipmap tree, calculate its layout and allocate memory. ++ * This is the driver specific part of the createNewScreen entry point. ++ * Called when using DRI2. ++ * ++ * \return the __GLcontextModes supported by this driver + */ -+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, -+ GLenum target, GLuint firstLevel, GLuint lastLevel, -+ GLuint width0, GLuint height0, GLuint depth0, -+ GLuint bpp, GLuint tilebits, GLuint compressed) ++static const ++__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) +{ -+ radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); -+ -+ mt->radeon = rmesa; -+ mt->refcount = 1; -+ mt->t = t; -+ mt->target = target; -+ mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; -+ mt->firstLevel = firstLevel; -+ mt->lastLevel = lastLevel; -+ mt->width0 = width0; -+ mt->height0 = height0; -+ mt->depth0 = depth0; -+ mt->bpp = compressed ? 
radeon_compressed_num_bytes(compressed) : bpp; -+ mt->tilebits = tilebits; -+ mt->compressed = compressed; ++ GLenum fb_format[3]; ++ GLenum fb_type[3]; ++ /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't ++ * support pageflipping at all. ++ */ ++ static const GLenum back_buffer_modes[] = { ++ GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/ ++ }; ++ uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1]; ++ int color; ++ __DRIconfig **configs = NULL; ++ ++ /* Calling driInitExtensions here, with a NULL context pointer, ++ * does not actually enable the extensions. It just makes sure ++ * that all the dispatch offsets for all the extensions that ++ * *might* be enables are known. This is needed because the ++ * dispatch offsets need to be known when _mesa_context_create ++ * is called, but we can't enable the extensions until we have a ++ * context pointer. ++ * ++ * Hello chicken. Hello egg. How are you two today? ++ */ ++ driInitExtensions( NULL, card_extensions, GL_FALSE ); ++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) ++ driInitExtensions( NULL, blend_extensions, GL_FALSE ); ++ driInitSingleExtension( NULL, ARB_vp_extension ); ++ driInitSingleExtension( NULL, NV_vp_extension ); ++ driInitSingleExtension( NULL, ATI_fs_extension ); ++ driInitExtensions( NULL, point_extensions, GL_FALSE ); ++#endif + -+ calculate_miptree_layout(mt); ++ if (!radeonInitDriver(psp)) { ++ return NULL; ++ } ++ depth_bits[0] = 0; ++ stencil_bits[0] = 0; ++ depth_bits[1] = 16; ++ stencil_bits[1] = 0; ++ depth_bits[2] = 24; ++ stencil_bits[2] = 0; ++ depth_bits[3] = 24; ++ stencil_bits[3] = 8; ++ ++ msaa_samples_array[0] = 0; ++ ++ fb_format[0] = GL_RGB; ++ fb_type[0] = GL_UNSIGNED_SHORT_5_6_5; ++ ++ fb_format[1] = GL_BGR; ++ fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV; ++ ++ fb_format[2] = GL_BGRA; ++ fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV; ++ ++ for (color = 0; color < ARRAY_SIZE(fb_format); color++) { ++ __DRIconfig **new_configs; ++ ++ 
new_configs = driCreateConfigs(fb_format[color], fb_type[color], ++ depth_bits, ++ stencil_bits, ++ ARRAY_SIZE(depth_bits), ++ back_buffer_modes, ++ ARRAY_SIZE(back_buffer_modes), ++ msaa_samples_array, ++ ARRAY_SIZE(msaa_samples_array)); ++ if (configs == NULL) ++ configs = new_configs; ++ else ++ configs = driConcatConfigs(configs, new_configs); ++ } + -+ mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, -+ 0, mt->totalsize, 1024, -+ RADEON_GEM_DOMAIN_VRAM, -+ 0); ++ if (configs == NULL) { ++ fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, ++ __LINE__); ++ return NULL; ++ } + -+ return mt; ++ return (const __DRIconfig **)configs; +} -+ -+void radeon_miptree_reference(radeon_mipmap_tree *mt) + + /** + * Get information about previous buffer swaps. +@@ -1220,11 +1535,7 @@ radeonInitScreen(__DRIscreenPrivate *psp) + static int + getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) + { +-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) + radeonContextPtr rmesa; +-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +- r200ContextPtr rmesa; +-#endif + + if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) + || (dPriv->driContextPriv->driverPrivate == NULL) +@@ -1261,6 +1572,8 @@ const struct __DriverAPIRec driDriverAPI = { + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = radeonCopySubBuffer, ++ /* DRI2 */ ++ .InitScreen2 = radeonInitScreen2, + }; + #else + const struct __DriverAPIRec driDriverAPI = { +@@ -1270,14 +1583,16 @@ const struct __DriverAPIRec driDriverAPI = { + .DestroyContext = r200DestroyContext, + .CreateBuffer = radeonCreateBuffer, + .DestroyBuffer = radeonDestroyBuffer, +- .SwapBuffers = r200SwapBuffers, +- .MakeCurrent = r200MakeCurrent, +- .UnbindContext = r200UnbindContext, ++ .SwapBuffers = radeonSwapBuffers, ++ .MakeCurrent = radeonMakeCurrent, ++ .UnbindContext = radeonUnbindContext, + .GetSwapInfo = getSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = 
driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, +- .CopySubBuffer = r200CopySubBuffer, ++ .CopySubBuffer = radeonCopySubBuffer, ++ .InitScreen2 = radeonInitScreen2, + }; + #endif ++ +diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h +index b84c70b..1c0f5bb 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_screen.h ++++ b/src/mesa/drivers/dri/radeon/radeon_screen.h +@@ -54,7 +54,7 @@ typedef struct { + drmAddress map; /* Mapping of the DRM region */ + } radeonRegionRec, *radeonRegionPtr; + +-typedef struct { ++typedef struct radeon_screen { + int chip_family; + int chip_flags; + int cpp; +@@ -103,9 +103,12 @@ typedef struct { + /* Configuration cache with default values for all contexts */ + driOptionCache optionCache; + +- const __DRIextension *extensions[8]; ++ const __DRIextension *extensions[16]; + + int num_gb_pipes; ++ int kernel_mm; ++ drm_radeon_sarea_t *sarea; /* Private SAREA data */ ++ struct radeon_bo_manager *bom; + } radeonScreenRec, *radeonScreenPtr; + + #define IS_R100_CLASS(screen) \ +diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c +index 12051ff..49ec2c3 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_span.c ++++ b/src/mesa/drivers/dri/radeon/radeon_span.c +@@ -43,37 +43,168 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include "main/glheader.h" + #include "swrast/swrast.h" + +-#include "radeon_context.h" +-#include "radeon_ioctl.h" +-#include "radeon_state.h" ++#include "radeon_common.h" ++#include "radeon_lock.h" + #include "radeon_span.h" +-#include "radeon_tex.h" +- +-#include "drirenderbuffer.h" + + #define DBG 0 + ++static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb, ++ GLint x, GLint y) +{ -+ mt->refcount++; -+ assert(mt->refcount > 0); ++ GLubyte *ptr = rrb->bo->ptr; ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; ++ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; ++ GLint offset; ++ GLint nmacroblkpl; ++ GLint nmicroblkpl; ++ ++ x += dPriv->x; ++ y += dPriv->y; ++ ++ if (rrb->has_surface || !(rrb->bo->flags & mask)) { ++ offset = x * rrb->cpp + y * rrb->pitch; ++ } else { ++ offset = 0; ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { ++ nmacroblkpl = rrb->pitch >> 5; ++ offset += ((y >> 4) * nmacroblkpl) << 11; ++ offset += ((y & 15) >> 1) << 8; ++ offset += (y & 1) << 4; ++ offset += (x >> 5) << 11; ++ offset += ((x & 31) >> 2) << 5; ++ offset += (x & 3) << 2; ++ } else { ++ nmacroblkpl = rrb->pitch >> 6; ++ offset += ((y >> 3) * nmacroblkpl) << 11; ++ offset += (y & 7) << 8; ++ offset += (x >> 6) << 11; ++ offset += ((x & 63) >> 3) << 5; ++ offset += (x & 7) << 2; ++ } ++ } else { ++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; ++ offset += (y * nmicroblkpl) << 5; ++ offset += (x >> 3) << 5; ++ offset += (x & 7) << 2; ++ } ++ } ++ return &ptr[offset]; +} + -+void radeon_miptree_unreference(radeon_mipmap_tree *mt) ++static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb, ++ GLint x, GLint y) +{ -+ if (!mt) -+ return; -+ -+ assert(mt->refcount > 0); -+ mt->refcount--; -+ if (!mt->refcount) { -+ radeon_bo_unref(mt->bo); -+ free(mt); -+ } ++ GLubyte *ptr = rrb->bo->ptr; ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; ++ uint32_t mask = 
RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; ++ GLint offset; ++ GLint nmacroblkpl; ++ GLint nmicroblkpl; ++ ++ x += dPriv->x; ++ y += dPriv->y; ++ ++ if (rrb->has_surface || !(rrb->bo->flags & mask)) { ++ offset = x * rrb->cpp + y * rrb->pitch; ++ } else { ++ offset = 0; ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { ++ nmacroblkpl = rrb->pitch >> 6; ++ offset += ((y >> 4) * nmacroblkpl) << 11; ++ offset += ((y & 15) >> 1) << 8; ++ offset += (y & 1) << 4; ++ offset += (x >> 6) << 11; ++ offset += ((x & 63) >> 3) << 5; ++ offset += (x & 7) << 1; ++ } else { ++ nmacroblkpl = rrb->pitch >> 7; ++ offset += ((y >> 3) * nmacroblkpl) << 11; ++ offset += (y & 7) << 8; ++ offset += (x >> 7) << 11; ++ offset += ((x & 127) >> 4) << 5; ++ offset += (x & 15) << 2; ++ } ++ } else { ++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; ++ offset += (y * nmicroblkpl) << 5; ++ offset += (x >> 4) << 5; ++ offset += (x & 15) << 2; ++ } ++ } ++ return &ptr[offset]; +} + -+ -+/** -+ * Calculate first and last mip levels for the given texture object, -+ * where the dimensions are taken from the given texture image at -+ * the given level. -+ * -+ * Note: level is the OpenGL level number, which is not necessarily the same -+ * as the first level that is actually present. -+ * -+ * The base level image of the given texture face must be non-null, -+ * or this will fail. -+ */ -+static void calculate_first_last_level(struct gl_texture_object *tObj, -+ GLuint *pfirstLevel, GLuint *plastLevel, -+ GLuint face, GLuint level) ++static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb, ++ GLint x, GLint y) +{ -+ const struct gl_texture_image * const baseImage = -+ tObj->Image[face][level]; -+ -+ assert(baseImage); -+ -+ /* These must be signed values. MinLod and MaxLod can be negative numbers, -+ * and having firstLevel and lastLevel as signed prevents the need for -+ * extra sign checks. 
-+ */ -+ int firstLevel; -+ int lastLevel; ++ GLubyte *ptr = rrb->bo->ptr; ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; ++ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; ++ GLint offset; ++ GLint microblkxs; ++ GLint macroblkxs; ++ GLint nmacroblkpl; ++ GLint nmicroblkpl; ++ ++ x += dPriv->x; ++ y += dPriv->y; ++ ++ if (rrb->has_surface || !(rrb->bo->flags & mask)) { ++ offset = x * rrb->cpp + y * rrb->pitch; ++ } else { ++ offset = 0; ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { ++ microblkxs = 16 / rrb->cpp; ++ macroblkxs = 128 / rrb->cpp; ++ nmacroblkpl = rrb->pitch / macroblkxs; ++ offset += ((y >> 4) * nmacroblkpl) << 11; ++ offset += ((y & 15) >> 1) << 8; ++ offset += (y & 1) << 4; ++ offset += (x / macroblkxs) << 11; ++ offset += ((x & (macroblkxs - 1)) / microblkxs) << 5; ++ offset += (x & (microblkxs - 1)) * rrb->cpp; ++ } else { ++ microblkxs = 32 / rrb->cpp; ++ macroblkxs = 256 / rrb->cpp; ++ nmacroblkpl = rrb->pitch / macroblkxs; ++ offset += ((y >> 3) * nmacroblkpl) << 11; ++ offset += (y & 7) << 8; ++ offset += (x / macroblkxs) << 11; ++ offset += ((x & (macroblkxs - 1)) / microblkxs) << 5; ++ offset += (x & (microblkxs - 1)) * rrb->cpp; ++ } ++ } else { ++ microblkxs = 32 / rrb->cpp; ++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; ++ offset += (y * nmicroblkpl) << 5; ++ offset += (x / microblkxs) << 5; ++ offset += (x & (microblkxs - 1)) * rrb->cpp; ++ } ++ } ++ return &ptr[offset]; ++} + -+ /* Yes, this looks overly complicated, but it's all needed. -+ */ -+ switch (tObj->Target) { -+ case GL_TEXTURE_1D: -+ case GL_TEXTURE_2D: -+ case GL_TEXTURE_3D: -+ case GL_TEXTURE_CUBE_MAP: -+ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { -+ /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. 
-+ */ -+ firstLevel = lastLevel = tObj->BaseLevel; -+ } else { -+ firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); -+ firstLevel = MAX2(firstLevel, tObj->BaseLevel); -+ firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2); -+ lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); -+ lastLevel = MAX2(lastLevel, tObj->BaseLevel); -+ lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2); -+ lastLevel = MIN2(lastLevel, tObj->MaxLevel); -+ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ ++ + /* + * Note that all information needed to access pixels in a renderbuffer + * should be obtained through the gl_renderbuffer parameter, not per-context + * information. + */ + #define LOCAL_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ ++ struct radeon_renderbuffer *rrb = (void *) rb; \ ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ +- GLubyte *buf = (GLubyte *) drb->flippedData \ +- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ +- GLuint p; \ +- (void) p; ++ GLuint p; \ ++ (void)p; + + #define LOCAL_DEPTH_VARS \ +- driRenderbuffer *drb = (driRenderbuffer *) rb; \ +- const __DRIdrawablePrivate *dPriv = drb->dPriv; \ +- const GLuint bottom = dPriv->h - 1; \ +- GLuint xo = dPriv->x; \ +- GLuint yo = dPriv->y; \ +- GLubyte *buf = (GLubyte *) drb->Base.Data; ++ struct radeon_renderbuffer *rrb = (void *) rb; \ ++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ ++ const GLuint bottom = dPriv->h - 1; + + #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +@@ -94,7 +225,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #define TAG(x) radeon##x##_RGB565 + #define TAG2(x,y) radeon##x##_RGB565##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) ++#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y)) + #include "spantmp2.h" + + /* 32 bit, ARGB8888 color spanline and pixel functions +@@ -104,7 +235,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define TAG(x) radeon##x##_ARGB8888 + #define TAG2(x,y) radeon##x##_ARGB8888##y +-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) ++#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y)) + #include "spantmp2.h" + + /* ================================================================ +@@ -121,65 +252,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * too... + */ + +-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 4 * (x + y * pitch); +- } else { +- GLuint ba, address = 0; /* a[0..1] = 0 */ +- +-#ifdef COMPILE_R300 +- ba = (y / 8) * (pitch / 8) + (x / 8); +-#else +- ba = (y / 16) * (pitch / 16) + (x / 16); +-#endif +- +- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ +- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ +- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ +- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ +- +- address |= (y & 0x8) << 7; /* a[10] = y[3] */ +- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ +- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] 
*/ +- +- return address; +- } +-} +- +-static INLINE GLuint +-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) +-{ +- GLuint pitch = drb->pitch; +- if (drb->depthHasSurface) { +- return 2 * (x + y * pitch); +- } else { +- GLuint ba, address = 0; /* a[0] = 0 */ +- +- ba = (y / 16) * (pitch / 32) + (x / 32); +- +- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ +- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ +- address |= (x & 0x8) << 4; /* a[7] = x[3] */ +- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ +- address |= (y & 0x8) << 7; /* a[10] = y[3] */ +- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ +- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ +- +- return address; +- } +-} +- + /* 16-bit depth buffer functions + */ + #define VALUE_TYPE GLushort + + #define WRITE_DEPTH( _x, _y, d ) \ +- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; ++ *(GLushort *)radeon_ptr(rrb, _x, _y) = d + + #define READ_DEPTH( d, _x, _y ) \ +- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); ++ d = *(GLushort *)radeon_ptr(rrb, _x, _y) + + #define TAG(x) radeon##x##_z16 + #include "depthtmp.h" +@@ -194,35 +275,36 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) + #ifdef COMPILE_R300 + #define WRITE_DEPTH( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \ ++ GLuint tmp = *_ptr; \ + tmp &= 0x000000ff; \ + tmp |= ((d << 8) & 0xffffff00); \ +- *(GLuint *)(buf + offset) = tmp; \ ++ *_ptr = tmp; \ + } while (0) + #else + #define WRITE_DEPTH( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \ ++ GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ +- *(GLuint *)(buf + offset) = tmp; \ ++ *_ptr = 
tmp; \ + } while (0) + #endif + + #ifdef COMPILE_R300 + #define READ_DEPTH( d, _x, _y ) \ + do { \ +- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ +- _y + yo )) & 0xffffff00) >> 8; \ ++ d = (*(GLuint*)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \ + }while(0) + #else + #define READ_DEPTH( d, _x, _y ) \ +- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ +- _y + yo )) & 0x00ffffff; ++ d = *(GLuint*)(radeon_ptr32(rrb, _x, _y )) & 0x00ffffff; + #endif +- ++/* ++ fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ ++ d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; ++*/ + #define TAG(x) radeon##x##_z24_s8 + #include "depthtmp.h" + +@@ -235,35 +317,35 @@ do { \ + #ifdef COMPILE_R300 + #define WRITE_STENCIL( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y); \ ++ GLuint tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ +- *(GLuint *)(buf + offset) = tmp; \ ++ *_ptr = tmp; \ + } while (0) + #else + #define WRITE_STENCIL( _x, _y, d ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y); \ ++ GLuint tmp = *_ptr; \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ +- *(GLuint *)(buf + offset) = tmp; \ ++ *_ptr = tmp; \ + } while (0) + #endif + + #ifdef COMPILE_R300 + #define READ_STENCIL( d, _x, _y ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \ ++ GLuint tmp = *_ptr; \ + d = tmp & 0x000000ff; \ + } while (0) + #else + #define READ_STENCIL( d, _x, _y ) \ + do { \ +- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ +- GLuint tmp = *(GLuint *)(buf + offset); \ ++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \ ++ GLuint tmp = 
*_ptr; \ + d = (tmp & 0xff000000) >> 24; \ + } while (0) + #endif +@@ -271,20 +353,60 @@ do { \ + #define TAG(x) radeon##x##_z24_s8 + #include "stenciltmp.h" + +-/* Move locking out to get reasonable span performance (10x better +- * than doing this in HW_LOCK above). WaitForIdle() is the main +- * culprit. +- */ ++ ++static void map_buffer(struct gl_renderbuffer *rb, GLboolean write) ++{ ++ struct radeon_renderbuffer *rrb = (void*)rb; ++ int r; ++ ++ if (rrb->bo) { ++ r = radeon_bo_map(rrb->bo, write); ++ if (r) { ++ fprintf(stderr, "(%s) error(%d) mapping buffer.\n", ++ __FUNCTION__, r); + } -+ break; -+ case GL_TEXTURE_RECTANGLE_NV: -+ case GL_TEXTURE_4D_SGIS: -+ firstLevel = lastLevel = 0; -+ break; -+ default: -+ return; + } -+ -+ /* save these values */ -+ *pfirstLevel = firstLevel; -+ *plastLevel = lastLevel; +} + -+ -+/** -+ * Checks whether the given miptree can hold the given texture image at the -+ * given face and level. -+ */ -+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, -+ struct gl_texture_image *texImage, GLuint face, GLuint level) ++static void unmap_buffer(struct gl_renderbuffer *rb) +{ -+ radeon_mipmap_level *lvl; ++ struct radeon_renderbuffer *rrb = (void*)rb; + -+ if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) -+ return GL_FALSE; ++ if (rrb->bo) { ++ radeon_bo_unmap(rrb->bo); ++ } ++} + + static void radeonSpanRenderStart(GLcontext * ctx) + { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +-#ifdef COMPILE_R300 +- r300ContextPtr r300 = (r300ContextPtr) rmesa; +- R300_FIREVERTICES(r300); +-#else +- RADEON_FIREVERTICES(rmesa); +-#endif ++ int i; + -+ if (texImage->IsCompressed != mt->compressed) -+ return GL_FALSE; ++ radeon_firevertices(rmesa); + -+ if (!texImage->IsCompressed && -+ !mt->compressed && -+ texImage->TexFormat->TexelBytes != mt->bpp) -+ return GL_FALSE; ++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { ++ if (ctx->Texture.Unit[i]._ReallyEnabled) ++ ctx->Driver.MapTexture(ctx, 
ctx->Texture.Unit[i]._Current); ++ } + -+ lvl = &mt->levels[level - mt->firstLevel]; -+ if (lvl->width != texImage->Width || -+ lvl->height != texImage->Height || -+ lvl->depth != texImage->Depth) -+ return GL_FALSE; ++ /* color draw buffers */ ++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { ++ map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE); ++ } + -+ return GL_TRUE; -+} ++ map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE); ++ ++ if (ctx->DrawBuffer->_DepthBuffer) { ++ map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE); ++ } ++ if (ctx->DrawBuffer->_StencilBuffer) ++ map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE); ++ ++ /* The locking and wait for idle should really only be needed in classic mode. ++ * In a future memory manager based implementation, this should become ++ * unnecessary due to the fact that mapping our buffers, textures, etc. ++ * should implicitly wait for any previous rendering commands that must ++ * be waited on. */ + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); + } +@@ -292,8 +414,25 @@ static void radeonSpanRenderStart(GLcontext * ctx) + static void radeonSpanRenderFinish(GLcontext * ctx) + { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ int i; + _swrast_flush(ctx); + UNLOCK_HARDWARE(rmesa); + ++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { ++ if (ctx->Texture.Unit[i]._ReallyEnabled) ++ ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); ++ } + -+/** -+ * Checks whether the given miptree has the right format to store the given texture object. 
++ /* color draw buffers */ ++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) ++ unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]); ++ ++ unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer); ++ ++ if (ctx->DrawBuffer->_DepthBuffer) ++ unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); ++ if (ctx->DrawBuffer->_StencilBuffer) ++ unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); + } + + void radeonInitSpanFuncs(GLcontext * ctx) +@@ -307,20 +446,17 @@ void radeonInitSpanFuncs(GLcontext * ctx) + /** + * Plug in the Get/Put routines for the given driRenderbuffer. + */ +-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) ++void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) + { +- if (drb->Base.InternalFormat == GL_RGBA) { +- if (vis->redBits == 5 && vis->greenBits == 6 +- && vis->blueBits == 5) { +- radeonInitPointers_RGB565(&drb->Base); +- } else { +- radeonInitPointers_ARGB8888(&drb->Base); +- } +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { +- radeonInitDepthPointers_z16(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { +- radeonInitDepthPointers_z24_s8(&drb->Base); +- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { +- radeonInitStencilPointers_z24_s8(&drb->Base); ++ if (rrb->base.InternalFormat == GL_RGB5) { ++ radeonInitPointers_RGB565(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_RGBA8) { ++ radeonInitPointers_ARGB8888(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) { ++ radeonInitDepthPointers_z16(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) { ++ radeonInitDepthPointers_z24_s8(&rrb->base); ++ } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) { ++ radeonInitStencilPointers_z24_s8(&rrb->base); + } + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h +index 9abe086..dd44ab5 100644 +--- 
a/src/mesa/drivers/dri/radeon/radeon_span.h ++++ b/src/mesa/drivers/dri/radeon/radeon_span.h +@@ -42,9 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #ifndef __RADEON_SPAN_H__ + #define __RADEON_SPAN_H__ + +-#include "drirenderbuffer.h" +- + extern void radeonInitSpanFuncs(GLcontext * ctx); +-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); + ++extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + #endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c +index 32bcff3..5fffa28 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_state.c ++++ b/src/mesa/drivers/dri/radeon/radeon_state.c +@@ -62,7 +62,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ); + + static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC]; + GLubyte refByte; + +@@ -106,7 +106,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) + static void radeonBlendEquationSeparate( GLcontext *ctx, + GLenum modeRGB, GLenum modeA ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK; + GLboolean fallback = GL_FALSE; + +@@ -147,7 +147,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & + ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK); + GLboolean fallback = GL_FALSE; +@@ -257,7 +257,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx, + + static void radeonDepthFunc( GLcontext *ctx, GLenum func ) + { +- radeonContextPtr rmesa = 
RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK; +@@ -293,7 +293,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func ) + + static void radeonDepthMask( GLcontext *ctx, GLboolean flag ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + RADEON_STATECHANGE( rmesa, ctx ); + + if ( ctx->Depth.Mask ) { +@@ -305,16 +305,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag ) + + static void radeonClearDepth( GLcontext *ctx, GLclampd d ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & + RADEON_DEPTH_FORMAT_MASK); + + switch ( format ) { + case RADEON_DEPTH_FORMAT_16BIT_INT_Z: +- rmesa->state.depth.clear = d * 0x0000ffff; ++ rmesa->radeon.state.depth.clear = d * 0x0000ffff; + break; + case RADEON_DEPTH_FORMAT_24BIT_INT_Z: +- rmesa->state.depth.clear = d * 0x00ffffff; ++ rmesa->radeon.state.depth.clear = d * 0x00ffffff; + break; + } + } +@@ -327,7 +327,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d ) + + static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + union { int i; float f; } c, d; + GLchan col[4]; + +@@ -406,109 +406,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) + } + } + +- +-/* ============================================================= +- * Scissoring +- */ +- +- +-static GLboolean intersect_rect( drm_clip_rect_t *out, +- drm_clip_rect_t *a, +- drm_clip_rect_t *b ) +-{ +- *out = *a; +- if ( b->x1 > out->x1 ) out->x1 = b->x1; +- if ( b->y1 > out->y1 ) out->y1 = b->y1; +- if ( b->x2 < out->x2 ) out->x2 = b->x2; +- if ( b->y2 < out->y2 ) out->y2 = b->y2; +- if ( out->x1 >= out->x2 ) return 
GL_FALSE; +- if ( out->y1 >= out->y2 ) return GL_FALSE; +- return GL_TRUE; +-} +- +- +-void radeonRecalcScissorRects( radeonContextPtr rmesa ) +-{ +- drm_clip_rect_t *out; +- int i; +- +- /* Grow cliprect store? +- */ +- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { +- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { +- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ +- rmesa->state.scissor.numAllocedClipRects *= 2; +- } +- +- if (rmesa->state.scissor.pClipRects) +- FREE(rmesa->state.scissor.pClipRects); +- +- rmesa->state.scissor.pClipRects = +- MALLOC( rmesa->state.scissor.numAllocedClipRects * +- sizeof(drm_clip_rect_t) ); +- +- if ( rmesa->state.scissor.pClipRects == NULL ) { +- rmesa->state.scissor.numAllocedClipRects = 0; +- return; +- } +- } +- +- out = rmesa->state.scissor.pClipRects; +- rmesa->state.scissor.numClipRects = 0; +- +- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { +- if ( intersect_rect( out, +- &rmesa->pClipRects[i], +- &rmesa->state.scissor.rect ) ) { +- rmesa->state.scissor.numClipRects++; +- out++; +- } +- } +-} +- +- +-static void radeonUpdateScissor( GLcontext *ctx ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- +- if ( rmesa->dri.drawable ) { +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- +- int x = ctx->Scissor.X; +- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; +- int w = ctx->Scissor.X + ctx->Scissor.Width - 1; +- int h = dPriv->h - ctx->Scissor.Y - 1; +- +- rmesa->state.scissor.rect.x1 = x + dPriv->x; +- rmesa->state.scissor.rect.y1 = y + dPriv->y; +- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; +- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; +- +- radeonRecalcScissorRects( rmesa ); +- } +-} +- +- +-static void radeonScissor( GLcontext *ctx, +- GLint x, GLint y, GLsizei w, GLsizei h ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- +- if ( ctx->Scissor.Enabled ) { +- RADEON_FIREVERTICES( rmesa ); /* don't pipeline cliprect 
changes */ +- radeonUpdateScissor( ctx ); +- } +- +-} +- +- + /* ============================================================= + * Culling + */ + + static void radeonCullFace( GLcontext *ctx, GLenum unused ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL]; + +@@ -545,7 +449,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused ) + + static void radeonFrontFace( GLcontext *ctx, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK; +@@ -570,7 +474,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode ) + */ + static void radeonLineWidth( GLcontext *ctx, GLfloat widthf ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, lin ); + RADEON_STATECHANGE( rmesa, set ); +@@ -587,7 +491,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf ) + + static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, lin ); + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = +@@ -602,8 +506,8 @@ static void radeonColorMask( GLcontext *ctx, + GLboolean r, GLboolean g, + GLboolean b, GLboolean a ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp, ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], +@@ -623,8 +527,8 @@ static void radeonColorMask( GLcontext *ctx, + static void radeonPolygonOffset( GLcontext *ctx, + GLfloat factor, 
GLfloat units ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- float_ui32_type constant = { units * rmesa->state.depth.scale }; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ float_ui32_type constant = { units * rmesa->radeon.state.depth.scale }; + float_ui32_type factoru = { factor }; + + RADEON_STATECHANGE( rmesa, zbs ); +@@ -634,7 +538,7 @@ static void radeonPolygonOffset( GLcontext *ctx, + + static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + +@@ -646,27 +550,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) + + /* TODO: push this into cmd mechanism + */ +- RADEON_FIREVERTICES( rmesa ); +- LOCK_HARDWARE( rmesa ); ++ radeon_firevertices(&rmesa->radeon); ++ LOCK_HARDWARE( &rmesa->radeon ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; +- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, ++ drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(drm_radeon_stipple_t) ); +- UNLOCK_HARDWARE( rmesa ); ++ UNLOCK_HARDWARE( &rmesa->radeon ); + } + + static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; + + /* Can't generally do unfilled via tcl, but some good special + * cases work. 
+ */ + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag); +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +@@ -686,7 +590,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) + */ + static void radeonUpdateSpecular( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; + GLuint flag = 0; + +@@ -757,7 +661,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) + + /* Update vertex/render formats + */ +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +@@ -774,7 +678,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) + */ + static void update_global_ambient( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( glt ); + + /* Need to do more if both emmissive & ambient are PREMULT: +@@ -809,7 +713,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p ) + /* fprintf(stderr, "%s\n", __FUNCTION__); */ + + if (l->Enabled) { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); + + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); +@@ -849,7 +753,7 @@ static void check_twoside_fallback( GLcontext *ctx ) + + static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; + + light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | +@@ -913,7 +817,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) + + void radeonUpdateMaterial( GLcontext *ctx ) + { +- 
radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLfloat (*mat)[4] = ctx->Light.Material.Attrib; + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); + GLuint mask = ~0; +@@ -978,7 +882,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) + */ + static void update_light( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + /* Have to check these, or have an automatic shortcircuit mechanism + * to remove noop statechanges. (Or just do a better job on the +@@ -1043,7 +947,7 @@ static void update_light( GLcontext *ctx ) + static void radeonLightfv( GLcontext *ctx, GLenum light, + GLenum pname, const GLfloat *params ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLint p = light - GL_LIGHT0; + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; +@@ -1164,7 +1068,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light, + static void radeonLightModelfv( GLcontext *ctx, GLenum pname, + const GLfloat *param ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + switch (pname) { + case GL_LIGHT_MODEL_AMBIENT: +@@ -1188,7 +1092,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname, + + check_twoside_fallback( ctx ); + +- if (rmesa->TclFallback) { ++ if (rmesa->radeon.TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +@@ -1205,7 +1109,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname, + + static void radeonShadeModel( GLcontext *ctx, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + + s &= ~(RADEON_DIFFUSE_SHADE_MASK | +@@ -1244,7 +1148,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode ) + static void radeonClipPlane( GLcontext *ctx, GLenum 
plane, const GLfloat *eq ) + { + GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + RADEON_STATECHANGE( rmesa, ucp[p] ); +@@ -1256,7 +1160,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) + + static void radeonUpdateClipPlanes( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { +@@ -1281,7 +1185,7 @@ static void + radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, + GLint ref, GLuint mask ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) | + ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT)); + +@@ -1325,7 +1229,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, + static void + radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK; +@@ -1336,7 +1240,7 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) + static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, + GLenum zfail, GLenum zpass ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP, + and DEC_WRAP (and INVERT) will do INVERT. 
No way to get correct INC_WRAP and DEC, +@@ -1349,7 +1253,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, + GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP; + GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP; + +- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { ++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { + tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC; + tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC; + tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC; +@@ -1455,9 +1359,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, + + static void radeonClearStencil( GLcontext *ctx, GLint s ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + +- rmesa->state.stencil.clear = ++ rmesa->radeon.state.stencil.clear = + ((GLuint) (ctx->Stencil.Clear & 0xff) | + (0xff << RADEON_STENCIL_MASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT)); +@@ -1481,20 +1385,20 @@ static void radeonClearStencil( GLcontext *ctx, GLint s ) + */ + void radeonUpdateWindow( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; +- GLfloat xoffset = (GLfloat)dPriv->x; +- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; ++ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; ++ GLfloat yoffset = dPriv ? 
(GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type sx = { v[MAT_SX] }; + float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; + float_ui32_type sy = { - v[MAT_SY] }; + float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; +- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; +- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; ++ float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale }; ++ float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale }; + +- RADEON_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + RADEON_STATECHANGE( rmesa, vpt ); + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; +@@ -1524,8 +1428,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, + + void radeonUpdateViewportOffset( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; +@@ -1555,8 +1459,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx ) + RADEON_STIPPLE_Y_OFFSET_MASK); + + /* add magic offsets, then invert */ +- stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK); +- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) ++ stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK); ++ sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1) + & RADEON_STIPPLE_COORD_MASK); + + m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) | +@@ -1580,20 +1484,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx ) + + static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = 
R100_CONTEXT(ctx); + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); +- rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp, ++ rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp, + c[0], c[1], c[2], c[3] ); + } + + + static void radeonRenderMode( GLcontext *ctx, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); + } + +@@ -1619,7 +1523,7 @@ static GLuint radeon_rop_tab[] = { + + static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint rop = (GLuint)opcode - GL_CLEAR; + + ASSERT( rop < 16 ); +@@ -1630,66 +1534,17 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode ) + + + /** +- * Set up the cliprects for either front or back-buffer drawing. +- */ +-void radeonSetCliprects( radeonContextPtr rmesa ) +-{ +- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; +- __DRIdrawablePrivate *const readable = rmesa->dri.readable; +- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; +- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; +- +- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { +- /* Can't ignore 2d windows if we are page flipping. 
+- */ +- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { +- rmesa->numClipRects = drawable->numClipRects; +- rmesa->pClipRects = drawable->pClipRects; +- } +- else { +- rmesa->numClipRects = drawable->numBackClipRects; +- rmesa->pClipRects = drawable->pBackClipRects; +- } +- } +- else { +- /* front buffer (or none, or multiple buffers */ +- rmesa->numClipRects = drawable->numClipRects; +- rmesa->pClipRects = drawable->pClipRects; +- } +- +- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { +- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, +- drawable->w, drawable->h); +- draw_fb->Initialized = GL_TRUE; +- } +- +- if (drawable != readable) { +- if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) { +- _mesa_resize_framebuffer(rmesa->glCtx, read_fb, +- readable->w, readable->h); +- read_fb->Initialized = GL_TRUE; +- } +- } +- +- if (rmesa->state.scissor.enabled) +- radeonRecalcScissorRects( rmesa ); +- +- rmesa->lastStamp = drawable->lastStamp; +-} +- +- +-/** + * Called via glDrawBuffer. + */ + static void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( mode )); + +- RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ ++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */ + + if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ +@@ -1707,8 +1562,9 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) + return; + } + +- radeonSetCliprects( rmesa ); +- ++ radeonSetCliprects( &rmesa->radeon ); ++ if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled) ++ radeonUpdatePageFlipping(&rmesa->radeon); + /* We'll set the drawing engine's offset/pitch parameters later + * when we update other state. 
+ */ +@@ -1726,7 +1582,7 @@ static void radeonReadBuffer( GLcontext *ctx, GLenum mode ) + + static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint p, flag; + + if ( RADEON_DEBUG & DEBUG_STATE ) +@@ -1821,10 +1677,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) + RADEON_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE; +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; + } + break; + +@@ -1971,13 +1827,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) + } + + case GL_SCISSOR_TEST: +- RADEON_FIREVERTICES( rmesa ); +- rmesa->state.scissor.enabled = state; ++ radeon_firevertices(&rmesa->radeon); ++ rmesa->radeon.state.scissor.enabled = state; + radeonUpdateScissor( ctx ); + break; + + case GL_STENCIL_TEST: +- if ( rmesa->state.stencil.hwBuffer ) { ++ if ( rmesa->radeon.state.stencil.hwBuffer ) { + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE; +@@ -2010,7 +1866,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) + + static void radeonLightingSpaceChange( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLboolean tmp; + RADEON_STATECHANGE( rmesa, tcl ); + +@@ -2039,7 +1895,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) + */ + + +-void radeonUploadTexMatrix( radeonContextPtr rmesa, ++void radeonUploadTexMatrix( r100ContextPtr rmesa, + int unit, GLboolean 
swapcols ) + { + /* Here's how this works: on r100, only 3 tex coords can be submitted, so the +@@ -2065,7 +1921,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa, + int idx = TEXMAT_0 + unit; + float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0; + int i; +- struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit]; ++ struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit]; + GLfloat *src = rmesa->tmpmat[unit].m; + + rmesa->TexMatColSwap &= ~(1 << unit); +@@ -2119,7 +1975,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa, + } + + +-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx ) ++static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx ) + { + float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; + int i; +@@ -2135,7 +1991,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx ) + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); + } + +-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) ++static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx ) + { + float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; + memcpy(dest, src, 16*sizeof(float)); +@@ -2145,7 +2001,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) + + static void update_texturematrix( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL]; + GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]; + int unit; +@@ -2217,43 +2073,32 @@ static void update_texturematrix( GLcontext *ctx ) + void + radeonUpdateDrawBuffer(GLcontext *ctx) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; +- driRenderbuffer *drb; ++ struct radeon_renderbuffer *rrb; + + if (fb->_ColorDrawBufferIndexes[0] == 
BUFFER_FRONT_LEFT) { +- /* draw to front */ +- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; +- } +- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { +- /* draw to back */ +- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; +- } +- else { +- /* drawing to multiple buffers, or none */ +- return; ++ /* draw to front */ ++ rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; ++ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { ++ /* draw to back */ ++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; ++ } else { ++ /* drawing to multiple buffers, or none */ ++ return; + } + +- assert(drb); +- assert(drb->flippedPitch); ++ assert(rrb); ++ assert(rrb->pitch); + + RADEON_STATECHANGE( rmesa, ctx ); +- +- /* Note: we used the (possibly) page-flipped values */ +- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] +- = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation) +- & RADEON_COLOROFFSET_MASK); +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; +- } + } + + + void radeonValidateState( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- GLuint new_state = rmesa->NewGLState; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ GLuint new_state = rmesa->radeon.NewGLState; + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + radeonUpdateDrawBuffer(ctx); +@@ -2261,7 +2106,7 @@ void radeonValidateState( GLcontext *ctx ) + + if (new_state & _NEW_TEXTURE) { + radeonUpdateTextureState( ctx ); +- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ ++ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */ + } + + /* Need an event driven matrix update? 
+@@ -2295,7 +2140,7 @@ void radeonValidateState( GLcontext *ctx ) + } + + +- rmesa->NewGLState = 0; ++ rmesa->radeon.NewGLState = 0; + } + + +@@ -2306,7 +2151,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) + _vbo_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); + _ae_invalidate_state( ctx, new_state ); +- RADEON_CONTEXT(ctx)->NewGLState |= new_state; ++ R100_CONTEXT(ctx)->radeon.NewGLState |= new_state; + } + + +@@ -2330,15 +2175,15 @@ static GLboolean check_material( GLcontext *ctx ) + + static void radeonWrapRunPipeline( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLboolean has_material; + + if (0) +- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); ++ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); + + /* Validate state: + */ +- if (rmesa->NewGLState) ++ if (rmesa->radeon.NewGLState) + radeonValidateState( ctx ); + + has_material = (ctx->Light.Enabled && check_material( ctx )); +diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h +index 2171879..17c2b11 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_state.h ++++ b/src/mesa/drivers/dri/radeon/radeon_state.h +@@ -39,22 +39,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + #include "radeon_context.h" + +-extern void radeonInitState( radeonContextPtr rmesa ); ++extern void radeonInitState( r100ContextPtr rmesa ); + extern void radeonInitStateFuncs( GLcontext *ctx ); + + extern void radeonUpdateMaterial( GLcontext *ctx ); + +-extern void radeonSetCliprects( radeonContextPtr rmesa ); +-extern void radeonRecalcScissorRects( radeonContextPtr rmesa ); + extern void radeonUpdateViewportOffset( GLcontext *ctx ); + extern void radeonUpdateWindow( GLcontext *ctx ); + extern void radeonUpdateDrawBuffer( GLcontext *ctx ); +-extern void radeonUploadTexMatrix( radeonContextPtr rmesa, ++extern void radeonUploadTexMatrix( r100ContextPtr rmesa, + int unit, GLboolean swapcols ); + + extern void radeonValidateState( GLcontext *ctx ); + +-extern void radeonPrintDirty( radeonContextPtr rmesa, ++extern void radeonPrintDirty( r100ContextPtr rmesa, + const char *msg ); + + +@@ -62,7 +60,7 @@ extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ); + #define FALLBACK( rmesa, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ +- radeonFallback( rmesa->glCtx, bit, mode ); \ ++ radeonFallback( rmesa->radeon.glCtx, bit, mode ); \ + } while (0) + + +diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c +index 57dc380..7ff0eb4 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c ++++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c +@@ -38,39 +38,156 @@ + #include "swrast_setup/swrast_setup.h" + + #include "radeon_context.h" ++#include "radeon_mipmap_tree.h" + #include "radeon_ioctl.h" + #include "radeon_state.h" + #include "radeon_tcl.h" + #include "radeon_tex.h" + #include "radeon_swtcl.h" + ++#include "../r200/r200_reg.h" ++ + #include "xmlpool.h" + ++/* New (1.3) state mechanism. 
3 commands (packet, scalar, vector) in ++ * 1.3 cmdbuffers allow all previous state to be updated as well as ++ * the tcl scalar and vector areas. + */ -+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj) ++static struct { ++ int start; ++ int len; ++ const char *name; ++} packet[RADEON_MAX_STATE_PACKETS] = { ++ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, ++ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, ++ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, ++ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, ++ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, ++ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, ++ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, ++ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, ++ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, ++ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, ++ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, ++ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, ++ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, ++ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, ++ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, ++ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, ++ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, ++ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, ++ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, ++ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, ++ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, ++ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, ++ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, ++ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, ++ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, ++ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, ++ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, ++ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, ++ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, ++ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, ++ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, 
"R200_SE_TCL_LIGHT_MODEL_CTL_0"}, ++ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, ++ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, ++ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, ++ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, ++ {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, ++ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, ++ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, ++ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, ++ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, ++ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, ++ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, ++ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, ++ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, ++ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, ++ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, ++ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, ++ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, ++ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, ++ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, ++ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, ++ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, ++ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, ++ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, ++ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, ++ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, ++ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, ++ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, ++ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, ++ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, ++ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, ++ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, ++ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, ++ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, ++ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ ++ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ ++ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, ++ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, ++ 
{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, ++ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, ++ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, ++ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, ++ {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, ++ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, ++ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, ++ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, ++ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, ++ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, ++ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, ++ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, ++ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, ++ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, ++ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, ++ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, ++ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, ++ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, ++ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, ++ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, ++ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ ++ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, ++ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, ++ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, ++ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, ++ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, ++ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, ++ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, ++ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, ++ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, ++}; ++ + /* ============================================================= + * State initialization + */ + +-void radeonPrintDirty( radeonContextPtr rmesa, const char *msg ) ++void radeonPrintDirty( r100ContextPtr rmesa, const char *msg ) + { + struct radeon_state_atom *l; + + fprintf(stderr, msg); + 
fprintf(stderr, ": "); + +- foreach(l, &rmesa->hw.atomlist) { +- if (l->dirty || rmesa->hw.all_dirty) ++ foreach(l, &rmesa->radeon.hw.atomlist) { ++ if (l->dirty || rmesa->radeon.hw.all_dirty) + fprintf(stderr, "%s, ", l->name); + } + + fprintf(stderr, "\n"); + } + +-static int cmdpkt( int id ) ++static int cmdpkt( r100ContextPtr rmesa, int id ) + { + drm_radeon_cmd_header_t h; +- h.i = 0; +- h.packet.cmd_type = RADEON_CMD_PACKET; +- h.packet.packet_id = id; ++ ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ return CP_PACKET0(packet[id].start, packet[id].len - 1); ++ } else { ++ h.i = 0; ++ h.packet.cmd_type = RADEON_CMD_PACKET; ++ h.packet.packet_id = id; ++ } + return h.i; + } + +@@ -96,17 +213,17 @@ static int cmdscl( int offset, int stride, int count ) + return h.i; + } + +-#define CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx ) \ +-{ \ +- return FLAG; \ ++#define CHECK( NM, FLAG ) \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ ++{ \ ++ return FLAG ? atom->cmd_size : 0; \ + } + + #define TCL_CHECK( NM, FLAG ) \ +-static GLboolean check_##NM( GLcontext *ctx ) \ ++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ + { \ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ +- return !rmesa->TclFallback && (FLAG); \ ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ ++ return (!rmesa->radeon.TclFallback && (FLAG)) ? 
atom->cmd_size : 0; \ + } + + +@@ -146,42 +263,290 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) + CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) + CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) + ++#define OUT_VEC(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ ++ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \ ++ OUT_BATCH_TABLE((data), h.vectors.count); \ ++ } while(0) ++ ++#define OUT_SCL(hdr, data) do { \ ++ drm_radeon_cmd_header_t h; \ ++ h.i = hdr; \ ++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ ++ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ ++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ ++ OUT_BATCH_TABLE((data), h.scalars.count); \ ++ } while(0) ++ ++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ struct gl_texture_image *firstImage; -+ GLuint compressed; -+ GLuint numfaces = 1; -+ GLuint firstLevel, lastLevel; ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 2; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_SCL(atom->cmd[0], atom->cmd+1); ++ END_BATCH(); ++} + + +-/* Initialize the context's hardware state. 
+- */ +-void radeonInitState( radeonContextPtr rmesa ) ++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) + { +- GLcontext *ctx = rmesa->glCtx; +- GLuint color_fmt, depth_fmt, i; +- GLint drawPitch, drawOffset; ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 4; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[0], atom->cmd+1); ++ END_BATCH(); ++} + +- switch ( rmesa->radeonScreen->cpp ) { +- case 2: +- color_fmt = RADEON_COLOR_FORMAT_RGB565; +- break; +- case 4: +- color_fmt = RADEON_COLOR_FORMAT_ARGB8888; +- break; +- default: +- fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" ); +- exit( -1 ); ++ ++static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ uint32_t dwords = atom->cmd_size; ++ ++ dwords += 6; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); ++ OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); ++ END_BATCH(); ++} + -+ calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel); -+ if (texObj->Target == GL_TEXTURE_CUBE_MAP) -+ numfaces = 6; ++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ struct radeon_renderbuffer *rrb; ++ uint32_t cbpitch; ++ uint32_t zbpitch, depth_fmt; ++ uint32_t dwords = atom->cmd_size; ++ ++ /* output the first 7 bytes of context */ ++ BEGIN_BATCH_NO_AUTOSTATE(dwords + 4); ++ OUT_BATCH_TABLE(atom->cmd, 5); ++ ++ rrb = radeon_get_depthbuffer(&r100->radeon); ++ if (!rrb) { ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ } else { ++ zbpitch = (rrb->pitch / rrb->cpp); ++ if (r100->using_hyperz) ++ zbpitch |= RADEON_DEPTH_HYPERZ; ++ ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ OUT_BATCH(zbpitch); ++ if (rrb->cpp == 4) ++ depth_fmt = 
RADEON_DEPTH_FORMAT_24BIT_INT_Z; ++ else ++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; ++ } ++ ++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); ++ OUT_BATCH(atom->cmd[CTX_CMD_1]); ++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]); ++ ++ rrb = radeon_get_colorbuffer(&r100->radeon); ++ if (!rrb || !rrb->bo) { ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); ++ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]); ++ } else { ++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); ++ if (rrb->cpp == 4) ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; ++ else ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; ++ ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ } + -+ firstImage = texObj->Image[0][firstLevel]; -+ compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0; ++ OUT_BATCH(atom->cmd[CTX_CMD_2]); + -+ return (mt->firstLevel == firstLevel && -+ mt->lastLevel == lastLevel && -+ mt->width0 == firstImage->Width && -+ mt->height0 == firstImage->Height && -+ mt->depth0 == firstImage->Depth && -+ mt->bpp == firstImage->TexFormat->TexelBytes && -+ mt->compressed == compressed); -+} ++ if (!rrb || !rrb->bo) { ++ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]); ++ } else { ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) ++ cbpitch |= RADEON_COLOR_TILE_ENABLE; ++ OUT_BATCH(cbpitch); ++ } + ++ END_BATCH(); ++} + -+/** -+ * Try to allocate a mipmap tree for the given texture that will fit the -+ * given image in the given position. -+ */ -+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, -+ struct gl_texture_image *texImage, GLuint face, GLuint level) ++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ -+ GLuint compressed = texImage->IsCompressed ? 
texImage->TexFormat->MesaFormat : 0; -+ GLuint numfaces = 1; -+ GLuint firstLevel, lastLevel; ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ struct radeon_renderbuffer *rrb, *drb; ++ uint32_t cbpitch = 0; ++ uint32_t zbpitch = 0; ++ uint32_t dwords = atom->cmd_size; ++ uint32_t depth_fmt; ++ ++ rrb = radeon_get_colorbuffer(&r100->radeon); ++ if (!rrb || !rrb->bo) { ++ fprintf(stderr, "no rrb\n"); ++ return; ++ } + -+ assert(!t->mt); ++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); ++ if (rrb->cpp == 4) ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; ++ else ++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; ++ ++ cbpitch = (rrb->pitch / rrb->cpp); ++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) ++ cbpitch |= R200_COLOR_TILE_ENABLE; ++ ++ drb = radeon_get_depthbuffer(&r100->radeon); ++ if (drb) { ++ zbpitch = (drb->pitch / drb->cpp); ++ if (drb->cpp == 4) ++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; ++ else ++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; ++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; ++ ++ } + -+ calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level); -+ if (t->base.Target == GL_TEXTURE_CUBE_MAP) -+ numfaces = 6; ++ /* output the first 7 bytes of context */ ++ if (drb) ++ dwords += 4; ++ if (rrb) ++ dwords += 4; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); + -+ if (level != firstLevel || face >= numfaces) -+ return; ++ /* In the CS case we need to split this up */ ++ OUT_BATCH(CP_PACKET0(packet[0].start, 3)); ++ OUT_BATCH_TABLE((atom->cmd + 1), 4); + -+ t->mt = radeon_miptree_create(rmesa, t, t->base.Target, -+ firstLevel, lastLevel, -+ texImage->Width, texImage->Height, texImage->Depth, -+ texImage->TexFormat->TexelBytes, t->tile_bits, compressed); -+} -diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h -new file mode 100644 -index 0000000..43dfa48 ---- 
/dev/null -+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (C) 2008 Nicolai Haehnle. -+ * -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sublicense, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ * -+ */ ++ if (drb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + -+#ifndef __RADEON_MIPMAP_TREE_H_ -+#define __RADEON_MIPMAP_TREE_H_ ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); ++ OUT_BATCH(zbpitch); ++ } + -+#include "radeon_common.h" ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); ++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); ++ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); ++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]); ++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); ++ ++ if (rrb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); ++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + } + +- rmesa->state.color.clear = 0x00000000; ++ if (rrb) { ++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); ++ OUT_BATCH(cbpitch); ++ } + -+typedef struct _radeon_mipmap_tree radeon_mipmap_tree; -+typedef struct _radeon_mipmap_level radeon_mipmap_level; -+typedef struct _radeon_mipmap_image radeon_mipmap_image; ++ // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { ++ // OUT_BATCH_TABLE((atom->cmd + 14), 4); ++ // } + -+struct _radeon_mipmap_image { -+ GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */ -+}; ++ END_BATCH(); ++} + -+struct _radeon_mipmap_level { -+ GLuint width; -+ GLuint height; -+ GLuint depth; -+ GLuint size; /** Size of each image, in bytes */ -+ GLuint rowstride; /** in bytes */ -+ radeon_mipmap_image faces[6]; -+}; ++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ uint32_t dwords = atom->cmd_size; ++ int i = atom->idx, j; ++ radeonTexObj *t = r100->state.texture.unit[i].texobj; ++ radeon_mipmap_level *lvl; ++ ++ if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) ++ return; ++ ++ if (!t) ++ return; ++ ++ if (!t->mt) ++ return; ++ ++ BEGIN_BATCH_NO_AUTOSTATE(dwords + 10); ++ OUT_BATCH_TABLE(atom->cmd, 3); ++ lvl = 
&t->mt->levels[0]; ++ for (j = 0; j < 5; j++) { ++ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, ++ RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } ++ END_BATCH(); ++} + ++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) ++{ ++ r100ContextPtr r100 = R100_CONTEXT(ctx); ++ BATCH_LOCALS(&r100->radeon); ++ uint32_t dwords = atom->cmd_size; ++ int i = atom->idx; ++ radeonTexObj *t = r100->state.texture.unit[i].texobj; ++ radeon_mipmap_level *lvl; ++ ++ if (t && t->mt && !t->image_override) ++ dwords += 2; ++ BEGIN_BATCH_NO_AUTOSTATE(dwords); ++ OUT_BATCH_TABLE(atom->cmd, 3); ++ if (t && t->mt && !t->image_override) { ++ if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { ++ lvl = &t->mt->levels[0]; ++ OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, ++ RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } else { ++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, ++ RADEON_GEM_DOMAIN_VRAM, 0, 0); ++ } ++ } else if (!t) { ++ /* workaround for old CS mechanism */ ++ OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]); ++ // OUT_BATCH(r100->radeon.radeonScreen); ++ } else if (t->image_override) ++ OUT_BATCH(t->override_offset); ++ ++ OUT_BATCH_TABLE((atom->cmd+4), 5); ++ END_BATCH(); ++} + -+/** -+ * A mipmap tree contains texture images in the layout that the hardware -+ * expects. -+ * -+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the -+ * layout on-the-fly; however, the texture contents (i.e. texels) can be -+ * changed. ++/* Initialize the context's hardware state. 
+ */ -+struct _radeon_mipmap_tree { -+ radeonContextPtr radeon; -+ radeonTexObj *t; -+ struct radeon_bo *bo; -+ GLuint refcount; ++void radeonInitState( r100ContextPtr rmesa ) ++{ ++ GLcontext *ctx = rmesa->radeon.glCtx; ++ GLuint i; ++ ++ rmesa->radeon.state.color.clear = 0x00000000; + + switch ( ctx->Visual.depthBits ) { + case 16: +- rmesa->state.depth.clear = 0x0000ffff; +- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; +- depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; +- rmesa->state.stencil.clear = 0x00000000; ++ rmesa->radeon.state.depth.clear = 0x0000ffff; ++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff; ++ rmesa->radeon.state.stencil.clear = 0x00000000; + break; + case 24: +- rmesa->state.depth.clear = 0x00ffffff; +- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; +- depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; +- rmesa->state.stencil.clear = 0xffff0000; ++ rmesa->radeon.state.depth.clear = 0x00ffffff; ++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff; ++ rmesa->radeon.state.stencil.clear = 0xffff0000; + break; + default: + fprintf( stderr, "Error: Unsupported depth %d... 
exiting\n", +@@ -190,37 +555,37 @@ void radeonInitState( radeonContextPtr rmesa ) + } + + /* Only have hw stencil when depth buffer is 24 bits deep */ +- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && ++ rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && + ctx->Visual.depthBits == 24 ); + +- rmesa->Fallback = 0; ++ rmesa->radeon.Fallback = 0; + +- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { +- drawOffset = rmesa->radeonScreen->backOffset; +- drawPitch = rmesa->radeonScreen->backPitch; +- } else { +- drawOffset = rmesa->radeonScreen->frontOffset; +- drawPitch = rmesa->radeonScreen->frontPitch; +- } + +- rmesa->hw.max_state_size = 0; ++ rmesa->radeon.hw.max_state_size = 0; + +-#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \ ++#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX ) \ + do { \ + rmesa->hw.ATOM.cmd_size = SZ; \ +- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ +- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ +- rmesa->hw.ATOM.name = NM; \ ++ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ ++ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ ++ rmesa->hw.ATOM.name = NM; \ + rmesa->hw.ATOM.is_tcl = FLAG; \ + rmesa->hw.ATOM.check = check_##CHK; \ +- rmesa->hw.ATOM.dirty = GL_TRUE; \ +- rmesa->hw.max_state_size += SZ * sizeof(int); \ ++ rmesa->hw.ATOM.dirty = GL_TRUE; \ ++ rmesa->hw.ATOM.idx = IDX; \ ++ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ + } while (0) +- +- ++ ++#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \ ++ ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0) ++ + /* Allocate state buffers: + */ + ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); ++ if (rmesa->radeon.radeonScreen->kernel_mm) ++ rmesa->hw.ctx.emit = ctx_emit_cs; ++ else ++ rmesa->hw.ctx.emit = ctx_emit; + ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); + ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); + ALLOC_STATE( vpt, 
always, VPT_STATE_SIZE, "VPT/viewport", 0 ); +@@ -233,20 +598,25 @@ void radeonInitState( radeonContextPtr rmesa ) + ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); + ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); +- ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 ); +- ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 ); +- ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 ); +- if (rmesa->radeonScreen->drmSupportsCubeMapsR100) ++ ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); ++ ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); ++ ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2 ); ++ ++ for (i = 0; i < 3; i++) ++ rmesa->hw.tex[i].emit = tex_emit; ++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) + { +- ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); +- ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); +- ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); ++ ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); ++ ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); ++ ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); ++ for (i = 0; i < 3; i++) ++ rmesa->hw.cube[i].emit = cube_emit; + } + else + { +- ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); +- ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); +- ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); ++ ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); ++ ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); ++ ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); + } + ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); + ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, 
"MAT/modelview", 1 ); +@@ -268,43 +638,43 @@ void radeonInitState( radeonContextPtr rmesa ) + ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); + ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); + ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); +- ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 ); +- ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 ); +- ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 ); ++ ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 ); ++ ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 ); ++ ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 ); + + radeonSetUpAtomList( rmesa ); + + /* Fill in the packet headers: + */ +- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); +- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); +- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); +- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); +- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); +- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); +- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); +- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); +- rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS); +- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); +- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0); +- rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0); +- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1); +- rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1); +- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2); +- rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2); +- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0); +- 
rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0); +- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1); +- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1); +- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2); +- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2); +- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); +- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); ++ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC); ++ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL); ++ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH); ++ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN); ++ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH); ++ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK); ++ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE); ++ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL); ++ rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS); ++ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC); ++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0); ++ rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0); ++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1); ++ rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1); ++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2); ++ rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2); ++ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0); ++ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0); ++ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, 
RADEON_EMIT_PP_CUBIC_FACES_1); ++ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1); ++ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2); ++ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2); ++ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR); ++ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); + rmesa->hw.mtl.cmd[MTL_CMD_0] = +- cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); +- rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0); +- rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1); +- rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2); ++ cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); ++ rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0); ++ rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1); ++ rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2); + rmesa->hw.grd.cmd[GRD_CMD_0] = + cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); + rmesa->hw.fog.cmd[FOG_CMD_0] = +@@ -331,6 +701,22 @@ void radeonInitState( radeonContextPtr rmesa ) + cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 ); + } + ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ rmesa->hw.grd.emit = scl_emit; ++ rmesa->hw.fog.emit = vec_emit; ++ rmesa->hw.glt.emit = vec_emit; ++ rmesa->hw.eye.emit = vec_emit; ++ ++ for (i = 0; i <= 6; i++) ++ rmesa->hw.mat[i].emit = vec_emit; + -+ GLuint totalsize; /** total size of the miptree, in bytes */ ++ for (i = 0; i < 8; i++) ++ rmesa->hw.lit[i].emit = lit_emit; + -+ GLenum target; /** GL_TEXTURE_xxx */ -+ GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ -+ GLuint firstLevel; /** First mip level stored in this mipmap tree */ -+ GLuint lastLevel; /** Last mip level stored in this mipmap tree */ ++ for (i = 0; i < 6; i++) ++ rmesa->hw.ucp[i].emit = vec_emit; ++ } + -+ GLuint 
width0; /** Width of firstLevel image */ -+ GLuint height0; /** Height of firstLevel image */ -+ GLuint depth0; /** Depth of firstLevel image */ + rmesa->last_ReallyEnabled = -1; + + /* Initial Harware state: +@@ -352,19 +738,7 @@ void radeonInitState( radeonContextPtr rmesa ) + RADEON_SRC_BLEND_GL_ONE | + RADEON_DST_BLEND_GL_ZERO ); + +- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = +- rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation; +- +- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = +- ((rmesa->radeonScreen->depthPitch & +- RADEON_DEPTHPITCH_MASK) | +- RADEON_DEPTH_ENDIAN_NO_SWAP); +- +- if (rmesa->using_hyperz) +- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ; +- +- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | +- RADEON_Z_TEST_LESS | ++ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS | + RADEON_STENCIL_TEST_ALWAYS | + RADEON_STENCIL_FAIL_KEEP | + RADEON_STENCIL_ZPASS_KEEP | +@@ -374,7 +748,7 @@ void radeonInitState( radeonContextPtr rmesa ) + if (rmesa->using_hyperz) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; +- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { ++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + /* works for q3, but slight rendering errors with glxgears ? */ + /* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ + /* need this otherwise get lots of lockups with q3 ??? 
*/ +@@ -386,10 +760,9 @@ void radeonInitState( radeonContextPtr rmesa ) + RADEON_ANTI_ALIAS_NONE); + + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE | +- color_fmt | + RADEON_ZBLOCK16); + +- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { ++ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) { + case DRI_CONF_DITHER_XERRORDIFFRESET: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT; + break; +@@ -397,30 +770,17 @@ void radeonInitState( radeonContextPtr rmesa ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE; + break; + } +- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == ++ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) == + DRI_CONF_ROUND_ROUND ) +- rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE; ++ rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE; + else +- rmesa->state.color.roundEnable = 0; +- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == ++ rmesa->radeon.state.color.roundEnable = 0; ++ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) == + DRI_CONF_COLOR_REDUCTION_DITHER ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; + else +- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; ++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; + +- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + +- rmesa->radeonScreen->fbLocation) +- & RADEON_COLOROFFSET_MASK); +- +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & +- RADEON_COLORPITCH_MASK) | +- RADEON_COLOR_ENDIAN_NO_SWAP); +- +- +- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! 
*/ +- if (rmesa->sarea->tiling_enabled) { +- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; +- } + + rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW | + RADEON_BFACE_SOLID | +@@ -444,7 +804,7 @@ void radeonInitState( radeonContextPtr rmesa ) + RADEON_VC_NO_SWAP; + #endif + +- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { ++ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS; + } + +@@ -491,8 +851,8 @@ void radeonInitState( radeonContextPtr rmesa ) + (2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); + + /* Initialize the texture offset to the start of the card texture heap */ +- rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ // rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = ++ // rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + + rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; + rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] = +@@ -513,15 +873,15 @@ void radeonInitState( radeonContextPtr rmesa ) + + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] = +- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; ++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + } + + 
/* Can only add ST1 at the time of doing some multitex but can keep +@@ -613,5 +973,7 @@ void radeonInitState( radeonContextPtr rmesa ) + rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; + rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; + +- rmesa->hw.all_dirty = GL_TRUE; ++ rmesa->radeon.hw.all_dirty = GL_TRUE; ++ ++ rcommonInitCmdBuf(&rmesa->radeon); + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c +index ebea1fe..af933a3 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c ++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c +@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "radeon_tcl.h" + + +-static void flush_last_swtcl_prim( radeonContextPtr rmesa ); +- + /* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */ + /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */ + #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */ +@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa ); + + #define EMIT_ATTR( ATTR, STYLE, F0 ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + fmt_0 |= F0; \ + } while (0) + + #define EMIT_PAD( N ) \ + do { \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ +- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ +- rmesa->swtcl.vertex_attr_count++; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ ++ 
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ ++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ ++ rmesa->radeon.swtcl.vertex_attr_count++; \ + } while (0) + + static GLuint radeon_cp_vc_frmts[3][2] = +@@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] = + + static void radeonSetVertexFormat( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); +@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx ) + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); +- rmesa->swtcl.vertex_attr_count = 0; ++ rmesa->radeon.swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. +@@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx ) + } + } + +- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || ++ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) || + fmt_0 != rmesa->swtcl.vertex_format) { + RADEON_NEWPRIM(rmesa); + rmesa->swtcl.vertex_format = fmt_0; +- rmesa->swtcl.vertex_size = ++ rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, +- rmesa->swtcl.vertex_attrs, +- rmesa->swtcl.vertex_attr_count, ++ rmesa->radeon.swtcl.vertex_attrs, ++ rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); +- rmesa->swtcl.vertex_size /= 4; +- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); ++ rmesa->radeon.swtcl.vertex_size /= 4; ++ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset ); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf( stderr, "%s: vertex_size= %d floats\n", +- __FUNCTION__, rmesa->swtcl.vertex_size); ++ __FUNCTION__, rmesa->radeon.swtcl.vertex_size); + } + } + + + static void radeonRenderStart( GLcontext *ctx ) + { +- radeonContextPtr 
rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + + radeonSetVertexFormat( ctx ); + +- if (rmesa->dma.flush != 0 && +- rmesa->dma.flush != flush_last_swtcl_prim) +- rmesa->dma.flush( rmesa ); ++ if (rmesa->radeon.dma.flush != 0 && ++ rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) ++ rmesa->radeon.dma.flush( ctx ); + } + + +@@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx ) + */ + void radeonChooseVertexState( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; +@@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx ) + * rasterization fallback. As this function will be called again when we + * leave a rasterization fallback, we can just skip it for now. + */ +- if (rmesa->Fallback != 0) ++ if (rmesa->radeon.Fallback != 0) + return; + + /* HW perspective divide is a win, but tiny vertex formats are a +@@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx ) + } + } + +- +-/* Flush vertices in the current dma region. 
+- */ +-static void flush_last_swtcl_prim( radeonContextPtr rmesa ) ++void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) + { +- if (RADEON_DEBUG & DEBUG_IOCTL) +- fprintf(stderr, "%s\n", __FUNCTION__); +- +- rmesa->dma.flush = NULL; +- +- if (rmesa->dma.current.buf) { +- struct radeon_dma_region *current = &rmesa->dma.current; +- GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset + +- current->buf->buf->idx * RADEON_BUFFER_SIZE + +- current->start); +- +- assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + +- assert (current->start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- current->ptr); ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, ++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)), ++ __FUNCTION__); + +- if (rmesa->dma.current.start != rmesa->dma.current.ptr) { +- radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + +- rmesa->hw.max_state_size + VBUF_BUFSZ ); + +- radeonEmitVertexAOS( rmesa, +- rmesa->swtcl.vertex_size, +- current_offset); ++ radeonEmitState(&rmesa->radeon); ++ radeonEmitVertexAOS( rmesa, ++ rmesa->radeon.swtcl.vertex_size, ++ rmesa->radeon.dma.current, ++ current_offset); + +- radeonEmitVbufPrim( rmesa, +- rmesa->swtcl.vertex_format, +- rmesa->swtcl.hw_primitive, +- rmesa->swtcl.numverts); +- } ++ ++ radeonEmitVbufPrim( rmesa, ++ rmesa->swtcl.vertex_format, ++ rmesa->radeon.swtcl.hw_primitive, ++ rmesa->radeon.swtcl.numverts); + +- rmesa->swtcl.numverts = 0; +- current->start = current->ptr; +- } + } + +- +-/* Alloc space in the current dma region. 
+- */ +-static INLINE void * +-radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) +-{ +- GLuint bytes = vsize * nverts; +- +- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) +- radeonRefillCurrentDmaRegion( rmesa ); +- +- if (!rmesa->dma.flush) { +- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +- rmesa->dma.flush = flush_last_swtcl_prim; +- } +- +- assert( vsize == rmesa->swtcl.vertex_size * 4 ); +- assert( rmesa->dma.flush == flush_last_swtcl_prim ); +- assert (rmesa->dma.current.start + +- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == +- rmesa->dma.current.ptr); +- +- +- { +- GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr); +- rmesa->dma.current.ptr += bytes; +- rmesa->swtcl.numverts += nverts; +- return head; +- } +- +-} +- +- + /* + * Render unclipped vertex buffers by emitting vertices directly to + * dma buffers. Use strip/fan hardware primitives where possible. +@@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = { + }; + + static INLINE void +-radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim ) ++radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) + { + RADEON_NEWPRIM( rmesa ); +- rmesa->swtcl.hw_primitive = hw_prim[prim]; +- assert(rmesa->dma.current.ptr == rmesa->dma.current.start); ++ rmesa->radeon.swtcl.hw_primitive = hw_prim[prim]; ++ // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start); + } + +-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) ++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) + #define INIT( prim ) radeonDmaPrimitive( rmesa, prim ) + #define FLUSH() RADEON_NEWPRIM( rmesa ) +-#define GET_CURRENT_VB_MAX_VERTS() \ +- (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4)) ++#define GET_CURRENT_VB_MAX_VERTS() 10\ ++// (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4)) + #define 
GET_SUBSEQUENT_VB_MAX_VERTS() \ +- ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4)) ++ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4)) + #define ALLOC_VERTS( nr ) \ +- radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 ) ++ rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 ) + #define EMIT_VERTS( ctx, j, nr, buf ) \ + _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf) + +@@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim ) + static GLboolean radeon_run_render( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + tnl_render_func *tab = TAG(render_tab_verts); + GLuint i; + +- if (rmesa->swtcl.indexed_verts.buf) +- RELEASE_ELT_VERTS(); +- +- if (rmesa->swtcl.RenderIndex != 0 || ++ if (rmesa->radeon.swtcl.RenderIndex != 0 || + !radeon_dma_validate_render( ctx, VB )) + return GL_TRUE; + +@@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx ); + + #undef LOCAL_VARS + #undef ALLOC_VERTS +-#define CTX_ARG radeonContextPtr rmesa +-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 ) ++#define CTX_ARG r100ContextPtr rmesa ++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size ++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 ) + #undef LOCAL_VARS + #define LOCAL_VARS \ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ +- const char *radeonverts = (char *)rmesa->swtcl.verts; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ ++ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; + #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int))) + #define VERTEX radeonVertex + #undef TAG +@@ -560,7 +504,7 @@ static struct { + #define VERT_Y(_v) 
_v->v.y + #define VERT_Z(_v) _v->v.z + #define AREA_IS_CCW( a ) (a < 0) +-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int))) ++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int))) + + #define VERT_SET_RGBA( v, c ) \ + do { \ +@@ -606,7 +550,7 @@ do { \ + #undef INIT + + #define LOCAL_VARS(n) \ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ +@@ -617,7 +561,7 @@ do { \ + ***********************************************************************/ + + #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] ) +-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive ++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive + #undef TAG + #define TAG(x) x + #include "tnl_dd/t_dd_unfilled.h" +@@ -673,9 +617,9 @@ static void init_rast_tab( void ) + } while (0) + #undef LOCAL_VARS + #define LOCAL_VARS \ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ +- const GLuint vertsize = rmesa->swtcl.vertex_size; \ +- const char *radeonverts = (char *)rmesa->swtcl.verts; \ ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ ++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ ++ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +@@ -700,17 +644,17 @@ static void init_rast_tab( void ) + void radeonChooseRenderState( GLcontext *ctx ) + { + TNLcontext *tnl = TNL_CONTEXT(ctx); +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + +- if (!rmesa->TclFallback || rmesa->Fallback) ++ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) + return; + + 
if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT; + +- if (index != rmesa->swtcl.RenderIndex) { ++ if (index != rmesa->radeon.swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; +@@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx ) + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + +- rmesa->swtcl.RenderIndex = index; ++ rmesa->radeon.swtcl.RenderIndex = index; + } + } + +@@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx ) + + static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + +- if (rmesa->swtcl.hw_primitive != hwprim) { ++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + RADEON_NEWPRIM( rmesa ); +- rmesa->swtcl.hw_primitive = hwprim; ++ rmesa->radeon.swtcl.hw_primitive = hwprim; + } + } + + static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- rmesa->swtcl.render_primitive = prim; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ rmesa->radeon.swtcl.render_primitive = prim; + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + radeonRasterPrimitive( ctx, reduced_hw_prim[prim] ); + } +@@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx ) + + static void radeonResetLineStipple( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + RADEON_STATECHANGE( rmesa, lin ); + } + +@@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit) + + void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = 
TNL_CONTEXT(ctx); +- GLuint oldfallback = rmesa->Fallback; ++ GLuint oldfallback = rmesa->radeon.Fallback; + + if (mode) { +- rmesa->Fallback |= bit; ++ rmesa->radeon.Fallback |= bit; + if (oldfallback == 0) { +- RADEON_FIREVERTICES( rmesa ); ++ radeon_firevertices(&rmesa->radeon); + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE ); + _swsetup_Wakeup( ctx ); +- rmesa->swtcl.RenderIndex = ~0; ++ rmesa->radeon.swtcl.RenderIndex = ~0; + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); +@@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + } + } + else { +- rmesa->Fallback &= ~bit; ++ rmesa->radeon.Fallback &= ~bit; + if (oldfallback == bit) { + _swrast_flush( ctx ); + tnl->Driver.Render.Start = radeonRenderStart; +@@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + + tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple; + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE ); +- if (rmesa->TclFallback) { +- /* These are already done if rmesa->TclFallback goes to ++ if (rmesa->radeon.TclFallback) { ++ /* These are already done if rmesa->radeon.TclFallback goes to + * zero above. But not if it doesn't (RADEON_NO_TCL for + * example?) 
+ */ + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); +- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); ++ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset ); + radeonChooseVertexState( ctx ); + radeonChooseRenderState( ctx ); + } +@@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + void radeonInitSwtcl( GLcontext *ctx ) + { + TNLcontext *tnl = TNL_CONTEXT(ctx); +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { +@@ -872,18 +816,15 @@ void radeonInitSwtcl( GLcontext *ctx ) + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + RADEON_MAX_TNL_VERTEX_SIZE); + +- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; +- rmesa->swtcl.RenderIndex = ~0; +- rmesa->swtcl.render_primitive = GL_TRIANGLES; +- rmesa->swtcl.hw_primitive = 0; ++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; ++ rmesa->radeon.swtcl.RenderIndex = ~0; ++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; ++ rmesa->radeon.swtcl.hw_primitive = 0; + } + + + void radeonDestroySwtcl( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + +- if (rmesa->swtcl.indexed_verts.buf) +- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, +- __FUNCTION__ ); + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h +index e485052..3ada989 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h ++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h +@@ -63,5 +63,5 @@ extern void radeon_translate_vertex( GLcontext *ctx, + + extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v ); + +- ++extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + #endif +diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c +index 779e9ae..5887ab3 100644 +--- 
a/src/mesa/drivers/dri/radeon/radeon_tcl.c ++++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c +@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "tnl/tnl.h" + #include "tnl/t_pipeline.h" + ++#include "radeon_common.h" + #include "radeon_context.h" + #include "radeon_state.h" + #include "radeon_ioctl.h" +@@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = { + }; + + +-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) ++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) + #define ELT_TYPE GLushort + + #define ELT_INIT(prim, hw_prim) \ +@@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = { + + #define RESET_STIPPLE() do { \ + RADEON_STATECHANGE( rmesa, lin ); \ +- radeonEmitState( rmesa ); \ ++ radeonEmitState(&rmesa->radeon); \ + } while (0) + + #define AUTO_STIPPLE( mode ) do { \ +@@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = { + else \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ + ~RADEON_LINE_PATTERN_AUTO_RESET; \ +- radeonEmitState( rmesa ); \ ++ radeonEmitState(&rmesa->radeon); \ + } while (0) + + + + #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr ) + +-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) ++static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) + { +- if (rmesa->dma.flush) +- rmesa->dma.flush( rmesa ); ++ if (rmesa->radeon.dma.flush) ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + +- radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + +- rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + ++ AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__); + +- radeonEmitAOS( rmesa, +- rmesa->tcl.aos_components, +- rmesa->tcl.nr_aos_components, 0 ); ++ radeonEmitAOS( rmesa, ++ rmesa->tcl.nr_aos_components, 0 ); + +- return radeonAllocEltsOpenEnded( rmesa, +- rmesa->tcl.vertex_format, +- rmesa->tcl.hw_primitive, nr ); ++ 
return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, ++ rmesa->tcl.hw_primitive, nr ); + } + +-#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) ++#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa ) + + + +@@ -174,14 +173,14 @@ static void radeonEmitPrim( GLcontext *ctx, + GLuint start, + GLuint count) + { +- radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); ++ r100ContextPtr rmesa = R100_CONTEXT( ctx ); + radeonTclPrimitive( ctx, prim, hwprim ); + +- radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + +- rmesa->hw.max_state_size + VBUF_BUFSZ ); ++ rcommonEnsureCmdBufSpace( &rmesa->radeon, ++ AOS_BUFSZ(rmesa->tcl.nr_aos_components) + ++ rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); + + radeonEmitAOS( rmesa, +- rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, + start ); + +@@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx, + GLenum prim, + int hw_prim ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint se_cntl; + GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + +@@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) + static GLboolean radeon_run_tcl_render( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; +@@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, + + /* TODO: separate this from the swtnl pipeline + */ +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + return GL_TRUE; /* fallback to software t&l */ + + if (VB->Count == 0) +@@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage = + + static void transition_to_swtnl( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = 
R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint se_cntl; + +@@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx ) + + static void transition_to_hwtnl( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + +@@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx ) + + tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; + +- if ( rmesa->dma.flush ) +- rmesa->dma.flush( rmesa ); ++ if ( rmesa->radeon.dma.flush ) ++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + +- rmesa->dma.flush = NULL; ++ rmesa->radeon.dma.flush = NULL; + rmesa->swtcl.vertex_format = 0; + +- if (rmesa->swtcl.indexed_verts.buf) +- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, +- __FUNCTION__ ); ++ // if (rmesa->swtcl.indexed_verts.buf) ++ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, ++ // __FUNCTION__ ); + + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon end tcl fallback\n"); +@@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit) + + void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- GLuint oldfallback = rmesa->TclFallback; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ GLuint oldfallback = rmesa->radeon.TclFallback; + + if (mode) { +- rmesa->TclFallback |= bit; ++ rmesa->radeon.TclFallback |= bit; + if (oldfallback == 0) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon begin tcl fallback %s\n", +@@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) + } + } + else { +- rmesa->TclFallback &= ~bit; ++ rmesa->radeon.TclFallback &= ~bit; + if (oldfallback == bit) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon end tcl fallback %s\n", +diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c 
b/src/mesa/drivers/dri/radeon/radeon_tex.c +index b0aec21..2dfb504 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_tex.c ++++ b/src/mesa/drivers/dri/radeon/radeon_tex.c +@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/texobj.h" + + #include "radeon_context.h" ++#include "radeon_mipmap_tree.h" + #include "radeon_state.h" + #include "radeon_ioctl.h" + #include "radeon_swtcl.h" +@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) + { + GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK); + ++ /* Force revalidation to account for switches from/to mipmapping. */ ++ t->validated = GL_FALSE; ++ + t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK); + + /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */ +- if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) { ++ if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) { + switch ( minf ) { + case GL_NEAREST: + case GL_NEAREST_MIPMAP_NEAREST: +@@ -244,433 +248,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) + t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); + } + +- +-/** +- * Allocate space for and load the mesa images into the texture memory block. +- * This will happen before drawing with a new texture, or drawing with a +- * texture after it was swapped out or teximaged again. 
+- */ +- +-static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj ) +-{ +- radeonTexObjPtr t; +- +- t = CALLOC_STRUCT( radeon_tex_obj ); +- texObj->DriverData = t; +- if ( t != NULL ) { +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t ); +- } +- +- /* Initialize non-image-dependent parts of the state: +- */ +- t->base.tObj = texObj; +- t->border_fallback = GL_FALSE; +- +- t->pp_txfilter = RADEON_BORDER_MODE_OGL; +- t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | +- RADEON_TXFORMAT_PERSPECTIVE_ENABLE); +- +- make_empty_list( & t->base ); +- +- radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); +- radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); +- radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); +- radeonSetTexBorderColor( t, texObj->_BorderChan ); +- } +- +- return t; +-} +- +- +-static const struct gl_texture_format * +-radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, +- GLenum format, GLenum type ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- const GLboolean do32bpt = +- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); +- const GLboolean force16bpt = +- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); +- (void) format; +- +- switch ( internalFormat ) { +- case 4: +- case GL_RGBA: +- case GL_COMPRESSED_RGBA: +- switch ( type ) { +- case GL_UNSIGNED_INT_10_10_10_2: +- case GL_UNSIGNED_INT_2_10_10_10_REV: +- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- default: +- return do32bpt ? 
_dri_texformat_argb8888 : _dri_texformat_argb4444; +- } +- +- case 3: +- case GL_RGB: +- case GL_COMPRESSED_RGB: +- switch ( type ) { +- case GL_UNSIGNED_SHORT_4_4_4_4: +- case GL_UNSIGNED_SHORT_4_4_4_4_REV: +- return _dri_texformat_argb4444; +- case GL_UNSIGNED_SHORT_5_5_5_1: +- case GL_UNSIGNED_SHORT_1_5_5_5_REV: +- return _dri_texformat_argb1555; +- case GL_UNSIGNED_SHORT_5_6_5: +- case GL_UNSIGNED_SHORT_5_6_5_REV: +- return _dri_texformat_rgb565; +- default: +- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; +- } +- +- case GL_RGBA8: +- case GL_RGB10_A2: +- case GL_RGBA12: +- case GL_RGBA16: +- return !force16bpt ? +- _dri_texformat_argb8888 : _dri_texformat_argb4444; +- +- case GL_RGBA4: +- case GL_RGBA2: +- return _dri_texformat_argb4444; +- +- case GL_RGB5_A1: +- return _dri_texformat_argb1555; +- +- case GL_RGB8: +- case GL_RGB10: +- case GL_RGB12: +- case GL_RGB16: +- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; +- +- case GL_RGB5: +- case GL_RGB4: +- case GL_R3_G3_B2: +- return _dri_texformat_rgb565; +- +- case GL_ALPHA: +- case GL_ALPHA4: +- case GL_ALPHA8: +- case GL_ALPHA12: +- case GL_ALPHA16: +- case GL_COMPRESSED_ALPHA: +- return _dri_texformat_a8; +- +- case 1: +- case GL_LUMINANCE: +- case GL_LUMINANCE4: +- case GL_LUMINANCE8: +- case GL_LUMINANCE12: +- case GL_LUMINANCE16: +- case GL_COMPRESSED_LUMINANCE: +- return _dri_texformat_l8; +- +- case 2: +- case GL_LUMINANCE_ALPHA: +- case GL_LUMINANCE4_ALPHA4: +- case GL_LUMINANCE6_ALPHA2: +- case GL_LUMINANCE8_ALPHA8: +- case GL_LUMINANCE12_ALPHA4: +- case GL_LUMINANCE12_ALPHA12: +- case GL_LUMINANCE16_ALPHA16: +- case GL_COMPRESSED_LUMINANCE_ALPHA: +- return _dri_texformat_al88; +- +- case GL_INTENSITY: +- case GL_INTENSITY4: +- case GL_INTENSITY8: +- case GL_INTENSITY12: +- case GL_INTENSITY16: +- case GL_COMPRESSED_INTENSITY: +- return _dri_texformat_i8; +- +- case GL_YCBCR_MESA: +- if (type == GL_UNSIGNED_SHORT_8_8_APPLE || +- type == GL_UNSIGNED_BYTE) +- 
return &_mesa_texformat_ycbcr; +- else +- return &_mesa_texformat_ycbcr_rev; +- +- case GL_RGB_S3TC: +- case GL_RGB4_S3TC: +- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgb_dxt1; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: +- return &_mesa_texformat_rgba_dxt1; +- +- case GL_RGBA_S3TC: +- case GL_RGBA4_S3TC: +- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: +- return &_mesa_texformat_rgba_dxt3; +- +- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: +- return &_mesa_texformat_rgba_dxt5; +- +- default: +- _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__); +- return NULL; +- } +- +- return NULL; /* never get here */ +-} +- +- +-static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint border, +- GLenum format, GLenum type, const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); +- return; +- } +- } +- +- /* Note, this will call ChooseTextureFormat */ +- _mesa_store_teximage1d(ctx, target, level, internalFormat, +- width, border, format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +- +-static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, +- GLsizei width, +- GLenum format, GLenum type, +- const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) 
radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); +- return; +- } +- } +- +- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, +- format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[0] |= (1 << level); +-} +- +- +-static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLenum format, GLenum type, const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if ( t != NULL ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); +- return; +- } +- } +- +- /* Note, this will call ChooseTextureFormat */ +- _mesa_store_teximage2d(ctx, target, level, internalFormat, +- width, height, border, format, type, pixels, +- &ctx->Unpack, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +- +-static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, +- GLsizei width, GLsizei height, +- GLenum format, GLenum type, +- const GLvoid *pixels, +- const struct gl_pixelstore_attrib *packing, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) 
texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); +- return; +- } +- } +- +- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, +- height, format, type, pixels, packing, texObj, +- texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +-static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint internalFormat, +- GLint width, GLint height, GLint border, +- GLsizei imageSize, const GLvoid *data, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- if ( t != NULL ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); +- return; +- } +- } +- +- /* Note, this will call 
ChooseTextureFormat */ +- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, +- height, border, imageSize, data, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- +- +-static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, +- GLint xoffset, GLint yoffset, +- GLsizei width, GLsizei height, +- GLenum format, +- GLsizei imageSize, const GLvoid *data, +- struct gl_texture_object *texObj, +- struct gl_texture_image *texImage ) +-{ +- driTextureObject * t = (driTextureObject *) texObj->DriverData; +- GLuint face; +- +- +- /* which cube face or ordinary 2D image */ +- switch (target) { +- case GL_TEXTURE_CUBE_MAP_POSITIVE_X: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: +- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: +- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: +- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; +- ASSERT(face < 6); +- break; +- default: +- face = 0; +- } +- +- assert( t ); /* this _should_ be true */ +- if ( t ) { +- driSwapOutTextureObject( t ); +- } +- else { +- t = (driTextureObject *) radeonAllocTexObj( texObj ); +- if (!t) { +- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); +- return; +- } +- } +- +- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, +- height, format, imageSize, data, texObj, texImage); +- +- t->dirty_images[face] |= (1 << level); +-} +- + #define SCALED_FLOAT_TO_BYTE( x, scale ) \ + (((GLuint)((255.0F / scale) * (x))) / 2) + + static void radeonTexEnv( GLcontext *ctx, GLenum target, + GLenum pname, const GLfloat *param ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + +@@ -701,7 +285,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target, + * functions, one mapping 
[-1.0,0.0] to [-128,0] and one mapping + * [0.0,4.0] to [0,127]. + */ +- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? ++ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? + 0.0 : -1.0; + bias = CLAMP( *param, min, 4.0 ); + if ( bias == 0 ) { +@@ -734,7 +318,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat *params ) + { +- radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData; ++ radeonTexObj* t = radeon_tex_obj(texObj); + + if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %s )\n", __FUNCTION__, +@@ -762,57 +346,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: ++ + /* This isn't the most efficient solution but there doesn't appear to + * be a nice alternative. Since there's no LOD clamping, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. 
+ */ +- driSwapOutTextureObject( (driTextureObject *) t ); ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; ++ t->validated = GL_FALSE; ++ } + break; + + default: + return; + } +- +- /* Mark this texobj as dirty (one bit per tex unit) +- */ +- t->dirty_state = TEX_ALL; +-} +- +- +-static void radeonBindTexture( GLcontext *ctx, GLenum target, +- struct gl_texture_object *texObj ) +-{ +- if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { +- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, +- ctx->Texture.CurrentUnit ); +- } +- +- assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D && +- target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) || +- (texObj->DriverData != NULL) ); + } + +- + static void radeonDeleteTexture( GLcontext *ctx, + struct gl_texture_object *texObj ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- driTextureObject * t = (driTextureObject *) texObj->DriverData; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ radeonTexObj* t = radeon_tex_obj(texObj); ++ int i; + + if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, + _mesa_lookup_enum_by_nr( texObj->Target ) ); + } + +- if ( t != NULL ) { +- if ( rmesa ) { +- RADEON_FIREVERTICES( rmesa ); +- } +- +- driDestroyTextureObject( t ); ++ if ( rmesa ) { ++ radeon_firevertices(&rmesa->radeon); ++ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) { ++ if ( t == rmesa->state.texture.unit[i].texobj ) { ++ rmesa->state.texture.unit[i].texobj = NULL; ++ rmesa->hw.tex[i].dirty = GL_FALSE; ++ rmesa->hw.cube[i].dirty = GL_FALSE; ++ } ++ } + } + ++ if (t->mt) { ++ radeon_miptree_unreference(t->mt); ++ t->mt = 0; ++ } + /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); + } +@@ -832,7 +410,7 @@ static void radeonTexGen( GLcontext *ctx, + GLenum pname, + const GLfloat *params ) + { +- radeonContextPtr 
rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + rmesa->recheck_texgen[unit] = GL_TRUE; + } +@@ -846,17 +424,27 @@ static void radeonTexGen( GLcontext *ctx, + static struct gl_texture_object * + radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- struct gl_texture_object *obj; +- obj = _mesa_new_texture_object(ctx, name, target); +- if (!obj) +- return NULL; +- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; +- radeonAllocTexObj( obj ); +- return obj; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); ++ ++ _mesa_initialize_texture_object(&t->base, name, target); ++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; ++ ++ t->border_fallback = GL_FALSE; ++ ++ t->pp_txfilter = RADEON_BORDER_MODE_OGL; ++ t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | ++ RADEON_TXFORMAT_PERSPECTIVE_ENABLE); ++ ++ radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT ); ++ radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); ++ radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter ); ++ radeonSetTexBorderColor( t, t->base._BorderChan ); ++ return &t->base; + } + + ++ + void radeonInitTextureFuncs( struct dd_function_table *functions ) + { + functions->ChooseTextureFormat = radeonChooseTextureFormat; +@@ -864,11 +452,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) + functions->TexImage2D = radeonTexImage2D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; ++ functions->GetTexImage = radeonGetTexImage; ++ functions->GetCompressedTexImage = radeonGetCompressedTexImage; + + functions->NewTextureObject = radeonNewTextureObject; +- functions->BindTexture = radeonBindTexture; ++ // functions->BindTexture = radeonBindTexture; + functions->DeleteTexture = radeonDeleteTexture; +- functions->IsTextureResident = 
driIsTextureResident; + + functions->TexEnv = radeonTexEnv; + functions->TexParameter = radeonTexParameter; +@@ -877,5 +466,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + ++ functions->GenerateMipmap = radeonGenerateMipmap; ++ ++ functions->NewTextureImage = radeonNewTextureImage; ++ functions->FreeTexImageData = radeonFreeTexImageData; ++ functions->MapTexture = radeonMapTexture; ++ functions->UnmapTexture = radeonUnmapTexture; ++ + driInitTextureFormats(); + } +diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h +index 8000880..8c2f9be 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_tex.h ++++ b/src/mesa/drivers/dri/radeon/radeon_tex.h +@@ -43,10 +43,10 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname, + + extern void radeonUpdateTextureState( GLcontext *ctx ); + +-extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, ++extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t, + GLuint face ); + +-extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ); ++extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t ); + + extern void radeonInitTextureFuncs( struct dd_function_table *functions ); + +diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c +deleted file mode 100644 +index 5f7bbe6..0000000 +--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c ++++ /dev/null +@@ -1,404 +0,0 @@ +-/************************************************************************** +- +-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and +- VA Linux Systems Inc., Fremont, California. +- +-All Rights Reserved. 
+- +-Permission is hereby granted, free of charge, to any person obtaining +-a copy of this software and associated documentation files (the +-"Software"), to deal in the Software without restriction, including +-without limitation on the rights to use, copy, modify, merge, publish, +-distribute, sub license, and/or sell copies of the Software, and to +-permit persons to whom the Software is furnished to do so, subject to +-the following conditions: +- +-The above copyright notice and this permission notice (including the +-next paragraph) shall be included in all copies or substantial +-portions of the Software. +- +-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +-SOFTWARE. +- +-**************************************************************************/ +- +-/* +- * Authors: +- * Kevin E. Martin +- * Gareth Hughes +- * +- */ +-#include +- +-#include "main/glheader.h" +-#include "main/imports.h" +-#include "main/context.h" +-#include "main/macros.h" +- +-#include "radeon_context.h" +-#include "radeon_ioctl.h" +-#include "radeon_tex.h" +- +-#include /* for usleep() */ +- +- +-/** +- * Destroy any device-dependent state associated with the texture. This may +- * include NULLing out hardware state that points to the texture. 
+- */ +-void +-radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ) +-{ +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj ); +- } +- +- if ( rmesa != NULL ) { +- unsigned i; +- +- +- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { +- if ( t == rmesa->state.texture.unit[i].texobj ) { +- rmesa->state.texture.unit[i].texobj = NULL; +- } +- } +- } +-} +- +- +-/* ------------------------------------------------------------ +- * Texture image conversions +- */ +- +- +-static void radeonUploadRectSubImage( radeonContextPtr rmesa, +- radeonTexObjPtr t, +- struct gl_texture_image *texImage, +- GLint x, GLint y, +- GLint width, GLint height ) +-{ +- const struct gl_texture_format *texFormat = texImage->TexFormat; +- int blit_format, dstPitch, done; +- +- switch ( texFormat->TexelBytes ) { +- case 1: +- blit_format = RADEON_GMC_DST_8BPP_CI; +- break; +- case 2: +- blit_format = RADEON_GMC_DST_16BPP; +- break; +- case 4: +- blit_format = RADEON_GMC_DST_32BPP; +- break; +- default: +- fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", +- texFormat->TexelBytes); +- return; +- } +- +- t->image[0][0].data = texImage->Data; +- +- /* Currently don't need to cope with small pitches. +- */ +- width = texImage->Width; +- height = texImage->Height; +- dstPitch = t->pp_txpitch + 32; +- +- { /* FIXME: prefer GART-texturing if possible */ +- /* Data not in GART memory, or bad pitch. 
+- */ +- for (done = 0; done < height ; ) { +- struct radeon_dma_region region; +- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); +- int src_pitch; +- char *tex; +- +- src_pitch = texImage->RowStride * texFormat->TexelBytes; +- +- tex = (char *)texImage->Data + done * src_pitch; +- +- memset(®ion, 0, sizeof(region)); +- radeonAllocDmaRegion( rmesa, ®ion, lines * dstPitch, 1024 ); +- +- /* Copy texdata to dma: +- */ +- if (0) +- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", +- __FUNCTION__, src_pitch, dstPitch); +- +- if (src_pitch == dstPitch) { +- memcpy( region.address + region.start, tex, lines * src_pitch ); +- } +- else { +- char *buf = region.address + region.start; +- int i; +- for (i = 0 ; i < lines ; i++) { +- memcpy( buf, tex, src_pitch ); +- buf += dstPitch; +- tex += src_pitch; +- } +- } +- +- radeonEmitWait( rmesa, RADEON_WAIT_3D ); +- +- +- +- /* Blit to framebuffer +- */ +- radeonEmitBlit( rmesa, +- blit_format, +- dstPitch, GET_START( ®ion ), +- dstPitch, t->bufAddr, +- 0, 0, +- 0, done, +- width, lines ); +- +- radeonEmitWait( rmesa, RADEON_WAIT_2D ); +- +- radeonReleaseDmaRegion( rmesa, ®ion, __FUNCTION__ ); +- done += lines; +- } +- } +-} +- +- +-/** +- * Upload the texture image associated with texture \a t at the specified +- * level at the address relative to \a start. 
+- */ +-static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, +- GLint hwlevel, +- GLint x, GLint y, GLint width, GLint height, +- GLuint face ) +-{ +- struct gl_texture_image *texImage = NULL; +- GLuint offset; +- GLint imageWidth, imageHeight; +- GLint ret; +- drm_radeon_texture_t tex; +- drm_radeon_tex_image_t tmp; +- const int level = hwlevel + t->base.firstLevel; +- +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) { +- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", +- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face ); +- } +- +- ASSERT(face < 6); +- +- /* Ensure we have a valid texture to upload */ +- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { +- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); +- return; +- } +- +- texImage = t->base.tObj->Image[face][level]; +- +- if ( !texImage ) { +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); +- return; +- } +- if ( !texImage->Data ) { +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); +- return; +- } +- +- +- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- assert(level == 0); +- assert(hwlevel == 0); +- if ( RADEON_DEBUG & DEBUG_TEXTURE ) +- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); +- radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height ); +- return; +- } +- +- imageWidth = texImage->Width; +- imageHeight = texImage->Height; +- +- offset = t->bufAddr + t->base.totalSize * face / 6; +- +- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { +- GLint imageX = 0; +- GLint imageY = 0; +- GLint blitX = t->image[face][hwlevel].x; +- GLint blitY = t->image[face][hwlevel].y; +- GLint blitWidth = t->image[face][hwlevel].width; +- GLint blitHeight = t->image[face][hwlevel].height; +- fprintf( stderr, " upload image: %d,%d at %d,%d\n", +- imageWidth, imageHeight, imageX, 
imageY ); +- fprintf( stderr, " upload blit: %d,%d at %d,%d\n", +- blitWidth, blitHeight, blitX, blitY ); +- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", +- (GLuint)offset, hwlevel, level ); +- } +- +- t->image[face][hwlevel].data = texImage->Data; +- +- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. +- * NOTE: we're always use a 1KB-wide blit and I8 texture format. +- * We used to use 1, 2 and 4-byte texels and used to use the texture +- * width to dictate the blit width - but that won't work for compressed +- * textures. (Brian) +- * NOTE: can't do that with texture tiling. (sroland) +- */ +- tex.offset = offset; +- tex.image = &tmp; +- /* copy (x,y,width,height,data) */ +- memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); +- +- if (texImage->TexFormat->TexelBytes) { +- /* use multi-byte upload scheme */ +- tex.height = imageHeight; +- tex.width = imageWidth; +- tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK; +- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); +- tex.offset += tmp.x & ~1023; +- tmp.x = tmp.x % 1024; +- if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { +- /* need something like "tiled coordinates" ? */ +- tmp.y = tmp.x / (tex.pitch * 128) * 2; +- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; +- tex.pitch |= RADEON_DST_TILE_MICRO >> 22; +- } +- else { +- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); +- } +- if ((t->tile_bits & RADEON_TXO_MACRO_TILE) && +- (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) { +- /* radeon switches off macro tiling for small textures/mipmaps it seems */ +- tex.pitch |= RADEON_DST_TILE_MACRO >> 22; +- } +- } +- else { +- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is +- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ +- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. 
Needed +- so the kernel module reads the right amount of data. */ +- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ +- tex.pitch = (BLIT_WIDTH_BYTES / 64); +- tex.height = (imageHeight + 3) / 4; +- tex.width = (imageWidth + 3) / 4; +- switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) { +- case RADEON_TXFORMAT_DXT1: +- tex.width *= 8; +- break; +- case RADEON_TXFORMAT_DXT23: +- case RADEON_TXFORMAT_DXT45: +- tex.width *= 16; +- break; +- } +- } +- +- LOCK_HARDWARE( rmesa ); +- do { +- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, +- &tex, sizeof(drm_radeon_texture_t) ); +- } while ( ret == -EAGAIN ); +- +- UNLOCK_HARDWARE( rmesa ); +- +- if ( ret ) { +- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); +- fprintf( stderr, " offset=0x%08x\n", +- offset ); +- fprintf( stderr, " image width=%d height=%d\n", +- imageWidth, imageHeight ); +- fprintf( stderr, " blit width=%d height=%d data=%p\n", +- t->image[face][hwlevel].width, t->image[face][hwlevel].height, +- t->image[face][hwlevel].data ); +- exit( 1 ); +- } +-} +- +- +-/** +- * Upload the texture images associated with texture \a t. This might +- * require the allocation of texture memory. +- * +- * \param rmesa Context pointer +- * \param t Texture to be uploaded +- * \param face Cube map face to be uploaded. Zero for non-cube maps. 
+- */ +- +-int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face ) +-{ +- int numLevels; +- +- if ( !t || t->base.totalSize == 0 || t->image_override ) +- return 0; +- +- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { +- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, +- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, +- t->base.firstLevel, t->base.lastLevel ); +- } +- +- numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- if (RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); +- radeonFinish( rmesa->glCtx ); +- } +- +- LOCK_HARDWARE( rmesa ); +- +- if ( t->base.memBlock == NULL ) { +- int heap; +- +- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, +- (driTextureObject *) t ); +- if ( heap == -1 ) { +- UNLOCK_HARDWARE( rmesa ); +- return -1; +- } +- +- /* Set the base offset of the texture image */ +- t->bufAddr = rmesa->radeonScreen->texOffset[heap] +- + t->base.memBlock->ofs; +- t->pp_txoffset = t->bufAddr; +- +- if (!(t->base.tObj->Image[0][0]->IsClientData)) { +- /* hope it's safe to add that here... */ +- t->pp_txoffset |= t->tile_bits; +- } +- +- /* Mark this texobj as dirty on all units: +- */ +- t->dirty_state = TEX_ALL; +- } +- +- +- /* Let the world know we've used this memory recently. 
+- */ +- driUpdateTextureLRU( (driTextureObject *) t ); +- UNLOCK_HARDWARE( rmesa ); +- +- +- /* Upload any images that are new */ +- if (t->base.dirty_images[face]) { +- int i; +- for ( i = 0 ; i < numLevels ; i++ ) { +- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { +- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, +- t->image[face][i].height, face ); +- } +- } +- t->base.dirty_images[face] = 0; +- } +- +- if (RADEON_DEBUG & DEBUG_SYNC) { +- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); +- radeonFinish( rmesa->glCtx ); +- } +- +- return 0; +-} +diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c +index 1e2f654..6a34f1e 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c ++++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c +@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "main/enums.h" + + #include "radeon_context.h" ++#include "radeon_mipmap_tree.h" + #include "radeon_state.h" + #include "radeon_ioctl.h" + #include "radeon_swtcl.h" +@@ -75,10 +76,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ + && (tx_table[f].format != 0xffffffff) ) + +-static const struct { ++struct tx_table { + GLuint format, filter; +-} +-tx_table[] = ++}; + -+ GLuint bpp; /** Bytes per texel */ -+ GLuint tilebits; /** RADEON_TXO_xxx_TILE */ -+ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ ++static const struct tx_table tx_table[] = + { + _ALPHA(RGBA8888), + _ALPHA_REV(RGBA8888), +@@ -111,252 +113,6 @@ tx_table[] = + #undef _ALPHA + #undef _INVALID + +-/** +- * This function computes the number of bytes of storage needed for +- * the given texture object (all mipmap levels, all cube faces). +- * The \c image[face][level].x/y/width/height parameters for upload/blitting +- * are computed here. 
\c pp_txfilter, \c pp_txformat, etc. will be set here +- * too. +- * +- * \param rmesa Context pointer +- * \param tObj GL texture object whose images are to be posted to +- * hardware state. +- */ +-static void radeonSetTexImages( radeonContextPtr rmesa, +- struct gl_texture_object *tObj ) +-{ +- radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData; +- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; +- GLint curOffset, blitWidth; +- GLint i, texelBytes; +- GLint numLevels; +- GLint log2Width, log2Height, log2Depth; +- +- /* Set the hardware texture format +- */ +- if ( !t->image_override ) { +- t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | +- RADEON_TXFORMAT_ALPHA_IN_MAP); +- t->pp_txfilter &= ~RADEON_YUV_TO_RGB; +- +- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { +- t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format; +- t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter; +- } +- else { +- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); +- return; +- } +- } +- +- texelBytes = baseImage->TexFormat->TexelBytes; +- +- /* Compute which mipmap levels we really want to send to the hardware. 
+- */ +- +- if (tObj->Target != GL_TEXTURE_CUBE_MAP) +- driCalculateTextureFirstLastLevel( (driTextureObject *) t ); +- else { +- /* r100 can't handle mipmaps for cube/3d textures, so don't waste +- memory for them */ +- t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel; +- } +- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; +- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; +- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; +- +- numLevels = t->base.lastLevel - t->base.firstLevel + 1; +- +- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); +- +- /* Calculate mipmap offsets and dimensions for blitting (uploading) +- * The idea is that we lay out the mipmap levels within a block of +- * memory organized as a rectangle of width BLIT_WIDTH_BYTES. +- */ +- curOffset = 0; +- blitWidth = BLIT_WIDTH_BYTES; +- t->tile_bits = 0; +- +- /* figure out if this texture is suitable for tiling. */ +- if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) { +- if (rmesa->texmicrotile && (baseImage->Height > 1)) { +- /* allow 32 (bytes) x 1 mip (which will use two times the space +- the non-tiled version would use) max if base texture is large enough */ +- if ((numLevels == 1) || +- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && +- (baseImage->Width * texelBytes > 64)) || +- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { +- /* R100 has two microtile bits (only the txoffset reg, not the blitter) +- weird: X2 + OPT: 32bit correct, 16bit completely hosed +- X2: 32bit correct, 16bit correct +- OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */ +- t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/; +- } +- } +- if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) { +- /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not +- in the case if height is smaller than 16 (not 100% sure), as does the r200, +- so need to 
disable macro tiling in that case */ +- if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) { +- t->tile_bits |= RADEON_TXO_MACRO_TILE; +- } +- } +- } +- +- for (i = 0; i < numLevels; i++) { +- const struct gl_texture_image *texImage; +- GLuint size; +- +- texImage = tObj->Image[0][i + t->base.firstLevel]; +- if ( !texImage ) +- break; +- +- /* find image size in bytes */ +- if (texImage->IsCompressed) { +- /* need to calculate the size AFTER padding even though the texture is +- submitted without padding. +- Only handle pot textures currently - don't know if npot is even possible, +- size calculation would certainly need (trivial) adjustments. +- Align (and later pad) to 32byte, not sure what that 64byte blit width is +- good for? */ +- if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) { +- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ +- if ((texImage->Width + 3) < 8) /* width one block */ +- size = texImage->CompressedSize * 4; +- else if ((texImage->Width + 3) < 16) +- size = texImage->CompressedSize * 2; +- else size = texImage->CompressedSize; +- } +- else /* DXT3/5, 16 bytes per block */ +- if ((texImage->Width + 3) < 8) +- size = texImage->CompressedSize * 2; +- else size = texImage->CompressedSize; +- } +- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { +- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; +- } +- else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { +- /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, +- though the actual offset may be different (if texture is less than +- 32 bytes width) to the untiled case */ +- int w = (texImage->Width * texelBytes * 2 + 31) & ~31; +- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- else { +- int w = (texImage->Width * texelBytes + 31) & ~31; +- size = w * texImage->Height * texImage->Depth; +- blitWidth = MAX2(texImage->Width, 64 / texelBytes); +- } +- assert(size > 0); +- +- /* Align to 32-byte offset. It is faster to do this unconditionally +- * (no branch penalty). +- */ +- +- curOffset = (curOffset + 0x1f) & ~0x1f; +- +- if (texelBytes) { +- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ +- t->image[0][i].y = 0; +- t->image[0][i].width = MIN2(size / texelBytes, blitWidth); +- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; +- } +- else { +- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; +- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; +- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); +- t->image[0][i].height = size / t->image[0][i].width; +- } +- +-#if 0 +- /* for debugging only and only applicable to non-rectangle targets */ +- assert(size % t->image[0][i].width == 0); +- assert(t->image[0][i].x == 0 +- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); +-#endif +- +- if (0) +- fprintf(stderr, +- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", +- i, texImage->Width, texImage->Height, +- t->image[0][i].x, t->image[0][i].y, +- t->image[0][i].width, t->image[0][i].height, size, curOffset); +- +- curOffset += size; +- +- } +- +- /* Align the total size of texture memory block. 
+- */ +- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +- +- /* Setup remaining cube face blits, if needed */ +- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- const GLuint faceSize = t->base.totalSize; +- GLuint face; +- /* reuse face 0 x/y/width/height - just update the offset when uploading */ +- for (face = 1; face < 6; face++) { +- for (i = 0; i < numLevels; i++) { +- t->image[face][i].x = t->image[0][i].x; +- t->image[face][i].y = t->image[0][i].y; +- t->image[face][i].width = t->image[0][i].width; +- t->image[face][i].height = t->image[0][i].height; +- } +- } +- t->base.totalSize = 6 * faceSize; /* total texmem needed */ +- } +- +- /* Hardware state: +- */ +- t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; +- t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT; +- +- t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | +- RADEON_TXFORMAT_HEIGHT_MASK | +- RADEON_TXFORMAT_CUBIC_MAP_ENABLE | +- RADEON_TXFORMAT_F5_WIDTH_MASK | +- RADEON_TXFORMAT_F5_HEIGHT_MASK); +- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | +- (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); +- +- if (tObj->Target == GL_TEXTURE_CUBE_MAP) { +- assert(log2Width == log2Height); +- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | +- (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) | +- (RADEON_TXFORMAT_CUBIC_MAP_ENABLE)); +- t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) | +- (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) | +- (log2Width << RADEON_FACE_WIDTH_2_SHIFT) | +- (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) | +- (log2Width << RADEON_FACE_WIDTH_3_SHIFT) | +- (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) | +- (log2Width << RADEON_FACE_WIDTH_4_SHIFT) | +- (log2Height << RADEON_FACE_HEIGHT_4_SHIFT)); +- } +- +- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | +- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); +- +- /* Only need to round to nearest 32 for textures, but the blitter +- * 
requires 64-byte aligned pitches, and we may/may not need the +- * blitter. NPOT only! +- */ +- if ( !t->image_override ) { +- if (baseImage->IsCompressed) +- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); +- else +- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); +- t->pp_txpitch -= 32; +- } +- +- t->dirty_state = TEX_ALL; +- +- /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */ +-} +- +- +- + /* ================================================================ + * Texture combine functions + */ +@@ -503,7 +259,7 @@ do { \ + + static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint color_combine, alpha_combine; + const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO +@@ -846,22 +602,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) + void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) + { +- radeonContextPtr rmesa = pDRICtx->driverPrivate; ++ r100ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = +- _mesa_lookup_texture(rmesa->glCtx, texname); +- radeonTexObjPtr t; ++ _mesa_lookup_texture(rmesa->radeon.glCtx, texname); ++ radeonTexObjPtr t = radeon_tex_obj(tObj); + + if (tObj == NULL) + return; + +- t = (radeonTexObjPtr) tObj->DriverData; +- + t->image_override = GL_TRUE; + + if (!offset) + return; +- +- t->pp_txoffset = offset; ++ ++ t->bo = NULL; ++ t->override_offset = offset; + t->pp_txpitch = pitch - 32; + + switch (depth) { +@@ -901,12 +656,58 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, + RADEON_TXFORMAT_NON_POWER2) + + +-static void import_tex_obj_state( radeonContextPtr rmesa, ++static void disable_tex_obj_state( 
r100ContextPtr rmesa, ++ int unit ) ++{ ++ /* do not use RADEON_DB_STATE to avoid stale texture caches */ ++ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; ++ GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; ++ GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); + -+ radeon_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; -+}; ++ RADEON_STATECHANGE( rmesa, tex[unit] ); + -+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, -+ GLenum target, GLuint firstLevel, GLuint lastLevel, -+ GLuint width0, GLuint height0, GLuint depth0, -+ GLuint bpp, GLuint tilebits, GLuint compressed); -+void radeon_miptree_reference(radeon_mipmap_tree *mt); -+void radeon_miptree_unreference(radeon_mipmap_tree *mt); ++ RADEON_STATECHANGE( rmesa, tcl ); ++ rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) | ++ RADEON_Q_BIT(unit)); ++ ++ if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<recheck_texgen[unit] = GL_TRUE; ++ } + -+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, -+ struct gl_texture_image *texImage, GLuint face, GLuint level); -+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj); -+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, -+ struct gl_texture_image *texImage, GLuint face, GLuint level); ++ if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) { ++ /* this seems to be a genuine (r100 only?) hw bug. 
Need to remove the ++ cubic_map bit on unit 2 when the unit is disabled, otherwise every ++ 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other ++ units, better be safe than sorry though).*/ ++ RADEON_STATECHANGE( rmesa, tex[unit] ); ++ rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE; ++ } + ++ { ++ GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; ++ GLuint tmp = rmesa->TexGenEnabled; + -+#endif /* __RADEON_MIPMAP_TREE_H_ */ ++ rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<TexGenNeedNormals[unit] = 0; ++ rmesa->TexGenEnabled |= ++ (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift; ++ ++ if (tmp != rmesa->TexGenEnabled) { ++ rmesa->recheck_texgen[unit] = GL_TRUE; ++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; ++ } ++ } ++} ++ ++static void import_tex_obj_state( r100ContextPtr rmesa, + int unit, + radeonTexObjPtr texobj ) + { + /* do not use RADEON_DB_STATE to avoid stale texture caches */ +- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; ++ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + RADEON_STATECHANGE( rmesa, tex[unit] ); +@@ -915,10 +716,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa, + cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; + cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; +- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; + cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; + +- if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { ++ if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { + GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); + txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ + txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! 
*/ +@@ -928,22 +728,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa, + else { + se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit); + +- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { +- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; +- GLuint bytesPerFace = texobj->base.totalSize / 6; +- ASSERT(texobj->base.totalSize % 6 == 0); ++ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) { ++ uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + + RADEON_STATECHANGE( rmesa, cube[unit] ); + cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; +- /* dont know if this setup conforms to OpenGL.. +- * at least it matches the behavior of mesa software renderer +- */ +- cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */ +- cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */ +- cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */ +- cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */ +- cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */ +- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */ ++ /* state filled out in the cube_emit */ + } + } + +@@ -952,13 +742,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa, + rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; + } + +- texobj->dirty_state &= ~(1<radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + + +- +- +-static void set_texgen_matrix( radeonContextPtr rmesa, ++static void set_texgen_matrix( r100ContextPtr rmesa, + GLuint unit, + const GLfloat *s_plane, + const GLfloat *t_plane, +@@ -986,14 +774,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa, + rmesa->TexGenMatrix[unit].m[15] = q_plane[3]; + + rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit; +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; ++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + + /* Returns GL_FALSE if fallback 
required. + */ + static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; + GLuint tmp = rmesa->TexGenEnabled; +@@ -1094,283 +882,185 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) + } + + if (tmp != rmesa->TexGenEnabled) { +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; ++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + + return GL_TRUE; + } + +- +-static void disable_tex( GLcontext *ctx, int unit ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- +- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<state.texture.unit[unit].texobj != NULL ) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); +- rmesa->state.texture.unit[unit].texobj = NULL; +- } +- +- RADEON_STATECHANGE( rmesa, ctx ); +- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= +- ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit); +- +- RADEON_STATECHANGE( rmesa, tcl ); +- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) | +- RADEON_Q_BIT(unit)); +- +- if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<recheck_texgen[unit] = GL_TRUE; +- } +- +- if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) { +- /* this seems to be a genuine (r100 only?) hw bug. 
Need to remove the +- cubic_map bit on unit 2 when the unit is disabled, otherwise every +- 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other +- units, better be safe than sorry though).*/ +- RADEON_STATECHANGE( rmesa, tex[unit] ); +- rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE; +- } +- +- { +- GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; +- GLuint tmp = rmesa->TexGenEnabled; +- +- rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<TexGenNeedNormals[unit] = 0; +- rmesa->TexGenEnabled |= +- (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift; +- +- if (tmp != rmesa->TexGenEnabled) { +- rmesa->recheck_texgen[unit] = GL_TRUE; +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; +- } +- } +- } +-} +- +-static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; +- +- /* Need to load the 2d images associated with this unit. +- */ +- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { +- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; +- t->base.dirty_images[0] = ~0; +- } +- +- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); +- +- if ( t->base.dirty_images[0] ) { +- RADEON_FIREVERTICES( rmesa ); +- radeonSetTexImages( rmesa, tObj ); +- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); +- if ( !t->base.memBlock && !t->image_override ) +- return GL_FALSE; +- } +- +- return GL_TRUE; +-} +- +-static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) ++/** ++ * Compute the cached hardware register values for the given texture object. 
++ * ++ * \param rmesa Context pointer ++ * \param t the r300 texture object ++ */ ++static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; +- GLuint face; +- +- /* Need to load the 2d images associated with this unit. +- */ +- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { +- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; +- for (face = 0; face < 6; face++) +- t->base.dirty_images[face] = ~0; +- } ++ const struct gl_texture_image *firstImage; ++ GLint log2Width, log2Height, log2Depth, texelBytes; + +- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); ++ firstImage = t->base.Image[0][t->mt->firstLevel]; + +- if ( t->base.dirty_images[0] || t->base.dirty_images[1] || +- t->base.dirty_images[2] || t->base.dirty_images[3] || +- t->base.dirty_images[4] || t->base.dirty_images[5] ) { +- /* flush */ +- RADEON_FIREVERTICES( rmesa ); +- /* layout memory space, once for all faces */ +- radeonSetTexImages( rmesa, tObj ); ++ if (firstImage->Border > 0) { ++ fprintf(stderr, "%s: border\n", __FUNCTION__); ++ return GL_FALSE; + } + +- /* upload (per face) */ +- for (face = 0; face < 6; face++) { +- if (t->base.dirty_images[face]) { +- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face ); ++ log2Width = firstImage->WidthLog2; ++ log2Height = firstImage->HeightLog2; ++ log2Depth = firstImage->DepthLog2; ++ texelBytes = firstImage->TexFormat->TexelBytes; ++ ++ if (!t->image_override) { ++ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { ++ const struct tx_table *table = tx_table; ++ ++ t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | ++ RADEON_TXFORMAT_ALPHA_IN_MAP); ++ t->pp_txfilter &= ~RADEON_YUV_TO_RGB; ++ ++ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; ++ t->pp_txfilter 
|= table[ firstImage->TexFormat->MesaFormat ].filter; ++ } else { ++ _mesa_problem(NULL, "unexpected texture format in %s", ++ __FUNCTION__); ++ return GL_FALSE; + } + } +- +- if ( !t->base.memBlock ) { +- /* texmem alloc failed, use s/w fallback */ +- return GL_FALSE; ++ ++ t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; ++ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT; ++ ++ t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | ++ RADEON_TXFORMAT_HEIGHT_MASK | ++ RADEON_TXFORMAT_CUBIC_MAP_ENABLE | ++ RADEON_TXFORMAT_F5_WIDTH_MASK | ++ RADEON_TXFORMAT_F5_HEIGHT_MASK); ++ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | ++ (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); ++ ++ t->tile_bits = 0; ++ ++ if (t->base.Target == GL_TEXTURE_CUBE_MAP) { ++ ASSERT(log2Width == log2Height); ++ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | ++ (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) | ++ /* don't think we need this bit, if it exists at all - fglrx does not set it */ ++ (RADEON_TXFORMAT_CUBIC_MAP_ENABLE)); ++ t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) | ++ (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) | ++ (log2Width << RADEON_FACE_WIDTH_2_SHIFT) | ++ (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) | ++ (log2Width << RADEON_FACE_WIDTH_3_SHIFT) | ++ (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) | ++ (log2Width << RADEON_FACE_WIDTH_4_SHIFT) | ++ (log2Height << RADEON_FACE_HEIGHT_4_SHIFT)); + } + +- return GL_TRUE; +-} ++ t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT) ++ | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT)); + +-static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) +-{ +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; +- +- if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) { +- 
t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; +- t->base.dirty_images[0] = ~0; ++ if ( !t->image_override ) { ++ if (firstImage->IsCompressed) ++ t->pp_txpitch = (firstImage->Width + 63) & ~(63); ++ else ++ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); ++ t->pp_txpitch -= 32; + } + +- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); +- +- if ( t->base.dirty_images[0] ) { +- RADEON_FIREVERTICES( rmesa ); +- radeonSetTexImages( rmesa, tObj ); +- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); +- if ( !t->base.memBlock && +- !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) { +- fprintf(stderr, "%s: upload failed\n", __FUNCTION__); +- return GL_FALSE; +- } ++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { ++ t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; + } + + return GL_TRUE; + } + +- +-static GLboolean update_tex_common( GLcontext *ctx, int unit ) ++static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; +- struct gl_texture_object *tObj = texUnit->_Current; +- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; +- GLenum format; ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); ++ radeonTexObj *t = radeon_tex_obj(texObj); ++ int ret; + +- /* Fallback if there's a texture border */ +- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) { +- fprintf(stderr, "%s: border\n", __FUNCTION__); ++ if (!radeon_validate_texture_miptree(ctx, texObj)) + return GL_FALSE; +- } ++ ++ ret = setup_hardware_state(rmesa, t, unit); ++ if (ret == GL_FALSE) ++ return GL_FALSE; ++ + /* yuv conversion only works in first unit */ + if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB)) + return GL_FALSE; + +- /* Update state if this is a different texture object to last +- * time. 
+- */ +- if ( rmesa->state.texture.unit[unit].texobj != t ) { +- if ( rmesa->state.texture.unit[unit].texobj != NULL ) { +- /* The old texture is no longer bound to this texture unit. +- * Mark it as such. +- */ +- +- rmesa->state.texture.unit[unit].texobj->base.bound &= +- ~(1UL << unit); +- } ++ RADEON_STATECHANGE( rmesa, ctx ); ++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= ++ (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; + +- rmesa->state.texture.unit[unit].texobj = t; +- t->base.bound |= (1UL << unit); +- t->dirty_state |= 1<hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); + ++ rmesa->recheck_texgen[unit] = GL_TRUE; + +- /* Newly enabled? +- */ +- if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<hw.ctx.cmd[CTX_PP_CNTL] |= +- (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; +- +- RADEON_STATECHANGE( rmesa, tcl ); +- +- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); +- +- rmesa->recheck_texgen[unit] = GL_TRUE; +- } +- +- if (t->dirty_state & (1<NewGLState |= _NEW_TEXTURE_MATRIX; +- } ++ import_tex_obj_state( rmesa, unit, t ); + + if (rmesa->recheck_texgen[unit]) { + GLboolean fallback = !radeon_validate_texgen( ctx, unit ); + TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<recheck_texgen[unit] = 0; +- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; ++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + +- format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; +- if ( rmesa->state.texture.unit[unit].format != format || +- rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) { +- rmesa->state.texture.unit[unit].format = format; +- rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode; +- if ( ! radeonUpdateTextureEnv( ctx, unit ) ) { +- return GL_FALSE; +- } ++ if ( ! 
radeonUpdateTextureEnv( ctx, unit ) ) { ++ return GL_FALSE; + } +- + FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback ); ++ ++ t->validated = GL_TRUE; + return !t->border_fallback; + } + +- +- + static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit ) + { ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + +- if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) { +- return (enable_tex_rect( ctx, unit ) && +- update_tex_common( ctx, unit )); +- } +- else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { +- return (enable_tex_2d( ctx, unit ) && +- update_tex_common( ctx, unit )); +- } +- else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) { +- return (enable_tex_cube( ctx, unit ) && +- update_tex_common( ctx, unit )); ++ ++ if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) { ++ return GL_FALSE; + } +- else if ( texUnit->_ReallyEnabled ) { +- return GL_FALSE; ++ ++ if (!ctx->Texture.Unit[unit]._ReallyEnabled) { ++ /* disable the unit */ ++ disable_tex_obj_state(rmesa, unit); ++ return GL_TRUE; + } +- else { +- disable_tex( ctx, unit ); +- return GL_TRUE; ++ ++ if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) { ++ _mesa_warning(ctx, ++ "failed to validate texture for unit %d.\n", ++ unit); ++ rmesa->state.texture.unit[unit].texobj = NULL; ++ return GL_FALSE; + } ++ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); ++ return GL_TRUE; + } + + void radeonUpdateTextureState( GLcontext *ctx ) + { +- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); ++ r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLboolean ok; + ++ /* set the ctx all textures off */ ++ RADEON_STATECHANGE( rmesa, ctx ); ++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK)); ++ + ok = (radeonUpdateTextureUnit( ctx, 0 ) && + radeonUpdateTextureUnit( ctx, 1 ) && + radeonUpdateTextureUnit( 
ctx, 2 )); + + FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok ); + +- if (rmesa->TclFallback) ++ if (rmesa->radeon.TclFallback) + radeonChooseVertexState( ctx ); + } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c new file mode 100644 index 0000000..63680b4 @@ -6081,6 +35305,349 @@ index 0000000..d90fda7 + struct gl_texture_image *texImage); + +#endif --- -1.6.0.3 - +diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h +index 596a8aa..0df634b 100644 +--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h ++++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h +@@ -2031,6 +2031,9 @@ + #define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 + #define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 + #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 ++#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400 ++#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500 ++#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600 + #define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 + #define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 + #define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 +diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c +index dae576a..1c02617 100644 +--- a/src/mesa/main/enable.c ++++ b/src/mesa/main/enable.c +@@ -922,10 +922,13 @@ _mesa_set_enable(GLcontext *ctx, GLenum cap, GLboolean state) + return; + FLUSH_VERTICES(ctx, _NEW_STENCIL); + ctx->Stencil.TestTwoSide = state; +- if (state) ++ if (state) { ++ ctx->Stencil._BackFace = 2; + ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL; +- else ++ } else { ++ ctx->Stencil._BackFace = 1; + ctx->_TriangleCaps &= ~DD_TRI_TWOSTENCIL; ++ } + break; + + #if FEATURE_ARB_fragment_program +diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c +index e1008d7..a42c446 100644 +--- a/src/mesa/main/getstring.c ++++ b/src/mesa/main/getstring.c +@@ -82,7 +82,16 @@ compute_version(const GLcontext *ctx) + ctx->Extensions.ARB_vertex_shader && + 
ctx->Extensions.ARB_fragment_shader && + ctx->Extensions.ARB_texture_non_power_of_two && +- ctx->Extensions.EXT_blend_equation_separate); ++ ctx->Extensions.EXT_blend_equation_separate && ++ ++ /* Technically, 2.0 requires the functionality ++ * of the EXT version. Enable 2.0 if either ++ * extension is available, and assume that a ++ * driver that only exposes the ATI extension ++ * will fallback to software when necessary. ++ */ ++ (ctx->Extensions.EXT_stencil_two_side ++ || ctx->Extensions.ATI_separate_stencil)); + const GLboolean ver_2_1 = (ver_2_0 && + ctx->Extensions.ARB_shading_language_120 && + ctx->Extensions.EXT_pixel_buffer_object && +diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h +index 2014745..144c61e 100644 +--- a/src/mesa/main/mtypes.h ++++ b/src/mesa/main/mtypes.h +@@ -1108,20 +1108,34 @@ struct gl_scissor_attrib + + /** + * Stencil attribute group (GL_STENCIL_BUFFER_BIT). ++ * ++ * Three sets of stencil data are tracked so that OpenGL 2.0, ++ * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported ++ * simultaneously. In each of the stencil state arrays, element 0 corresponds ++ * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 / ++ * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the ++ * GL_EXT_stencil_two_side GL_BACK state. ++ * ++ * The derived value \c _BackFace is either 1 or 2 depending on whether or ++ * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled. ++ * ++ * The derived value \c _TestTwoSide is set when the front-face and back-face ++ * stencil state are different. 
+ */ + struct gl_stencil_attrib + { + GLboolean Enabled; /**< Enabled flag */ + GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */ +- GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 1) */ ++ GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */ + GLboolean _TestTwoSide; +- GLenum Function[2]; /**< Stencil function */ +- GLenum FailFunc[2]; /**< Fail function */ +- GLenum ZPassFunc[2]; /**< Depth buffer pass function */ +- GLenum ZFailFunc[2]; /**< Depth buffer fail function */ +- GLint Ref[2]; /**< Reference value */ +- GLuint ValueMask[2]; /**< Value mask */ +- GLuint WriteMask[2]; /**< Write mask */ ++ GLubyte _BackFace; ++ GLenum Function[3]; /**< Stencil function */ ++ GLenum FailFunc[3]; /**< Fail function */ ++ GLenum ZPassFunc[3]; /**< Depth buffer pass function */ ++ GLenum ZFailFunc[3]; /**< Depth buffer fail function */ ++ GLint Ref[3]; /**< Reference value */ ++ GLuint ValueMask[3]; /**< Value mask */ ++ GLuint WriteMask[3]; /**< Write mask */ + GLuint Clear; /**< Clear value */ + }; + +diff --git a/src/mesa/main/stencil.c b/src/mesa/main/stencil.c +index 2a4c38b..b4ea997 100644 +--- a/src/mesa/main/stencil.c ++++ b/src/mesa/main/stencil.c +@@ -27,21 +27,6 @@ + * \file stencil.c + * Stencil operations. + * +- * Note: There's an incompatibility between GL_EXT_stencil_two_side and +- * OpenGL 2.0's two-sided stencil feature. +- * +- * With GL_EXT_stencil_two_side, calling glStencilOp/Func/Mask() only the +- * front OR back face state (as set by glActiveStencilFaceEXT) is set. +- * +- * But with OpenGL 2.0, calling glStencilOp/Func/Mask() sets BOTH the +- * front AND back state. +- * +- * So either we advertise the GL_EXT_stencil_two_side extension, or OpenGL +- * 2.0, but not both. +- * +- * Also, note that GL_ATI_separate_stencil is different as well: +- * glStencilFuncSeparateATI(GLenum frontfunc, GLenum backfunc, ...) vs. +- * glStencilFuncSeparate(GLenum face, GLenum func, ...). 
+ */ + + +@@ -198,6 +183,7 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask ) + { + GET_CURRENT_CONTEXT(ctx); + const GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; ++ const GLint face = ctx->Stencil.ActiveFace; + ASSERT_OUTSIDE_BEGIN_END(ctx); + + if (!validate_stencil_func(ctx, func)) { +@@ -207,9 +193,7 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask ) + + ref = CLAMP( ref, 0, stencilMax ); + +- if (ctx->Extensions.EXT_stencil_two_side) { +- /* only set active face state */ +- const GLint face = ctx->Stencil.ActiveFace; ++ if (face != 0) { + if (ctx->Stencil.Function[face] == func && + ctx->Stencil.ValueMask[face] == mask && + ctx->Stencil.Ref[face] == ref) +@@ -218,9 +202,12 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask ) + ctx->Stencil.Function[face] = func; + ctx->Stencil.Ref[face] = ref; + ctx->Stencil.ValueMask[face] = mask; +- if (ctx->Driver.StencilFuncSeparate) { +- ctx->Driver.StencilFuncSeparate(ctx, face ? GL_BACK : GL_FRONT, +- func, ref, mask); ++ ++ /* Only propagate the change to the driver if EXT_stencil_two_side ++ * is enabled. ++ */ ++ if (ctx->Driver.StencilFuncSeparate && ctx->Stencil.TestTwoSide) { ++ ctx->Driver.StencilFuncSeparate(ctx, GL_BACK, func, ref, mask); + } + } + else { +@@ -237,7 +224,9 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask ) + ctx->Stencil.Ref[0] = ctx->Stencil.Ref[1] = ref; + ctx->Stencil.ValueMask[0] = ctx->Stencil.ValueMask[1] = mask; + if (ctx->Driver.StencilFuncSeparate) { +- ctx->Driver.StencilFuncSeparate(ctx, GL_FRONT_AND_BACK, ++ ctx->Driver.StencilFuncSeparate(ctx, ++ ((ctx->Stencil.TestTwoSide) ++ ? 
GL_FRONT : GL_FRONT_AND_BACK), + func, ref, mask); + } + } +@@ -259,17 +248,23 @@ void GLAPIENTRY + _mesa_StencilMask( GLuint mask ) + { + GET_CURRENT_CONTEXT(ctx); ++ const GLint face = ctx->Stencil.ActiveFace; ++ + ASSERT_OUTSIDE_BEGIN_END(ctx); + +- if (ctx->Extensions.EXT_stencil_two_side) { +- /* only set active face state */ +- const GLint face = ctx->Stencil.ActiveFace; ++ if (face != 0) { ++ /* Only modify the EXT_stencil_two_side back-face state. ++ */ + if (ctx->Stencil.WriteMask[face] == mask) + return; + FLUSH_VERTICES(ctx, _NEW_STENCIL); + ctx->Stencil.WriteMask[face] = mask; +- if (ctx->Driver.StencilMaskSeparate) { +- ctx->Driver.StencilMaskSeparate(ctx, face ? GL_BACK : GL_FRONT, mask); ++ ++ /* Only propagate the change to the driver if EXT_stencil_two_side ++ * is enabled. ++ */ ++ if (ctx->Driver.StencilMaskSeparate && ctx->Stencil.TestTwoSide) { ++ ctx->Driver.StencilMaskSeparate(ctx, GL_BACK, mask); + } + } + else { +@@ -280,7 +275,10 @@ _mesa_StencilMask( GLuint mask ) + FLUSH_VERTICES(ctx, _NEW_STENCIL); + ctx->Stencil.WriteMask[0] = ctx->Stencil.WriteMask[1] = mask; + if (ctx->Driver.StencilMaskSeparate) { +- ctx->Driver.StencilMaskSeparate(ctx, GL_FRONT_AND_BACK, mask); ++ ctx->Driver.StencilMaskSeparate(ctx, ++ ((ctx->Stencil.TestTwoSide) ++ ? 
GL_FRONT : GL_FRONT_AND_BACK), ++ mask); + } + } + } +@@ -304,6 +302,8 @@ void GLAPIENTRY + _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass) + { + GET_CURRENT_CONTEXT(ctx); ++ const GLint face = ctx->Stencil.ActiveFace; ++ + ASSERT_OUTSIDE_BEGIN_END(ctx); + + if (!validate_stencil_op(ctx, fail)) { +@@ -319,9 +319,8 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass) + return; + } + +- if (ctx->Extensions.EXT_stencil_two_side) { ++ if (face != 0) { + /* only set active face state */ +- const GLint face = ctx->Stencil.ActiveFace; + if (ctx->Stencil.ZFailFunc[face] == zfail && + ctx->Stencil.ZPassFunc[face] == zpass && + ctx->Stencil.FailFunc[face] == fail) +@@ -330,9 +329,12 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass) + ctx->Stencil.ZFailFunc[face] = zfail; + ctx->Stencil.ZPassFunc[face] = zpass; + ctx->Stencil.FailFunc[face] = fail; +- if (ctx->Driver.StencilOpSeparate) { +- ctx->Driver.StencilOpSeparate(ctx, face ? GL_BACK : GL_FRONT, +- fail, zfail, zpass); ++ ++ /* Only propagate the change to the driver if EXT_stencil_two_side ++ * is enabled. ++ */ ++ if (ctx->Driver.StencilOpSeparate && ctx->Stencil.TestTwoSide) { ++ ctx->Driver.StencilOpSeparate(ctx, GL_BACK, fail, zfail, zpass); + } + } + else { +@@ -349,7 +351,9 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass) + ctx->Stencil.ZPassFunc[0] = ctx->Stencil.ZPassFunc[1] = zpass; + ctx->Stencil.FailFunc[0] = ctx->Stencil.FailFunc[1] = fail; + if (ctx->Driver.StencilOpSeparate) { +- ctx->Driver.StencilOpSeparate(ctx, GL_FRONT_AND_BACK, ++ ctx->Driver.StencilOpSeparate(ctx, ++ ((ctx->Stencil.TestTwoSide) ++ ? GL_FRONT : GL_FRONT_AND_BACK), + fail, zfail, zpass); + } + } +@@ -372,7 +376,7 @@ _mesa_ActiveStencilFaceEXT(GLenum face) + + if (face == GL_FRONT || face == GL_BACK) { + FLUSH_VERTICES(ctx, _NEW_STENCIL); +- ctx->Stencil.ActiveFace = (face == GL_FRONT) ? 0 : 1; ++ ctx->Stencil.ActiveFace = (face == GL_FRONT) ? 
0 : 2; + } + else { + _mesa_error(ctx, GL_INVALID_ENUM, "glActiveStencilFaceEXT(face)"); +@@ -513,19 +517,16 @@ _mesa_StencilMaskSeparate(GLenum face, GLuint mask) + void + _mesa_update_stencil(GLcontext *ctx) + { +- if (ctx->Extensions.EXT_stencil_two_side) { +- ctx->Stencil._TestTwoSide = ctx->Stencil.TestTwoSide; +- } +- else { +- ctx->Stencil._TestTwoSide = +- (ctx->Stencil.Function[0] != ctx->Stencil.Function[1] || +- ctx->Stencil.FailFunc[0] != ctx->Stencil.FailFunc[1] || +- ctx->Stencil.ZPassFunc[0] != ctx->Stencil.ZPassFunc[1] || +- ctx->Stencil.ZFailFunc[0] != ctx->Stencil.ZFailFunc[1] || +- ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] || +- ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1] || +- ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]); +- } ++ const GLint face = ctx->Stencil._BackFace; ++ ++ ctx->Stencil._TestTwoSide = ++ (ctx->Stencil.Function[0] != ctx->Stencil.Function[face] || ++ ctx->Stencil.FailFunc[0] != ctx->Stencil.FailFunc[face] || ++ ctx->Stencil.ZPassFunc[0] != ctx->Stencil.ZPassFunc[face] || ++ ctx->Stencil.ZFailFunc[0] != ctx->Stencil.ZFailFunc[face] || ++ ctx->Stencil.Ref[0] != ctx->Stencil.Ref[face] || ++ ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[face] || ++ ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[face]); + } + + +@@ -544,17 +545,24 @@ _mesa_init_stencil(GLcontext *ctx) + ctx->Stencil.ActiveFace = 0; /* 0 = GL_FRONT, 1 = GL_BACK */ + ctx->Stencil.Function[0] = GL_ALWAYS; + ctx->Stencil.Function[1] = GL_ALWAYS; ++ ctx->Stencil.Function[2] = GL_ALWAYS; + ctx->Stencil.FailFunc[0] = GL_KEEP; + ctx->Stencil.FailFunc[1] = GL_KEEP; ++ ctx->Stencil.FailFunc[2] = GL_KEEP; + ctx->Stencil.ZPassFunc[0] = GL_KEEP; + ctx->Stencil.ZPassFunc[1] = GL_KEEP; ++ ctx->Stencil.ZPassFunc[2] = GL_KEEP; + ctx->Stencil.ZFailFunc[0] = GL_KEEP; + ctx->Stencil.ZFailFunc[1] = GL_KEEP; ++ ctx->Stencil.ZFailFunc[2] = GL_KEEP; + ctx->Stencil.Ref[0] = 0; + ctx->Stencil.Ref[1] = 0; ++ ctx->Stencil.Ref[2] = 0; + 
ctx->Stencil.ValueMask[0] = ~0U; + ctx->Stencil.ValueMask[1] = ~0U; ++ ctx->Stencil.ValueMask[2] = ~0U; + ctx->Stencil.WriteMask[0] = ~0U; + ctx->Stencil.WriteMask[1] = ~0U; ++ ctx->Stencil.WriteMask[2] = ~0U; + ctx->Stencil.Clear = 0; + } +diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c +index c925922..2e84dde 100644 +--- a/src/mesa/swrast/s_stencil.c ++++ b/src/mesa/swrast/s_stencil.c +@@ -997,10 +997,12 @@ stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face ) + GLboolean + _swrast_stencil_and_ztest_span(GLcontext *ctx, SWspan *span) + { ++ const GLuint face = (span->facing == 0) ? 0 : ctx->Stencil._BackFace; ++ + if (span->arrayMask & SPAN_XY) +- return stencil_and_ztest_pixels(ctx, span, span->facing); ++ return stencil_and_ztest_pixels(ctx, span, face); + else +- return stencil_and_ztest_span(ctx, span, span->facing); ++ return stencil_and_ztest_span(ctx, span, face); + } + +