From f77271ad73d2432502fe9001943fb8399758af64 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Jun 11 2009 17:17:23 +0000 Subject: - mesa-7.5-r300-batch-accounting.patch: Fix cmdbuf sizing (#501312) --- diff --git a/mesa.spec b/mesa.spec index b2f4a48..a2be54b 100644 --- a/mesa.spec +++ b/mesa.spec @@ -14,13 +14,13 @@ %define manpages gl-manpages-1.0.1 %define xdriinfo xdriinfo-1.0.2 -%define gitdate 20090322 +%define gitdate 20090428 #% define snapshot Summary: Mesa graphics libraries Name: mesa Version: 7.5 -Release: 0.10%{?dist} +Release: 0.15%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -48,7 +48,8 @@ Patch9: intel-revert-vbl.patch Patch12: mesa-7.1-disable-intel-classic-warn.patch Patch13: mesa-7.5-sparc64.patch -Patch20: mesa-7.5-get-driver-name.patch +Patch15: radeon-rewrite-emit1clip.patch +Patch16: mesa-7.5-r300-batch-accounting.patch BuildRequires: pkgconfig autoconf automake %if %{with_dri} @@ -177,7 +178,8 @@ This package provides some demo applications for testing Mesa. %patch9 -p1 -b .intel-vbl %patch12 -p1 -b .intel-nowarn %patch13 -p1 -b .sparc64 -%patch20 -p1 -b .get-driver-name +%patch15 -p1 -b .fix-clip +%patch16 -p1 -b .r300-accounting # Hack the demos to use installed data files sed -i 's,../images,%{_libdir}/mesa-demos-data,' progs/demos/*.c @@ -384,9 +386,11 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/clearspd %{_bindir}/copypix %{_bindir}/cubemap +%{_bindir}/dinoshade %{_bindir}/drawpix %{_bindir}/engine %{_bindir}/fbo_firecube +%{_bindir}/fbotexture %{_bindir}/fire %{_bindir}/fogcoord %{_bindir}/fplight @@ -397,9 +401,7 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/geartrain %{_bindir}/glinfo %{_bindir}/gloss -%{_bindir}/glslnoise %{_bindir}/gltestperf -%{_bindir}/glutfx %{_bindir}/ipers %{_bindir}/isosurf %{_bindir}/lodbias @@ -407,6 +409,7 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/multiarb %{_bindir}/paltex %{_bindir}/pointblast +%{_bindir}/projtex %{_bindir}/mesa-rain %{_bindir}/ray %{_bindir}/readpix @@ -417,14 +420,11 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/spectex %{_bindir}/spriteblast %{_bindir}/stex3d -%{_bindir}/streaming_rect %{_bindir}/teapot %{_bindir}/terrain %{_bindir}/tessdemo %{_bindir}/texcyl -%{_bindir}/texdown %{_bindir}/texenv -%{_bindir}/texobj %{_bindir}/textures %{_bindir}/trispd %{_bindir}/tunnel @@ -434,6 +434,21 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/mesa-demos-data %changelog +* Thu May 21 2009 Adam Jackson 7.5-0.15 +- mesa-7.5-r300-batch-accounting.patch: Fix cmdbuf sizing (#501312) + +* Tue May 05 2009 Dave Airlie 7.5-0.14 +- radeon-rewrite.patch: fixes from upstream for rs690 + r200 + +* Tue Apr 28 2009 Dave Airlie 7.5-0.13 +- radeon fix clip emits + +* Tue Apr 28 2009 Dave Airlie 7.5-0.12 +- rebase to upstream snapshot + radeon-rewrite + +* Thu Apr 16 2009 Dave Airlie 7.5-0.11 +- radeon-rewrite-fixes.patch: fix context crash in compiz + r200 fixes + * Tue Apr 14 2009 Adam Jackson 7.5-0.10 - mesa-7.5-get-driver-name.patch: Fix glXGetScreenDriver for DRI2 (#495342) diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch index 22f6dec..ee7a4b4 100644 --- a/radeon-rewrite.patch +++ b/radeon-rewrite.patch @@ -12,10 +12,10 @@ index b61d7f3..0f9306d 100644 # Assembler MESA_ASM_SOURCES = @MESA_ASM_SOURCES@ diff --git a/configure.ac b/configure.ac -index 46070fd..4164d37 100644 +index 8412cdc..231b7e9 100644 --- a/configure.ac +++ b/configure.ac -@@ -456,6 +456,8 @@ AC_SUBST([GALLIUM_WINSYS_DRM_DIRS]) +@@ -446,6 +446,8 @@ AC_SUBST([GALLIUM_WINSYS_DRM_DIRS]) AC_SUBST([GALLIUM_DRIVERS_DIRS]) AC_SUBST([GALLIUM_AUXILIARY_DIRS]) AC_SUBST([GALLIUM_STATE_TRACKERS_DIRS]) @@ -24,7 +24,7 @@ index 46070fd..4164d37 100644 dnl dnl User supplied program configuration -@@ -583,6 +585,13 @@ dri) +@@ -573,6 +575,13 @@ dri) GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED" DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" @@ -38,6 +38,96 @@ index 46070fd..4164d37 100644 # find the DRI deps for libGL if test "$x11_pkgconfig" = yes; then # add xcb modules if necessary +diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c +index e112720..ae0e61e 100644 +--- a/src/mesa/drivers/dri/common/dri_util.c ++++ b/src/mesa/drivers/dri/common/dri_util.c +@@ -37,6 +37,9 @@ + typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator); + #endif + ++static void dri_get_drawable(__DRIdrawable *pdp); ++static void dri_put_drawable(__DRIdrawable *pdp); ++ + /** + * This is just a token extension used to signal that the driver + * supports setting a read drawable. +@@ -127,7 +130,7 @@ static int driUnbindContext(__DRIcontext *pcp) + return GL_FALSE; + } + +- pdp->refcount--; ++ dri_put_drawable(pdp); + + if (prp != pdp) { + if (prp->refcount == 0) { +@@ -135,7 +138,7 @@ static int driUnbindContext(__DRIcontext *pcp) + return GL_FALSE; + } + +- prp->refcount--; ++ dri_put_drawable(prp); + } + + +@@ -170,10 +173,10 @@ static int driBindContext(__DRIcontext *pcp, + pcp->driReadablePriv = prp; + if (pdp) { + pdp->driContextPriv = pcp; +- pdp->refcount++; ++ dri_get_drawable(pdp); + } + if ( prp && pdp != prp ) { +- prp->refcount++; ++ dri_get_drawable(prp); + } + } + +@@ -430,7 +433,7 @@ driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config, + + pdp->loaderPrivate = data; + pdp->hHWDrawable = hwDrawable; +- pdp->refcount = 0; ++ pdp->refcount = 1; + pdp->pStamp = NULL; + pdp->lastStamp = 0; + pdp->index = 0; +@@ -483,12 +486,19 @@ dri2CreateNewDrawable(__DRIscreen *screen, + return pdraw; + } + +- +-static void +-driDestroyDrawable(__DRIdrawable *pdp) ++static void dri_get_drawable(__DRIdrawable *pdp) ++{ ++ pdp->refcount++; ++} ++ ++static void dri_put_drawable(__DRIdrawable *pdp) + { + __DRIscreenPrivate *psp; + ++ pdp->refcount--; ++ if (pdp->refcount) ++ return; ++ + if (pdp) { + psp = pdp->driScreenPriv; + (*psp->DriverAPI.DestroyBuffer)(pdp); +@@ -504,6 +514,12 @@ driDestroyDrawable(__DRIdrawable *pdp) + } + } + ++static void ++driDestroyDrawable(__DRIdrawable *pdp) ++{ ++ dri_put_drawable(pdp); ++} ++ + /*@}*/ + + diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e9144ac..6a246ed 100644 --- a/src/mesa/drivers/dri/r200/Makefile @@ -125,7 +215,7 @@ index e9144ac..6a246ed 100644 ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c -index e163377..3a11a44 100644 +index e163377..e34ea96 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -422,21 +512,21 @@ index e163377..3a11a44 100644 + rmesa->radeon.dma.flush = NULL; + + elt_used = (elt_used + 2) & ~2; ++ ++ nr = elt_used / 2; - assert( rmesa->dma.flush == r200FlushElts ); - rmesa->dma.flush = NULL; -+ nr = elt_used / 2; ++ radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); - /* Cope with odd number of elts: - */ - rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; - dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; -+ radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); ++ r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive); - cmd[1] |= (dwords - 3) << 16; - cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT; -+ r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive); -+ + radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo); + rmesa->radeon.tcl.elt_dma_bo = NULL; @@ -455,7 +545,7 @@ index e163377..3a11a44 100644 GLushort *retval; if (R200_DEBUG & DEBUG_IOCTL) -@@ -269,30 +201,25 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, +@@ -269,30 +201,30 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, assert((primitive & R200_VF_PRIM_WALK_IND)); @@ -476,6 +566,11 @@ index e163377..3a11a44 100644 + RADEON_GEM_DOMAIN_GTT, 0); + rmesa->radeon.tcl.elt_dma_offset = 0; + rmesa->tcl.elt_used = min_nr * 2; ++ ++ radeon_validate_bo(&rmesa->radeon, rmesa->radeon.tcl.elt_dma_bo, ++ RADEON_GEM_DOMAIN_GTT, 0); ++ if (radeon_revalidate_bos(rmesa->radeon.glCtx) == GL_FALSE) ++ fprintf(stderr,"failure to revalidate BOs - badness\n"); + radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); + retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; @@ -500,7 +595,7 @@ index e163377..3a11a44 100644 return retval; } -@@ -300,129 +227,119 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, +@@ -300,129 +232,119 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, void r200EmitVertexAOS( r200ContextPtr rmesa, @@ -4354,7 +4449,7 @@ index bae5644..0000000 - -#endif diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c -index 0eaaaf6..1b9724d 100644 +index 2fcc87c..ebf389e 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -6804,7 +6899,7 @@ index 99aecfe..5803709 100644 if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "R200 end tcl fallback %s\n", diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c -index 5a4db33..fc2caab 100644 +index 259f35a..9f79157 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -6863,12 +6958,18 @@ index 5a4db33..fc2caab 100644 t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK); t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK; -@@ -267,693 +270,12 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) +@@ -267,701 +270,16 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) } } --static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] ) --{ +-static void r200SetTexBorderColor( r200TexObjPtr t, const GLfloat color[4] ) ++static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) + { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); - t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] ); -} - @@ -6901,7 +7002,7 @@ index 5a4db33..fc2caab 100644 - r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); - r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); - r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); -- r200SetTexBorderColor( t, texObj->_BorderChan ); +- r200SetTexBorderColor( t, texObj->BorderColor ); - } - - return t; @@ -7358,8 +7459,7 @@ index 5a4db33..fc2caab 100644 - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -+static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) - { +-{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - @@ -7414,8 +7514,9 @@ index 5a4db33..fc2caab 100644 - - t->dirty_images[face] |= (1 << level); - } --} -- ++ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); + } + - -static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, @@ -7516,11 +7617,10 @@ index 5a4db33..fc2caab 100644 - - t->dirty_images[0] |= (1 << level); - } -+ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); - } +-} -#endif - - +- -#if ENABLE_HW_3D_TEXTURE -static void -r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level, @@ -7548,7 +7648,7 @@ index 5a4db33..fc2caab 100644 - } - texObj->DriverData = t; - } - +- - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); @@ -7556,10 +7656,13 @@ index 5a4db33..fc2caab 100644 - t->dirty_images[0] |= (1 << level); -} -#endif - - - -@@ -978,7 +300,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, +- +- +- + static void r200TexEnv( GLcontext *ctx, GLenum target, + GLenum pname, const GLfloat *param ) + { +@@ -983,7 +301,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, GLubyte c[4]; GLuint envColor; UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor ); @@ -7568,7 +7671,7 @@ index 5a4db33..fc2caab 100644 if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) { R200_STATECHANGE( rmesa, tf ); rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor; -@@ -997,7 +319,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, +@@ -1002,7 +320,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, * NOTE: Add a small bias to the bias for conform mipsel.c test. */ bias = *param + .01; @@ -7577,7 +7680,7 @@ index 5a4db33..fc2caab 100644 0.0 : -16.0; bias = CLAMP( bias, min, 16.0 ); b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK; -@@ -1034,7 +356,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, +@@ -1039,7 +357,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat *params ) { @@ -7586,7 +7689,7 @@ index 5a4db33..fc2caab 100644 if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, -@@ -1068,59 +390,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, +@@ -1073,59 +391,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ @@ -7674,7 +7777,7 @@ index 5a4db33..fc2caab 100644 _mesa_delete_texture_object(ctx, texObj); } -@@ -1150,46 +459,59 @@ static void r200TexGen( GLcontext *ctx, +@@ -1155,46 +460,59 @@ static void r200TexGen( GLcontext *ctx, * Called via ctx->Driver.NewTextureObject. * Note: this function will be called during context creation to * allocate the default texture objects. @@ -7711,7 +7814,7 @@ index 5a4db33..fc2caab 100644 + r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR ); + r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); + r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter); -+ r200SetTexBorderColor(t, t->base._BorderChan); ++ r200SetTexBorderColor(t, t->base.BorderColor); + + return &t->base; } @@ -7753,7 +7856,7 @@ index 5a4db33..fc2caab 100644 functions->DeleteTexture = r200DeleteTexture; functions->IsTextureResident = driIsTextureResident; -@@ -1197,22 +519,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) +@@ -1202,22 +520,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) functions->TexParameter = r200TexParameter; functions->TexGen = r200TexGen; @@ -7761,15 +7864,9 @@ index 5a4db33..fc2caab 100644 - functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D; + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; -+ -+ functions->GenerateMipmap = radeonGenerateMipmap; -+ -+ functions->NewTextureImage = radeonNewTextureImage; -+ functions->FreeTexImageData = radeonFreeTexImageData; -+ functions->MapTexture = radeonMapTexture; -+ functions->UnmapTexture = radeonUnmapTexture; - driInitTextureFormats(); +- driInitTextureFormats(); ++ functions->GenerateMipmap = radeonGenerateMipmap; -#if 000 - /* moved or obsolete code */ @@ -7777,7 +7874,13 @@ index 5a4db33..fc2caab 100644 - driInitTextureObjects( ctx, & rmesa->swapped, - DRI_TEXMGR_DO_TEXTURE_1D - | DRI_TEXMGR_DO_TEXTURE_2D ); -- ++ functions->NewTextureImage = radeonNewTextureImage; ++ functions->FreeTexImageData = radeonFreeTexImageData; ++ functions->MapTexture = radeonMapTexture; ++ functions->UnmapTexture = radeonUnmapTexture; ++ ++ driInitTextureFormats(); + - /* Hack: r200NewTextureObject is not yet installed when the - * default textures are created. Therefore set MaxAnisotropy of the - * default 2D texture now. */ @@ -8347,7 +8450,7 @@ index 3b81ac0..0000000 - return 0; -} diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c -index 0ad5651..9797f77 100644 +index 0ad5651..eee54cd 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -37,9 +37,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -8646,7 +8749,7 @@ index 0ad5651..9797f77 100644 t->pp_txpitch = pitch - 32; switch (depth) { -@@ -1014,6 +765,123 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, +@@ -1014,6 +765,122 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, } } @@ -8718,7 +8821,6 @@ index 0ad5651..9797f77 100644 + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } -+ fprintf(stderr,"settexbuf %d %dx%d@%d\n", rb->pitch, rb->width, rb->height, rb->cpp); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; @@ -8770,7 +8872,7 @@ index 0ad5651..9797f77 100644 #define REF_COLOR 1 #define REF_ALPHA 2 -@@ -1207,12 +1075,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) +@@ -1207,12 +1074,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) R200_VOLUME_FILTER_MASK) @@ -8814,7 +8916,7 @@ index 0ad5651..9797f77 100644 R200_STATECHANGE( rmesa, tex[unit] ); -@@ -1225,36 +1122,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa, +@@ -1225,36 +1121,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa, cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */ cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */ cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; @@ -8854,7 +8956,7 @@ index 0ad5651..9797f77 100644 static void set_texgen_matrix( r200ContextPtr rmesa, GLuint unit, const GLfloat *s_plane, -@@ -1377,7 +1259,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) +@@ -1377,7 +1258,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) } else { tgcm |= R200_TEXGEN_COMP_T << (unit * 4); } @@ -8862,7 +8964,7 @@ index 0ad5651..9797f77 100644 if (texUnit->TexGenEnabled & R_BIT) { if (texUnit->GenR.Mode != mode) mixed_fallback = GL_TRUE; -@@ -1517,52 +1398,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) +@@ -1517,52 +1397,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) return GL_TRUE; } @@ -8915,7 +9017,7 @@ index 0ad5651..9797f77 100644 void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); -@@ -1579,237 +1414,169 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) +@@ -1579,237 +1413,169 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) } } @@ -8938,8 +9040,8 @@ index 0ad5651..9797f77 100644 - if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; -+ const struct gl_texture_image *firstImage = -+ t->base.Image[0][t->mt->firstLevel]; ++ int firstlevel = t->mt ? t->mt->firstLevel : 0; ++ const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel]; + GLint log2Width, log2Height, log2Depth, texelBytes; + + if ( t->bo ) { @@ -9277,7 +9379,7 @@ index 0ad5651..9797f77 100644 } -@@ -1850,11 +1617,11 @@ void r200UpdateTextureState( GLcontext *ctx ) +@@ -1850,11 +1616,11 @@ void r200UpdateTextureState( GLcontext *ctx ) FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok ); @@ -9291,7 +9393,7 @@ index 0ad5651..9797f77 100644 /* * T0 hang workaround ------------- -@@ -1867,7 +1634,7 @@ void r200UpdateTextureState( GLcontext *ctx ) +@@ -1867,7 +1633,7 @@ void r200UpdateTextureState( GLcontext *ctx ) R200_STATECHANGE(rmesa, tex[1]); rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE)) @@ -9301,7 +9403,7 @@ index 0ad5651..9797f77 100644 } else if (!ctx->ATIFragmentShader._Enabled) { diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c -index a2561df..aadd144 100644 +index 4ce93b5..620f29b 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) { @@ -9317,7 +9419,7 @@ index a2561df..aadd144 100644 R200_STATECHANGE( rmesa, vap ); /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile -index 6ca9342..0dff9a1 100644 +index 6ca9342..62715e3 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -3,6 +3,8 @@ @@ -9362,7 +9464,15 @@ index 6ca9342..0dff9a1 100644 r300_tex.c \ r300_texstate.c \ radeon_program.c \ -@@ -49,12 +56,15 @@ DRIVER_SOURCES = \ +@@ -41,6 +48,7 @@ DRIVER_SOURCES = \ + radeon_program_pair.c \ + radeon_nqssadce.c \ + r300_vertprog.c \ ++ r300_fragprog_common.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ +@@ -49,12 +57,15 @@ DRIVER_SOURCES = \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ @@ -9379,7 +9489,7 @@ index 6ca9342..0dff9a1 100644 SYMLINKS = \ server/radeon_dri.c \ -@@ -68,7 +78,29 @@ COMMON_SYMLINKS = \ +@@ -68,7 +79,29 @@ COMMON_SYMLINKS = \ radeon_chipset.h \ radeon_screen.c \ radeon_screen.h \ @@ -9411,10 +9521,10 @@ index 6ca9342..0dff9a1 100644 ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c -index 3eb2dc8..2dd2c6a 100644 +index f447275..afca0e2 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c -@@ -44,245 +44,306 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -44,245 +44,336 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "radeon_drm.h" @@ -9492,7 +9602,7 @@ index 3eb2dc8..2dd2c6a 100644 + BATCH_LOCALS(&r300->radeon); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw, i; -+ ++ + if (!r300->radeon.radeonScreen->kernel_mm) { + uint32_t dwords; + dwords = (*atom->check) (ctx, atom); @@ -9501,7 +9611,7 @@ index 3eb2dc8..2dd2c6a 100644 + END_BATCH(); + return; } -- + - ret = drmCommandWrite(r300->radeon.dri.fd, - DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); - @@ -9509,7 +9619,6 @@ index 3eb2dc8..2dd2c6a 100644 - fprintf(stderr, "Syncing in %s (from %s)\n\n", - __FUNCTION__, caller); - radeonWaitForIdleLocked(&r300->radeon); -+ + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + ndw = cmd.vpu.count * 4; @@ -9606,23 +9715,13 @@ index 3eb2dc8..2dd2c6a 100644 + BATCH_LOCALS(&r300->radeon); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int notexture = 0; -+ -+ if (numtmus) { -+ int i; -+ -+ for(i = 0; i < numtmus; ++i) { -+ radeonTexObj *t = r300->hw.textures[i]; -+ -+ if (!t) -+ notexture = 1; -+ } - fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, - state->cmd_size); - - if (RADEON_DEBUG & DEBUG_VERBOSE) { - for (i = 0; i < dwords;) { -- cmd = (drm_r300_cmd_header_t) state->cmd[i]; +- cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); - reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; - fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", - state->name, i, reg, cmd.packet0.count); @@ -9633,6 +9732,16 @@ index 3eb2dc8..2dd2c6a 100644 - reg += 4; - ++i; - } ++ if (numtmus) { ++ int i; ++ ++ for(i = 0; i < numtmus; ++i) { ++ radeonTexObj *t = r300->hw.textures[i]; ++ ++ if (!t) ++ notexture = 1; ++ } ++ + if (r300->radeon.radeonScreen->kernel_mm && notexture) { + return; } @@ -9719,6 +9828,8 @@ index 3eb2dc8..2dd2c6a 100644 + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t offset = r300->radeon.state.color.draw_offset; ++ uint32_t dw = 6; ++ int i; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { @@ -9735,27 +9846,55 @@ index 3eb2dc8..2dd2c6a 100644 + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + -+ BEGIN_BATCH_NO_AUTOSTATE(8); ++ if (r300->radeon.radeonScreen->kernel_mm) ++ dw += 2; ++ BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); -+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); ++ if (!r300->radeon.radeonScreen->kernel_mm) ++ OUT_BATCH(cbpitch); ++ else ++ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); -+ OUT_BATCH((rrb->width << R300_SCISSORS_X_SHIFT) | -+ (rrb->height << R300_SCISSORS_Y_SHIFT)); ++ OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) | ++ ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT)); ++ END_BATCH(); ++ BEGIN_BATCH_NO_AUTOSTATE(16); ++ for (i = 0; i < 4; i++) { ++ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); ++ OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); ++ OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); ++ } ++ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); ++ OUT_BATCH(0xAAAA); ++ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); ++ OUT_BATCH(0xffffff); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); -+ OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET) << R300_SCISSORS_X_SHIFT) | -+ ((rrb->height + R300_SCISSORS_OFFSET) << R300_SCISSORS_Y_SHIFT)); ++ OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | ++ ((rrb->height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); ++ END_BATCH(); ++ BEGIN_BATCH_NO_AUTOSTATE(16); ++ for (i = 0; i < 4; i++) { ++ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); ++ OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); ++ OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ++ ((R300_SCISSORS_OFFSET + rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); ++ } ++ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); ++ OUT_BATCH(0xAAAA); ++ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); ++ OUT_BATCH(0xffffff); + END_BATCH(); + } + } @@ -9798,11 +9937,15 @@ index 3eb2dc8..2dd2c6a 100644 + zbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + zbpitch |= R300_DEPTHMACROTILE_ENABLE; - } ++ } + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + zbpitch |= R300_DEPTHMICROTILE_TILED; -+ } -+ + } + +- if (RADEON_DEBUG & DEBUG_STATE) +- fprintf(stderr, "Begin dirty state\n"); +- +- r300EmitAtoms(r300, GL_TRUE); + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -9810,11 +9953,6 @@ index 3eb2dc8..2dd2c6a 100644 + END_BATCH(); +} -- if (RADEON_DEBUG & DEBUG_STATE) -- fprintf(stderr, "Begin dirty state\n"); -- -- r300EmitAtoms(r300, GL_TRUE); -- - assert(r300->cmdbuf.count_used < r300->cmdbuf.size); +static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) +{ @@ -9893,7 +10031,7 @@ index 3eb2dc8..2dd2c6a 100644 cnt = r500fp_count(atom->cmd); return cnt ? (cnt * 4) + 1 : 0; } -@@ -295,8 +356,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +@@ -295,8 +386,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) r300->hw.ATOM.idx = (IDX); \ r300->hw.ATOM.check = check_##CHK; \ r300->hw.ATOM.dirty = GL_FALSE; \ @@ -9904,7 +10042,7 @@ index 3eb2dc8..2dd2c6a 100644 } while (0) /** * Allocate memory for the command buffer and initialize the state atom -@@ -304,7 +365,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +@@ -304,7 +395,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) */ void r300InitCmdBuf(r300ContextPtr r300) { @@ -9913,7 +10051,7 @@ index 3eb2dc8..2dd2c6a 100644 int has_tcl = 1; int is_r500 = 0; int i; -@@ -315,7 +376,7 @@ void r300InitCmdBuf(r300ContextPtr r300) +@@ -315,7 +406,7 @@ void r300InitCmdBuf(r300ContextPtr r300) if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) is_r500 = 1; @@ -9922,7 +10060,7 @@ index 3eb2dc8..2dd2c6a 100644 mtu = r300->radeon.glCtx->Const.MaxTextureUnits; if (RADEON_DEBUG & DEBUG_TEXTURE) { -@@ -323,97 +384,97 @@ void r300InitCmdBuf(r300ContextPtr r300) +@@ -323,97 +414,97 @@ void r300InitCmdBuf(r300ContextPtr r300) } /* Setup the atom linked list */ @@ -10057,7 +10195,7 @@ index 3eb2dc8..2dd2c6a 100644 for (i = 0; i < 8; i++) { r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | -@@ -422,133 +483,149 @@ void r300InitCmdBuf(r300ContextPtr r300) +@@ -422,133 +513,149 @@ void r300InitCmdBuf(r300ContextPtr r300) (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); } ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); @@ -10191,10 +10329,11 @@ index 3eb2dc8..2dd2c6a 100644 + r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); - r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); -+ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); - ALLOC_STATE(unk4F30, always, 3, 0); +- ALLOC_STATE(unk4F30, always, 3, 0); - r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); -+ r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2); ++ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); ++ ALLOC_STATE(zb_zmask, always, 3, 0); ++ r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2); ALLOC_STATE(zb_hiz_offset, always, 2, 0); - r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); @@ -10263,7 +10402,7 @@ index 3eb2dc8..2dd2c6a 100644 } } } -@@ -556,130 +633,37 @@ void r300InitCmdBuf(r300ContextPtr r300) +@@ -556,130 +663,37 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Textures */ ALLOC_STATE(tex.filter, variable, mtu + 1, 0); r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = @@ -10497,7 +10636,7 @@ index a8eaa58..3786813 100644 #endif /* __R300_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c -index fddd87b..06db7ab 100644 +index 12bee1a..70c7730 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -10569,21 +10708,18 @@ index fddd87b..06db7ab 100644 /** * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. -@@ -183,6 +191,91 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { +@@ -164,6 +172,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, ++ &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + + /* Try again to go to tcl? +@@ -183,6 +192,143 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { 0, }; -+static void r300RunPipeline(GLcontext * ctx) -+{ -+ _mesa_lock_context_textures(ctx); -+ -+ if (ctx->NewState) -+ _mesa_update_state_locked(ctx); -+ -+ _tnl_run_pipeline(ctx); -+ _mesa_unlock_context_textures(ctx); -+} -+ +static void r300_get_lock(radeonContextPtr rmesa) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; @@ -10593,7 +10729,7 @@ index fddd87b..06db7ab 100644 + if (!rmesa->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); + } -+} ++} + +static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ @@ -10628,9 +10764,9 @@ index fddd87b..06db7ab 100644 +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + BATCH_LOCALS(radeon); -+ ++ + r300->vap_flush_needed = GL_TRUE; -+ ++ + cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); @@ -10657,11 +10793,74 @@ index fddd87b..06db7ab 100644 + radeon->vtbl.fallback = r300_fallback; +} + ++static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ++{ ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ ++ ctx->Const.MaxTextureImageUnits = ++ driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); ++ ctx->Const.MaxTextureCoordUnits = ++ driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); ++ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, ++ ctx->Const.MaxTextureCoordUnits); ++ ctx->Const.MaxTextureMaxAnisotropy = 16.0; ++ ctx->Const.MaxTextureLodBias = 16.0; ++ ++ if (screen->chip_family >= CHIP_FAMILY_RV515) ++ ctx->Const.MaxTextureLevels = 13; ++ else ++ ctx->Const.MaxTextureLevels = 12; ++ ++ ctx->Const.MinPointSize = 1.0; ++ ctx->Const.MinPointSizeAA = 1.0; ++ ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; ++ ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; ++ ++ ctx->Const.MinLineWidth = 1.0; ++ ctx->Const.MinLineWidthAA = 1.0; ++ ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; ++ ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; ++ ++ ctx->Const.MaxDrawBuffers = 1; ++ ++ /* currently bogus data */ ++ if (screen->chip_flags & RADEON_CHIPSET_TCL) { ++ ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; ++ ctx->Const.VertexProgram.MaxNativeInstructions = ++ VSF_MAX_FRAGMENT_LENGTH / 4; ++ ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ ++ ctx->Const.VertexProgram.MaxTemps = 32; ++ ctx->Const.VertexProgram.MaxNativeTemps = ++ /*VSF_MAX_FRAGMENT_TEMPS */ 32; ++ ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ ++ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; ++ } ++ ++ if (screen->chip_family >= CHIP_FAMILY_RV515) { ++ ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS; ++ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ ++ ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS; ++ ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST; ++ ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST; ++ ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST; ++ ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST; ++ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; ++ } else { ++ ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS; ++ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ ++ ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS; ++ ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST; ++ ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST; ++ ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST; ++ ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; ++ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; ++ } ++} + /* Create the device specific rendering context. */ GLboolean r300CreateContext(const __GLcontextModes * glVisual, -@@ -194,7 +287,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, +@@ -194,13 +340,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, struct dd_function_table functions; r300ContextPtr r300; GLcontext *ctx; @@ -10670,23 +10869,33 @@ index fddd87b..06db7ab 100644 assert(glVisual); assert(driContextPriv); -@@ -208,13 +301,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + assert(screen); + +- /* Allocate the R300 context */ + r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); + if (!r300) + return GL_FALSE; +@@ -208,28 +353,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) hw_tcl_on = future_hw_tcl_on = 0; -+ r300_init_vtbl(&r300->radeon); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ +- /* Parse configuration files. +- * Do this here so that initialMaxAnisotropy is set before we create +- * the default textures. +- */ driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, screen->driScreen->myNum, "r300"); - r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, -+ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, - "def_max_anisotropy"); +- "def_max_anisotropy"); - /* Init default driver functions then plug in our R300-specific functions -@@ -226,10 +320,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, +- /* Init default driver functions then plug in our R300-specific functions +- * (the texture functions are especially important) +- */ ++ r300_init_vtbl(&r300->radeon); ++ + _mesa_init_driver_functions(&functions); + r300InitIoctlFuncs(&functions); + r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); @@ -10697,10 +10906,11 @@ index fddd87b..06db7ab 100644 if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { -@@ -238,39 +328,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, +@@ -237,94 +371,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + return GL_FALSE; } - /* Init r300 context data */ +- /* Init r300 context data */ - r300->dma.buf0_address = - r300->radeon.radeonScreen->buffers->list[0].address; - @@ -10734,51 +10944,167 @@ index fddd87b..06db7ab 100644 - r300->texture_depth = (screen->cpp == 4) ? - DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. -@@ -303,13 +360,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; - ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; - +- /* Set the maximum texture size small enough that we can guarentee that +- * all texture units can bind a maximal texture and have them both in +- * texturable memory at once. +- */ +- + ctx = r300->radeon.glCtx; ++ r300InitConstValues(ctx, screen); + +- ctx->Const.MaxTextureImageUnits = +- driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); +- ctx->Const.MaxTextureCoordUnits = +- driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); +- ctx->Const.MaxTextureUnits = +- MIN2(ctx->Const.MaxTextureImageUnits, +- ctx->Const.MaxTextureCoordUnits); +- ctx->Const.MaxTextureMaxAnisotropy = 16.0; +- ctx->Const.MaxTextureLodBias = 16.0; +- +- if (screen->chip_family >= CHIP_FAMILY_RV515) +- ctx->Const.MaxTextureLevels = 13; +- else +- ctx->Const.MaxTextureLevels = 12; +- +- driCalculateMaxTextureLevels( r300->texture_heaps, +- r300->nr_heaps, +- & ctx->Const, +- 4, +- ctx->Const.MaxTextureLevels - 1, +- MIN2(ctx->Const.MaxTextureLevels, +- MAX_3D_TEXTURE_LEVELS) - 1, +- ctx->Const.MaxTextureLevels - 1, +- ctx->Const.MaxTextureLevels - 1, +- ctx->Const.MaxTextureLevels - 1, +- GL_FALSE, +- 2 ); ++ if (hw_tcl_on) ++ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + +- ctx->Const.MinPointSize = 1.0; +- ctx->Const.MinPointSizeAA = 1.0; +- ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; +- ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; +- +- ctx->Const.MinLineWidth = 1.0; +- ctx->Const.MinLineWidthAA = 1.0; +- ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; +- ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; +- -#ifdef USER_BUFFERS - /* Needs further modifications */ - #if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); - #endif +- /* Needs further modifications */ +-#if 0 +- ctx->Const.MaxArrayLockSize = +- ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif +-#endif +- +- ctx->Const.MaxDrawBuffers = 1; ++ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - ctx->Const.MaxDrawBuffers = 1; + /* Initialize the software rasterizer and helper modules. + */ +@@ -333,16 +386,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + _swsetup_Wakeup(ctx); +- _ae_create_context(ctx); -@@ -365,6 +420,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, r300_pipeline); +- +- /* Try and keep materials and vertices separate: +- */ +-/* _tnl_isolate_materials(ctx, GL_TRUE); */ ++ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + /* Configure swrast and TNL to match hardware characteristics: + */ +@@ -351,59 +400,38 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, + _tnl_allow_pixel_fog(ctx, GL_FALSE); + _tnl_allow_vertex_fog(ctx, GL_TRUE); + +- /* currently bogus data */ +- if (screen->chip_flags & RADEON_CHIPSET_TCL) { +- ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; +- ctx->Const.VertexProgram.MaxNativeInstructions = +- VSF_MAX_FRAGMENT_LENGTH / 4; +- ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ +- ctx->Const.VertexProgram.MaxTemps = 32; +- ctx->Const.VertexProgram.MaxNativeTemps = +- /*VSF_MAX_FRAGMENT_TEMPS */ 32; +- ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ +- ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; +- } +- +- ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; +- ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ +- ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; +- ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; +- ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; +- ctx->Const.FragmentProgram.MaxNativeInstructions = +- PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; +- ctx->Const.FragmentProgram.MaxNativeTexIndirections = +- PFS_MAX_TEX_INDIRECT; +- ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ +- ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; +- ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; ++ radeon_fbo_init(&r300->radeon); ++ radeonInitSpanFuncs( ctx ); ++ r300InitCmdBuf(r300); ++ r300InitState(r300); ++ r300InitShaderFunctions(r300); ++ if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) ++ r300InitSwtcl(ctx); driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r300->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); ++ ++ if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || ++ screen->chip_family == CHIP_FAMILY_RS740) { ++ r300->radeon.texture_row_align = 64; ++ } + +- if (driQueryOptionb +- (&r300->radeon.optionCache, "disable_stencil_two_side")) ++ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, ++ "def_max_anisotropy"); ++ ++ if (driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + +- if (r300->radeon.glCtx->Mesa_DXTn +- && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { ++ if (ctx->Mesa_DXTn && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); +- } else +- if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) +- { ++ } else if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } - if (driQueryOptionb - (&r300->radeon.optionCache, "disable_stencil_two_side")) -@@ -383,14 +440,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300->disable_lowimpact_fallback = - driQueryOptionb(&r300->radeon.optionCache, - "disable_lowimpact_fallback"); +- driQueryOptionb(&r300->radeon.optionCache, +- "disable_lowimpact_fallback"); - - radeonInitSpanFuncs(ctx); -+ radeon_fbo_init(&r300->radeon); -+ radeonInitSpanFuncs( ctx ); - r300InitCmdBuf(r300); - r300InitState(r300); - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - r300InitSwtcl(ctx); - +- r300InitCmdBuf(r300); +- r300InitState(r300); +- if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) +- r300InitSwtcl(ctx); +- - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; -+ TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline; ++ driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback"); tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { -@@ -413,145 +470,3 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, +@@ -426,145 +454,3 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, return GL_TRUE; } @@ -10925,38 +11251,45 @@ index fddd87b..06db7ab 100644 - } -} diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h -index c15e9fa..5ef59d2 100644 +index 9c49586..379977b 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h -@@ -42,21 +42,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -37,26 +37,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #ifndef __R300_CONTEXT_H__ + #define __R300_CONTEXT_H__ + +-#include "tnl/t_vertex.h" + #include "drm.h" #include "radeon_drm.h" #include "dri_util.h" - #include "texmem.h" +-#include "texmem.h" +#include "radeon_common.h" - #include "main/macros.h" +-#include "main/macros.h" #include "main/mtypes.h" - #include "main/colormac.h" - --#define USER_BUFFERS +-#include "main/colormac.h" - +-#define USER_BUFFERS ++#include "shader/prog_instruction.h" + struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -#include "radeon_lock.h" -+ - #include "main/mm.h" +-#include "main/mm.h" -/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . +/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble with other compilers ... GLUE! */ -@@ -75,174 +74,19 @@ typedef struct r300_context *r300ContextPtr; - #include "r300_vertprog.h" - #include "r500_fragprog.h" +@@ -73,180 +67,14 @@ typedef struct r300_context *r300ContextPtr; + } + #include "r300_vertprog.h" +-#include "r500_fragprog.h" +- -/** - * This function takes a float and packs it into a uint32_t - */ @@ -10985,7 +11318,7 @@ index c15e9fa..5ef59d2 100644 - - if (f == 0.0) - return 0; - +- - mantissa = frexpf(f, &exponent); - - /* Handle -ve */ @@ -11001,9 +11334,9 @@ index c15e9fa..5ef59d2 100644 - - return float24; -} - - /************ DMA BUFFERS **************/ - +- +-/************ DMA BUFFERS **************/ +- -/* Need refcounting on dma buffers: - */ -struct r300_dma_buffer { @@ -11037,7 +11370,7 @@ index c15e9fa..5ef59d2 100644 - * these may be flushed by calling flush_current(); - */ - struct r300_dma_region current; -- + - void (*flush) (r300ContextPtr); - - char *buf0_address; /* start of buf[0], for index calcs */ @@ -11052,6 +11385,10 @@ index c15e9fa..5ef59d2 100644 - -typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; - +-/* Maximum number of mipmap levels supported by any supported GPU +- */ +-#define R300_MAX_TEXTURE_LEVELS 13 +- -/* Texture object in locally shared texture space. - */ -struct r300_tex_obj { @@ -11060,7 +11397,7 @@ index c15e9fa..5ef59d2 100644 - GLuint bufAddr; /* Offset to start of locally - shared texture block */ - -- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; +- drm_radeon_tex_image_t image[6][R300_MAX_TEXTURE_LEVELS]; - /* Six, for the cube faces */ - - GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ @@ -11097,16 +11434,16 @@ index c15e9fa..5ef59d2 100644 - GLenum format; - GLenum envMode; -}; -- + /* The blit width for texture uploads */ #define R300_BLIT_WIDTH_BYTES 1024 #define R300_MAX_TEXTURE_UNITS 8 - struct r300_texture_state { +-struct r300_texture_state { - struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; - int tc_count; /* number of incoming texture coordinates from VAP */ - }; +- int tc_count; /* number of incoming texture coordinates from VAP */ +-}; -/** - * A block of hardware state. @@ -11128,7 +11465,7 @@ index c15e9fa..5ef59d2 100644 #define R300_VPT_CMD_0 0 #define R300_VPT_XSCALE 1 -@@ -459,124 +303,98 @@ struct r300_state_atom { +@@ -463,124 +291,98 @@ struct r300_state_atom { * Cache for hardware register state. */ struct r300_hw_state { @@ -11208,7 +11545,7 @@ index c15e9fa..5ef59d2 100644 - struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */ + struct radeon_state_atom vpt; /* viewport (1D98) */ + struct radeon_state_atom vap_cntl; -+ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ ++ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ + struct radeon_state_atom vof; /* VAP output format register 0x2090 */ + struct radeon_state_atom vte; /* (20B0) */ + struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ @@ -11266,7 +11603,7 @@ index c15e9fa..5ef59d2 100644 + struct radeon_state_atom zstencil_format; + struct radeon_state_atom zb; /* z buffer (4F20) */ + struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ -+ struct radeon_state_atom unk4F30; /* (4F30) */ ++ struct radeon_state_atom zb_zmask; /* (4F30) */ + struct radeon_state_atom zb_hiz_offset; /* (4F44) */ + struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + @@ -11331,30 +11668,198 @@ index c15e9fa..5ef59d2 100644 /* Vertex shader state */ /* Perhaps more if we store programs in vmem? */ -@@ -812,22 +630,14 @@ struct r500_fragment_program { - #define REG_TEX0 2 +@@ -613,12 +415,8 @@ extern int hw_tcl_on; + #include "tnl_dd/t_dd_vertex.h" + #undef TAG + +-//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) + #define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) + +-/* Should but doesnt work */ +-//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) +- + /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. + * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. + */ +@@ -652,14 +450,18 @@ struct r300_vertex_program_cont { + struct r300_vertex_program *progs; + }; + +-#define PFS_MAX_ALU_INST 64 +-#define PFS_MAX_TEX_INST 64 +-#define PFS_MAX_TEX_INDIRECT 4 +-#define PFS_NUM_TEMP_REGS 32 +-#define PFS_NUM_CONST_REGS 16 ++#define R300_PFS_MAX_ALU_INST 64 ++#define R300_PFS_MAX_TEX_INST 32 ++#define R300_PFS_MAX_TEX_INDIRECT 4 ++#define R300_PFS_NUM_TEMP_REGS 32 ++#define R300_PFS_NUM_CONST_REGS 32 + +-struct r300_pfs_compile_state; ++#define R500_PFS_MAX_INST 512 ++#define R500_PFS_NUM_TEMP_REGS 128 ++#define R500_PFS_NUM_CONST_REGS 256 + ++struct r300_pfs_compile_state; ++struct r500_pfs_compile_state; + + /** + * Stores state that influences the compilation of a fragment program. +@@ -702,7 +504,7 @@ struct r300_fragment_program_node { + struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ +- GLuint inst[PFS_MAX_TEX_INST]; ++ GLuint inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { +@@ -712,7 +514,7 @@ struct r300_fragment_program_code { + GLuint inst1; + GLuint inst2; + GLuint inst3; +- } inst[PFS_MAX_ALU_INST]; ++ } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + struct r300_fragment_program_node node[4]; +@@ -723,53 +525,12 @@ struct r300_fragment_program_code { + * Remember which program register a given hardware constant + * belongs to. + */ +- struct prog_src_register constant[PFS_NUM_CONST_REGS]; ++ struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; + }; + +-/** +- * Store everything about a fragment program that is needed +- * to render with that program. +- */ +-struct r300_fragment_program { +- struct gl_fragment_program mesa_program; +- +- GLboolean translated; +- GLboolean error; +- +- struct r300_fragment_program_external_state state; +- struct r300_fragment_program_code code; +- +- GLboolean WritesDepth; +- GLuint optimization; +-}; +- +-struct r500_pfs_compile_state; +- +-struct r500_fragment_program_external_state { +- struct { +- /** +- * If the sampler is used as a shadow sampler, +- * this field is: +- * 0 - GL_LUMINANCE +- * 1 - GL_INTENSITY +- * 2 - GL_ALPHA +- * depending on the depth texture mode. +- */ +- GLuint depth_texture_mode : 2; +- +- /** +- * If the sampler is used as a shadow sampler, +- * this field is (texture_compare_func - GL_NEVER). +- * [e.g. if compare function is GL_LEQUAL, this field is 3] +- * +- * Otherwise, this field is 0. +- */ +- GLuint texture_compare_func : 3; +- } unit[16]; +-}; + + struct r500_fragment_program_code { + struct { +@@ -779,7 +540,7 @@ struct r500_fragment_program_code { + GLuint inst3; + GLuint inst4; + GLuint inst5; +- } inst[512]; ++ } inst[R500_PFS_MAX_INST]; + + int inst_offset; + int inst_end; +@@ -788,51 +549,41 @@ struct r500_fragment_program_code { + * Remember which program register a given hardware constant + * belongs to. + */ +- struct prog_src_register constant[PFS_NUM_CONST_REGS]; ++ struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; + }; + +-struct r500_fragment_program { +- struct gl_fragment_program mesa_program; ++/** ++* Store everything about a fragment program that is needed ++* to render with that program. ++*/ ++struct r300_fragment_program { ++ struct gl_fragment_program Base; + +- GLcontext *ctx; + GLboolean translated; + GLboolean error; - struct r300_state { +- struct r500_fragment_program_external_state state; +- struct r500_fragment_program_code code; ++ struct r300_fragment_program_external_state state; ++ union rX00_fragment_program_code { ++ struct r300_fragment_program_code r300; ++ struct r500_fragment_program_code r500; ++ } code; + + GLboolean writes_depth; +- + GLuint optimization; + }; + +-#define R300_MAX_AOS_ARRAYS 16 +- +-#define REG_COORDS 0 +-#define REG_COLOR0 1 +-#define REG_TEX0 2 +- +-struct r300_state { - struct r300_depthbuffer_state depth; - struct r300_texture_state texture; - int sw_tcl_inputs[VERT_ATTRIB_MAX]; - struct r300_vertex_shader_state vertex_shader; +- struct r300_texture_state texture; +- int sw_tcl_inputs[VERT_ATTRIB_MAX]; +- struct r300_vertex_shader_state vertex_shader; - struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; - int aos_count; - +- - GLuint *Elts; - struct r300_dma_region elt_dma; - +- - struct r300_dma_region swtcl_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ +- DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. +- They are the same as tnl->render_inputs for fixed pipeline */ ++struct r300_fragment_program_compiler { ++ r300ContextPtr r300; ++ struct r300_fragment_program *fp; ++ union rX00_fragment_program_code *code; ++ struct gl_program *program; ++}; - struct r300_stencilbuffer_state stencil; -- - }; ++#define R300_MAX_AOS_ARRAYS 16 + +-}; #define R300_FALLBACK_NONE 0 -@@ -837,41 +647,7 @@ struct r300_state { + #define R300_FALLBACK_TCL 1 +@@ -841,41 +592,7 @@ struct r300_state { /* r300_swtcl.c */ struct r300_swtcl_info { @@ -11397,28 +11902,49 @@ index c15e9fa..5ef59d2 100644 * Offset of the 4UB color data within a hardware (swtcl) vertex. */ GLuint coloroffset; -@@ -880,13 +656,6 @@ struct r300_swtcl_info { - * Offset of the 3UB specular color data within a hardware (swtcl) vertex. +@@ -885,12 +602,25 @@ struct r300_swtcl_info { */ GLuint specoffset; -- + - /** - * Should Mesa project vertex data or will the hardware do it? - */ - GLboolean needproj; -- ++ struct vertex_attribute{ ++ GLuint attr; ++ GLubyte format; ++ GLubyte dst_loc; ++ GLuint swizzle; ++ GLubyte write_mask; ++ } vert_attrs[VERT_ATTRIB_MAX]; ++ ++ GLubyte vertex_attr_count; ++ ++ int sw_tcl_inputs[VERT_ATTRIB_MAX]; ++}; + - struct r300_dma_region indexed_verts; ++struct r300_vtable { ++ void (* SetupRSUnit)(GLcontext *ctx); ++ void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); ++ GLboolean (* FragmentProgramEmit)(struct r300_fragment_program_compiler *compiler); ++ void (* FragmentProgramDump)(union rX00_fragment_program_code *code); ++ GLboolean (* SetupPixelShader)(GLcontext *ctx); }; -@@ -897,40 +666,22 @@ struct r300_context { +@@ -900,46 +630,24 @@ struct r300_swtcl_info { + struct r300_context { struct radeon_context radeon; /* parent class, must be first */ ++ struct r300_vtable vtbl; ++ struct r300_hw_state hw; - struct r300_cmdbuf cmdbuf; +- struct r300_state state; +- struct gl_vertex_program *curr_vp; + - struct r300_state state; - struct gl_vertex_program *curr_vp; ++ struct r300_vertex_shader_state vertex_shader; struct r300_vertex_program *selected_vp; /* Vertex buffers @@ -11448,23 +11974,34 @@ index c15e9fa..5ef59d2 100644 GLboolean disable_lowimpact_fallback; - DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ -+ +- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ struct r300_swtcl_info swtcl; +-}; + GLboolean vap_flush_needed; + +-struct r300_buffer_object { +- struct gl_buffer_object mesa_obj; +- int id; ++ DECLARE_RENDERINPUTS(render_inputs_bitset); }; - struct r300_buffer_object { -@@ -956,4 +707,7 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx, - #define RADEON_D_PLAYBACK_RAW 2 - #define RADEON_D_T 3 + #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) +@@ -955,9 +663,9 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx, + struct r300_vertex_program_cont *vp, + float *dst); +-#define RADEON_D_CAPTURE 0 +-#define RADEON_D_PLAYBACK 1 +-#define RADEON_D_PLAYBACK_RAW 2 +-#define RADEON_D_T 3 ++extern void r300InitShaderFunctions(r300ContextPtr r300); ++ +#define r300PackFloat32 radeonPackFloat32 +#define r300PackFloat24 radeonPackFloat24 -+ + #endif /* __R300_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c -index 28c3157..bcf8803 100644 +index 28c3157..a19b0f1 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -11630,7 +12167,44 @@ index 28c3157..bcf8803 100644 #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) -@@ -376,7 +231,6 @@ int r300EmitArrays(GLcontext * ctx) +@@ -272,7 +127,6 @@ GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) + + GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) + { +- r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint i, vic_1 = 0; + + if (InputsRead & (1 << VERT_ATTRIB_POS)) +@@ -284,10 +138,8 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + vic_1 |= R300_INPUT_CNTL_COLOR; + +- rmesa->state.texture.tc_count = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { +- rmesa->state.texture.tc_count++; + vic_1 |= R300_INPUT_CNTL_TC0 << i; + } + +@@ -336,7 +188,7 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) + fprintf(stderr, "\tout of free texcoords to write fog coord\n"); + _mesa_exit(-1); + } +- ret |= 4 << (3 * first_free_texcoord); ++ ret |= 1 << (3 * first_free_texcoord); + } + + return ret; +@@ -367,7 +219,7 @@ int r300EmitArrays(GLcontext * ctx) + InputsRead = prog->key.InputsRead; + OutputsWritten = prog->key.OutputsWritten; + } else { +- inputs = rmesa->state.sw_tcl_inputs; ++ inputs = rmesa->swtcl.sw_tcl_inputs; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); +@@ -376,7 +228,6 @@ int r300EmitArrays(GLcontext * ctx) assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); @@ -11638,7 +12212,16 @@ index 28c3157..bcf8803 100644 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { InputsRead |= 1 << VERT_ATTRIB_POS; -@@ -438,7 +292,7 @@ int r300EmitArrays(GLcontext * ctx) +@@ -421,7 +272,7 @@ int r300EmitArrays(GLcontext * ctx) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + +- RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); ++ RENDERINPUTS_COPY(rmesa->render_inputs_bitset, render_inputs_bitset); + } + + assert(InputsRead); +@@ -438,7 +289,7 @@ int r300EmitArrays(GLcontext * ctx) } for (i = 0; i < nr; i++) { @@ -11647,7 +12230,7 @@ index 28c3157..bcf8803 100644 swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; -@@ -448,61 +302,35 @@ int r300EmitArrays(GLcontext * ctx) +@@ -448,60 +299,34 @@ int r300EmitArrays(GLcontext * ctx) for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { swizzle[i][ci] = ci; } @@ -11706,7 +12289,6 @@ index 28c3157..bcf8803 100644 - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); -- + if (rmesa->radeon.radeonScreen->kernel_mm) { + R300_STATECHANGE(rmesa, vir[0]); + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; @@ -11728,11 +12310,10 @@ index 28c3157..bcf8803 100644 + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + } -+ + /* Setup INPUT_CNTL. */ R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); -@@ -515,50 +343,22 @@ int r300EmitArrays(GLcontext * ctx) +@@ -515,50 +340,22 @@ int r300EmitArrays(GLcontext * ctx) rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); @@ -12101,6 +12682,765 @@ index 89d7383..80c22d5 100644 extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c +index 873cde4..ea530fd 100644 +--- a/src/mesa/drivers/dri/r300/r300_fragprog.c ++++ b/src/mesa/drivers/dri/r300/r300_fragprog.c +@@ -25,32 +25,12 @@ + * + */ + +-/** +- * \file +- * +- * Fragment program compiler. Perform transformations on the intermediate +- * representation until the program is in a form where we can translate +- * it more or less directly into machine-readable form. +- * +- * \author Ben Skeggs +- * \author Jerome Glisse +- */ ++#include "r300_fragprog.h" + +-#include "main/glheader.h" +-#include "main/macros.h" +-#include "main/enums.h" +-#include "shader/prog_instruction.h" + #include "shader/prog_parameter.h" +-#include "shader/prog_print.h" + + #include "r300_context.h" +-#include "r300_fragprog.h" + #include "r300_fragprog_swizzle.h" +-#include "r300_state.h" +- +-#include "radeon_nqssadce.h" +-#include "radeon_program_alu.h" +- + + static void reset_srcreg(struct prog_src_register* reg) + { +@@ -81,7 +61,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. + */ +-static GLboolean transform_TEX( ++GLboolean r300_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) + { +@@ -175,7 +155,7 @@ static GLboolean transform_TEX( + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } +- ++ + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + +@@ -246,241 +226,10 @@ static GLboolean transform_TEX( + return GL_TRUE; + } + +- +-static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) +-{ +- struct gl_fragment_program *mp = &fp->mesa_program; +- +- /* Ask Mesa nicely to fill in ParameterValues for us */ +- if (mp->Base.Parameters) +- _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +-} +- +- +-/** +- * Transform the program to support fragment.position. +- * +- * Introduce a small fragment at the start of the program that will be +- * the only code that directly reads the FRAG_ATTRIB_WPOS input. +- * All other code pieces that reference that input will be rewritten +- * to read from a newly allocated temporary. +- * +- * \todo if/when r5xx supports the radeon_program architecture, this is a +- * likely candidate for code sharing. +- */ +-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +-{ +- GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; +- +- if (!(InputsRead & FRAG_BIT_WPOS)) +- return; +- +- static gl_state_index tokens[STATE_LENGTH] = { +- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 +- }; +- struct prog_instruction *fpi; +- GLuint window_index; +- int i = 0; +- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); +- +- _mesa_insert_instructions(compiler->program, 0, 3); +- fpi = compiler->program->Instructions; +- +- /* perspective divide */ +- fpi[i].Opcode = OPCODE_RCP; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_W; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_INPUT; +- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; +- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; +- i++; +- +- fpi[i].Opcode = OPCODE_MUL; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_INPUT; +- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; +- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; +- +- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[1].Index = tempregi; +- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; +- i++; +- +- /* viewport transformation */ +- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); +- +- fpi[i].Opcode = OPCODE_MAD; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[0].Index = tempregi; +- fpi[i].SrcReg[0].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- +- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; +- fpi[i].SrcReg[1].Index = window_index; +- fpi[i].SrcReg[1].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- +- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; +- fpi[i].SrcReg[2].Index = window_index; +- fpi[i].SrcReg[2].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- i++; +- +- for (; i < compiler->program->NumInstructions; ++i) { +- int reg; +- for (reg = 0; reg < 3; reg++) { +- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && +- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { +- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[reg].Index = tempregi; +- } +- } +- } +-} +- +- +-static void nqssadce_init(struct nqssadce_state* s) +-{ +- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; +- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +-} +- +- +-static GLuint build_dtm(GLuint depthmode) +-{ +- switch(depthmode) { +- default: +- case GL_LUMINANCE: return 0; +- case GL_INTENSITY: return 1; +- case GL_ALPHA: return 2; +- } +-} +- +-static GLuint build_func(GLuint comparefunc) +-{ +- return comparefunc - GL_NEVER; +-} +- +- +-/** +- * Collect all external state that is relevant for compiling the given +- * fragment program. +- */ +-static void build_state( +- r300ContextPtr r300, +- struct r300_fragment_program *fp, +- struct r300_fragment_program_external_state *state) +-{ +- int unit; +- +- _mesa_bzero(state, sizeof(*state)); +- +- for(unit = 0; unit < 16; ++unit) { +- if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { +- struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; +- +- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); +- state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); +- } +- } +-} +- +- +-void r300TranslateFragmentShader(r300ContextPtr r300, +- struct r300_fragment_program *fp) +-{ +- struct r300_fragment_program_external_state state; +- +- build_state(r300, fp, &state); +- if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { +- /* TODO: cache compiled programs */ +- fp->translated = GL_FALSE; +- _mesa_memcpy(&fp->state, &state, sizeof(state)); +- } +- +- if (!fp->translated) { +- struct r300_fragment_program_compiler compiler; +- +- compiler.r300 = r300; +- compiler.fp = fp; +- compiler.code = &fp->code; +- compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Fragment Program: Initial program:\n"); +- _mesa_print_program(compiler.program); +- } +- +- insert_WPOS_trailer(&compiler); +- +- struct radeon_program_transformation transformations[] = { +- { &transform_TEX, &compiler }, +- { &radeonTransformALU, 0 }, +- { &radeonTransformTrigSimple, 0 } +- }; +- radeonLocalTransform( +- r300->radeon.glCtx, +- compiler.program, +- 3, transformations); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Fragment Program: After native rewrite:\n"); +- _mesa_print_program(compiler.program); +- } +- +- struct radeon_nqssadce_descr nqssadce = { +- .Init = &nqssadce_init, +- .IsNativeSwizzle = &r300FPIsNativeSwizzle, +- .BuildSwizzle = &r300FPBuildSwizzle, +- .RewriteDepthOut = GL_TRUE +- }; +- radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Compiler: after NqSSA-DCE:\n"); +- _mesa_print_program(compiler.program); +- } +- +- if (!r300FragmentProgramEmit(&compiler)) +- fp->error = GL_TRUE; +- +- /* Subtle: Rescue any parameters that have been added during transformations */ +- _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); +- fp->mesa_program.Base.Parameters = compiler.program->Parameters; +- compiler.program->Parameters = 0; +- +- _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); +- +- if (!fp->error) +- fp->translated = GL_TRUE; +- if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) +- r300FragmentProgramDump(fp, &fp->code); +- r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); +- } +- +- update_params(r300, fp); +-} +- + /* just some random things... */ +-void r300FragmentProgramDump( +- struct r300_fragment_program *fp, +- struct r300_fragment_program_code *code) ++void r300FragmentProgramDump(union rX00_fragment_program_code *c) + { ++ struct r300_fragment_program_code *code = &c->r300; + int n, i, j; + static int pc = 0; + +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h +index 94fb554..affa022 100644 +--- a/src/mesa/drivers/dri/r300/r300_fragprog.h ++++ b/src/mesa/drivers/dri/r300/r300_fragprog.h +@@ -33,9 +33,6 @@ + #ifndef __R300_FRAGPROG_H_ + #define __R300_FRAGPROG_H_ + +-#include "main/glheader.h" +-#include "main/macros.h" +-#include "main/enums.h" + #include "shader/program.h" + #include "shader/prog_instruction.h" + +@@ -105,28 +102,10 @@ + + #endif + +-struct r300_fragment_program; +- +-extern void r300TranslateFragmentShader(r300ContextPtr r300, +- struct r300_fragment_program *fp); +- +- +-/** +- * Used internally by the r300 fragment program code to store compile-time +- * only data. +- */ +-struct r300_fragment_program_compiler { +- r300ContextPtr r300; +- struct r300_fragment_program *fp; +- struct r300_fragment_program_code *code; +- struct gl_program *program; +-}; +- + extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); + ++extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); + +-extern void r300FragmentProgramDump( +- struct r300_fragment_program *fp, +- struct r300_fragment_program_code *code); ++extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + + #endif +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c +new file mode 100644 +index 0000000..6eaad76 +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c +@@ -0,0 +1,291 @@ ++/* ++ * Copyright (C) 2009 Maciej Cencora ++ * ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ */ ++ ++/** ++ * \file ++ * ++ * Fragment program compiler. Perform transformations on the intermediate ++ * representation until the program is in a form where we can translate ++ * it more or less directly into machine-readable form. ++ * ++ * \author Ben Skeggs ++ * \author Jerome Glisse ++ */ ++ ++#include "r300_fragprog_common.h" ++ ++#include "shader/program.h" ++#include "shader/prog_parameter.h" ++#include "shader/prog_print.h" ++ ++#include "r300_state.h" ++#include "r300_fragprog.h" ++#include "r300_fragprog_swizzle.h" ++#include "r500_fragprog.h" ++ ++#include "radeon_program.h" ++#include "radeon_program_alu.h" ++ ++static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) ++{ ++ /* Ask Mesa nicely to fill in ParameterValues for us */ ++ if (fp->Base.Parameters) ++ _mesa_load_state_parameters(ctx, fp->Base.Parameters); ++} ++ ++static void nqssadce_init(struct nqssadce_state* s) ++{ ++ s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; ++ s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; ++} ++ ++/** ++ * Transform the program to support fragment.position. ++ * ++ * Introduce a small fragment at the start of the program that will be ++ * the only code that directly reads the FRAG_ATTRIB_WPOS input. ++ * All other code pieces that reference that input will be rewritten ++ * to read from a newly allocated temporary. ++ * ++ */ ++static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) ++{ ++ GLuint InputsRead = compiler->fp->Base.Base.InputsRead; ++ ++ if (!(InputsRead & FRAG_BIT_WPOS)) ++ return; ++ ++ static gl_state_index tokens[STATE_LENGTH] = { ++ STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 ++ }; ++ struct prog_instruction *fpi; ++ GLuint window_index; ++ int i = 0; ++ GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); ++ ++ _mesa_insert_instructions(compiler->program, 0, 3); ++ fpi = compiler->program->Instructions; ++ ++ /* perspective divide */ ++ fpi[i].Opcode = OPCODE_RCP; ++ ++ fpi[i].DstReg.File = PROGRAM_TEMPORARY; ++ fpi[i].DstReg.Index = tempregi; ++ fpi[i].DstReg.WriteMask = WRITEMASK_W; ++ fpi[i].DstReg.CondMask = COND_TR; ++ ++ fpi[i].SrcReg[0].File = PROGRAM_INPUT; ++ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; ++ fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; ++ i++; ++ ++ fpi[i].Opcode = OPCODE_MUL; ++ ++ fpi[i].DstReg.File = PROGRAM_TEMPORARY; ++ fpi[i].DstReg.Index = tempregi; ++ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; ++ fpi[i].DstReg.CondMask = COND_TR; ++ ++ fpi[i].SrcReg[0].File = PROGRAM_INPUT; ++ fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; ++ fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; ++ ++ fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; ++ fpi[i].SrcReg[1].Index = tempregi; ++ fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; ++ i++; ++ ++ /* viewport transformation */ ++ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); ++ ++ fpi[i].Opcode = OPCODE_MAD; ++ ++ fpi[i].DstReg.File = PROGRAM_TEMPORARY; ++ fpi[i].DstReg.Index = tempregi; ++ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; ++ fpi[i].DstReg.CondMask = COND_TR; ++ ++ fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; ++ fpi[i].SrcReg[0].Index = tempregi; ++ fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); ++ ++ fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; ++ fpi[i].SrcReg[1].Index = window_index; ++ fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); ++ ++ fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; ++ fpi[i].SrcReg[2].Index = window_index; ++ fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); ++ i++; ++ ++ for (; i < compiler->program->NumInstructions; ++i) { ++ int reg; ++ for (reg = 0; reg < 3; reg++) { ++ if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && ++ fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { ++ fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; ++ fpi[i].SrcReg[reg].Index = tempregi; ++ } ++ } ++ } ++} ++ ++static GLuint build_dtm(GLuint depthmode) ++{ ++ switch(depthmode) { ++ default: ++ case GL_LUMINANCE: return 0; ++ case GL_INTENSITY: return 1; ++ case GL_ALPHA: return 2; ++ } ++} ++ ++static GLuint build_func(GLuint comparefunc) ++{ ++ return comparefunc - GL_NEVER; ++} ++ ++/** ++ * Collect all external state that is relevant for compiling the given ++ * fragment program. ++ */ ++static void build_state( ++ r300ContextPtr r300, ++ struct r300_fragment_program *fp, ++ struct r300_fragment_program_external_state *state) ++{ ++ int unit; ++ ++ _mesa_bzero(state, sizeof(*state)); ++ ++ for(unit = 0; unit < 16; ++unit) { ++ if (fp->Base.Base.ShadowSamplers & (1 << unit)) { ++ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; ++ ++ state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); ++ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); ++ } ++ } ++} ++ ++void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) ++{ ++ r300ContextPtr r300 = R300_CONTEXT(ctx); ++ struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; ++ struct r300_fragment_program_external_state state; ++ ++ build_state(r300, r300_fp, &state); ++ if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { ++ /* TODO: cache compiled programs */ ++ r300_fp->translated = GL_FALSE; ++ _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); ++ } ++ ++ if (!r300_fp->translated) { ++ struct r300_fragment_program_compiler compiler; ++ ++ compiler.r300 = r300; ++ compiler.fp = r300_fp; ++ compiler.code = &r300_fp->code; ++ compiler.program = _mesa_clone_program(ctx, &fp->Base); ++ ++ if (RADEON_DEBUG & DEBUG_PIXEL) { ++ fflush(stdout); ++ _mesa_printf("Fragment Program: Initial program:\n"); ++ _mesa_print_program(compiler.program); ++ fflush(stdout); ++ } ++ ++ insert_WPOS_trailer(&compiler); ++ ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { ++ struct radeon_program_transformation transformations[] = { ++ { &r500_transform_TEX, &compiler }, ++ { &radeonTransformALU, 0 }, ++ { &radeonTransformDeriv, 0 }, ++ { &radeonTransformTrigScale, 0 } ++ }; ++ radeonLocalTransform(ctx, compiler.program, 4, transformations); ++ } else { ++ struct radeon_program_transformation transformations[] = { ++ { &r300_transform_TEX, &compiler }, ++ { &radeonTransformALU, 0 }, ++ { &radeonTransformTrigSimple, 0 } ++ }; ++ radeonLocalTransform(ctx, compiler.program, 3, transformations); ++ } ++ ++ if (RADEON_DEBUG & DEBUG_PIXEL) { ++ _mesa_printf("Fragment Program: After native rewrite:\n"); ++ _mesa_print_program(compiler.program); ++ fflush(stdout); ++ } ++ ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { ++ struct radeon_nqssadce_descr nqssadce = { ++ .Init = &nqssadce_init, ++ .IsNativeSwizzle = &r500FPIsNativeSwizzle, ++ .BuildSwizzle = &r500FPBuildSwizzle, ++ .RewriteDepthOut = GL_TRUE ++ }; ++ radeonNqssaDce(ctx, compiler.program, &nqssadce); ++ } else { ++ struct radeon_nqssadce_descr nqssadce = { ++ .Init = &nqssadce_init, ++ .IsNativeSwizzle = &r300FPIsNativeSwizzle, ++ .BuildSwizzle = &r300FPBuildSwizzle, ++ .RewriteDepthOut = GL_TRUE ++ }; ++ radeonNqssaDce(ctx, compiler.program, &nqssadce); ++ } ++ ++ if (RADEON_DEBUG & DEBUG_PIXEL) { ++ _mesa_printf("Compiler: after NqSSA-DCE:\n"); ++ _mesa_print_program(compiler.program); ++ fflush(stdout); ++ } ++ ++ if (!r300->vtbl.FragmentProgramEmit(&compiler)) ++ r300_fp->error = GL_TRUE; ++ ++ /* Subtle: Rescue any parameters that have been added during transformations */ ++ _mesa_free_parameter_list(fp->Base.Parameters); ++ fp->Base.Parameters = compiler.program->Parameters; ++ compiler.program->Parameters = 0; ++ ++ _mesa_reference_program(ctx, &compiler.program, NULL); ++ ++ r300_fp->translated = GL_TRUE; ++ ++ r300UpdateStateParameters(ctx, _NEW_PROGRAM); ++ ++ if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) ++ r300->vtbl.FragmentProgramDump(&r300_fp->code); ++ } ++ ++ update_params(ctx, fp); ++} +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h +new file mode 100644 +index 0000000..85ea86f +--- /dev/null ++++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (C) 2009 Maciej Cencora ++ * ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ */ ++ ++#ifndef __R300_FRAGPROG_COMMON_H_ ++#define __R300_FRAGPROG_COMMON_H_ ++ ++#include "main/mtypes.h" ++ ++extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); ++ ++#endif +diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +index 9f0b7e3..af8bb38 100644 +--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c ++++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +@@ -47,7 +47,7 @@ + + #define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ +- struct r300_fragment_program_code *code = c->code ++ struct r300_fragment_program_code *code = &c->code->r300 + + #define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ +@@ -66,7 +66,7 @@ static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwind + } + + if (*hwindex >= code->const_nr) { +- if (*hwindex >= PFS_NUM_CONST_REGS) { ++ if (*hwindex >= R300_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } +@@ -138,7 +138,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) + { + PROG_CODE; + +- if (code->alu.length >= PFS_MAX_ALU_INST) { ++ if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } +@@ -201,7 +201,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; + code->node[code->cur_node].flags |= R300_W_OUT; +- c->fp->WritesDepth = GL_TRUE; ++ c->fp->writes_depth = GL_TRUE; + } + + return GL_TRUE; +@@ -213,7 +213,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) + */ + static GLboolean finish_node(struct r300_fragment_program_compiler *c) + { +- struct r300_fragment_program_code *code = c->code; ++ struct r300_fragment_program_code *code = &c->code->r300; + struct r300_fragment_program_node *node = &code->node[code->cur_node]; + + if (node->alu_end < 0) { +@@ -275,7 +275,7 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) + { + PROG_CODE; + +- if (code->tex.length >= PFS_MAX_TEX_INST) { ++ if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } +@@ -318,7 +318,7 @@ static const struct radeon_pair_handler pair_handler = { + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, +- .MaxHwTemps = PFS_NUM_TEMP_REGS ++ .MaxHwTemps = R300_PFS_NUM_TEMP_REGS + }; + + /** +@@ -327,7 +327,7 @@ static const struct radeon_pair_handler pair_handler = { + */ + GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) + { +- struct r300_fragment_program_code *code = compiler->code; ++ struct r300_fragment_program_code *code = &compiler->code->r300; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + code->node[0].alu_end = -1; diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ee85e22..a7f5121 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -13759,7 +15099,7 @@ index 625a7f6..0000000 - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h -index 8f1a663..ed552d0 100644 +index 8f1a663..79dd1e1 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1531,6 +1531,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -13776,11 +15116,24 @@ index 8f1a663..ed552d0 100644 /* BEGIN: Guess from R200 */ # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +@@ -2425,6 +2432,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + /* Z Buffer Clear Value */ + #define R300_ZB_DEPTHCLEARVALUE 0x4f28 + ++#define R300_ZB_ZMASK_OFFSET 0x4f30 ++#define R300_ZB_ZMASK_PITCH 0x4f34 ++#define R300_ZB_ZMASK_WRINDEX 0x4f38 ++#define R300_ZB_ZMASK_DWORD 0x4f3c ++#define R300_ZB_ZMASK_RDINDEX 0x4f40 ++ + /* Hierarchical Z Memory Offset */ + #define R300_ZB_HIZ_OFFSET 0x4f44 + diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c -index 16ce4a1..924305d 100644 +index 16ce4a1..f87fee4 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c -@@ -66,8 +66,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -66,15 +66,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_vp_build.h" #include "radeon_reg.h" #include "radeon_macros.h" @@ -13789,7 +15142,16 @@ index 16ce4a1..924305d 100644 #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" -@@ -175,85 +173,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) + #include "r300_reg.h" + #include "r300_tex.h" + #include "r300_emit.h" +-#include "r300_fragprog.h" ++#include "r300_fragprog_common.h" ++ + extern int future_hw_tcl_on; + + /** +@@ -175,85 +174,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -13845,7 +15207,7 @@ index 16ce4a1..924305d 100644 + ((vertex_count + 0) << 16) | + type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); -+ ++ + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | @@ -13878,21 +15240,19 @@ index 16ce4a1..924305d 100644 - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; -- -+ + if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); - e32(nr); -- + - for (i = 0; i + 1 < nr; i += 2) { - e32((rmesa->state.aos[i].aos_size << 0) | - (rmesa->state.aos[i].aos_stride << 8) | - (rmesa->state.aos[i + 1].aos_size << 16) | - (rmesa->state.aos[i + 1].aos_stride << 24)); -+ + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); @@ -13903,7 +15263,7 @@ index 16ce4a1..924305d 100644 + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); -+ ++ + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, @@ -13919,7 +15279,10 @@ index 16ce4a1..924305d 100644 + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } -+ + +- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); +- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); +- } + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -13934,8 +15297,10 @@ index 16ce4a1..924305d 100644 + END_BATCH(); + } else { -- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); -- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); +- if (nr & 1) { +- e32((rmesa->state.aos[nr - 1].aos_size << 0) | +- (rmesa->state.aos[nr - 1].aos_stride << 8)); +- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); @@ -13945,7 +15310,7 @@ index 16ce4a1..924305d 100644 + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); -+ ++ + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); @@ -13953,7 +15318,7 @@ index 16ce4a1..924305d 100644 + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } -+ ++ + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -13985,12 +15350,7 @@ index 16ce4a1..924305d 100644 + } + END_BATCH(); } - -- if (nr & 1) { -- e32((rmesa->state.aos[nr - 1].aos_size << 0) | -- (rmesa->state.aos[nr - 1].aos_stride << 8)); -- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); -- } ++ } static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) @@ -14009,7 +15369,7 @@ index 16ce4a1..924305d 100644 } static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, -@@ -269,6 +346,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, +@@ -269,6 +347,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, if (type < 0 || num_verts <= 0) return; @@ -14022,7 +15382,7 @@ index 16ce4a1..924305d 100644 if (vb->Elts) { if (num_verts > 65535) { /* not implemented yet */ -@@ -287,12 +370,13 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, +@@ -287,12 +371,13 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, * arrays. *sigh* */ r300EmitElts(ctx, vb->Elts, num_verts); @@ -14039,7 +15399,7 @@ index 16ce4a1..924305d 100644 } static GLboolean r300RunRender(GLcontext * ctx, -@@ -303,7 +387,6 @@ static GLboolean r300RunRender(GLcontext * ctx, +@@ -303,7 +388,6 @@ static GLboolean r300RunRender(GLcontext * ctx, TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; @@ -14047,7 +15407,7 @@ index 16ce4a1..924305d 100644 if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); -@@ -314,7 +397,7 @@ static GLboolean r300RunRender(GLcontext * ctx, +@@ -314,7 +398,7 @@ static GLboolean r300RunRender(GLcontext * ctx, r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); @@ -14056,7 +15416,7 @@ index 16ce4a1..924305d 100644 for (i = 0; i < vb->PrimitiveCount; i++) { GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); -@@ -325,11 +408,7 @@ static GLboolean r300RunRender(GLcontext * ctx, +@@ -325,11 +409,7 @@ static GLboolean r300RunRender(GLcontext * ctx, r300EmitCacheFlush(rmesa); @@ -14069,38 +15429,198 @@ index 16ce4a1..924305d 100644 return GL_FALSE; } -@@ -348,7 +427,8 @@ static int r300Fallback(GLcontext * ctx) - { +@@ -349,38 +429,19 @@ static int r300Fallback(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; -- -+ + +- /* Do we need to use new-style shaders? +- * Also is there a better way to do this? */ +- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +- struct r500_fragment_program *fp = (struct r500_fragment_program *) +- (char *)ctx->FragmentProgram._Current; +- if (fp) { +- if (!fp->translated) { +- r500TranslateFragmentShader(r300, fp); +- FALLBACK_IF(!fp->translated); +- } +- } +- } else { +- struct r300_fragment_program *fp = (struct r300_fragment_program *) +- (char *)ctx->FragmentProgram._Current; +- if (fp) { +- if (!fp->translated) { +- r300TranslateFragmentShader(r300, fp); +- FALLBACK_IF(!fp->translated); +- } +- } + FALLBACK_IF(r300->radeon.Fallback); - /* Do we need to use new-style shaders? - * Also is there a better way to do this? */ - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { -@@ -410,6 +490,9 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx, ++ ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; ++ if (fp && !fp->translated) { ++ r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); ++ FALLBACK_IF(fp->error); + } + + FALLBACK_IF(ctx->RenderMode != GL_RENDER); + +- /* If GL_EXT_stencil_two_side is disabled, this fallback check can +- * be removed. +- */ +- FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] +- || ctx->Stencil.ValueMask[0] != +- ctx->Stencil.ValueMask[back] +- || ctx->Stencil.WriteMask[0] != +- ctx->Stencil.WriteMask[back]); ++ FALLBACK_IF(ctx->Stencil.Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] ++ || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] ++ || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])); + + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); +@@ -410,6 +471,9 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx, if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) return GL_TRUE; + if (!r300ValidateBuffers(ctx)) + return GL_TRUE; -+ ++ return r300RunRender(ctx, stage); } -@@ -432,6 +515,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx, +@@ -432,6 +496,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx, return GL_TRUE; } + if (!r300ValidateBuffers(ctx)) + return GL_TRUE; -+ ++ r300UpdateShaders(rmesa); vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); +diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c +index f30fd98..0133b83 100644 +--- a/src/mesa/drivers/dri/r300/r300_shader.c ++++ b/src/mesa/drivers/dri/r300/r300_shader.c +@@ -1,18 +1,42 @@ ++/* ++ * Copyright 2009 Maciej Cencora ++ * ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sublicense, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ */ + + #include "main/glheader.h" + + #include "shader/program.h" + #include "tnl/tnl.h" + #include "r300_context.h" +-#include "r300_fragprog.h" ++#include "r300_fragprog_common.h" + + static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, + GLuint id) + { +- r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program_cont *vp; +- struct r300_fragment_program *r300_fp; +- struct r500_fragment_program *r500_fp; ++ struct r300_fragment_program *fp; + + switch (target) { + case GL_VERTEX_STATE_PROGRAM_NV: +@@ -20,28 +44,12 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, + vp = CALLOC_STRUCT(r300_vertex_program_cont); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, + target, id); +- case GL_FRAGMENT_PROGRAM_ARB: +- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +- r500_fp = CALLOC_STRUCT(r500_fragment_program); +- r500_fp->ctx = ctx; +- return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, +- target, id); +- } else { +- r300_fp = CALLOC_STRUCT(r300_fragment_program); +- return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, +- target, id); +- } + + case GL_FRAGMENT_PROGRAM_NV: +- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { +- r500_fp = CALLOC_STRUCT(r500_fragment_program); +- return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, +- target, id); +- } else { +- r300_fp = CALLOC_STRUCT(r300_fragment_program); +- return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, +- target, id); +- } ++ case GL_FRAGMENT_PROGRAM_ARB: ++ fp = CALLOC_STRUCT(r300_fragment_program); ++ return _mesa_init_fragment_program(ctx, &fp->Base, target, id); ++ + default: + _mesa_problem(ctx, "Bad target in r300NewProgram"); + } +@@ -57,20 +65,15 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) + static void + r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) + { +- r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program_cont *vp = (void *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; +- struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + vp->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: +- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- r500_fp->translated = GL_FALSE; +- else +- r300_fp->translated = GL_FALSE; ++ r300_fp->translated = GL_FALSE; + break; + } + +@@ -81,7 +84,14 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) + static GLboolean + r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) + { +- return GL_TRUE; ++ if (target == GL_FRAGMENT_PROGRAM_ARB) { ++ struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; ++ if (!fp->translated) ++ r300TranslateFragmentShader(ctx, &fp->Base); ++ ++ return !fp->error; ++ } else ++ return GL_TRUE; + } + + void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c -index 8095538..6796d36 100644 +index 8095538..99441a2 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -14111,7 +15631,7 @@ index 8095538..6796d36 100644 #include "main/simple_list.h" #include "main/api_arrayelt.h" #include "main/texformat.h" -@@ -53,8 +54,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -53,20 +54,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -14120,7 +15640,59 @@ index 8095538..6796d36 100644 #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" -@@ -589,8 +588,14 @@ static void r300SetDepthState(GLcontext * ctx) + #include "r300_reg.h" + #include "r300_emit.h" +-#include "r300_fragprog.h" + #include "r300_tex.h" ++#include "r300_fragprog_common.h" ++#include "r300_fragprog.h" ++#include "r500_fragprog.h" + + #include "drirenderbuffer.h" + + extern int future_hw_tcl_on; +-extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); + + static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) + { +@@ -451,18 +451,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) + + static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) + { +- r300ContextPtr r300 = R300_CONTEXT(ctx); ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + +- if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { +- struct r300_fragment_program *fp = (struct r300_fragment_program *) +- (char *)ctx->FragmentProgram._Current; +- return (fp && fp->WritesDepth); +- } else { +- struct r500_fragment_program* fp = +- (struct r500_fragment_program*)(char*) +- ctx->FragmentProgram._Current; +- return (fp && fp->writes_depth); +- } ++ return (fp && fp->writes_depth); + } + + static void r300SetEarlyZState(GLcontext * ctx) +@@ -533,8 +524,6 @@ static void r300SetAlphaState(GLcontext * ctx) + R300_STATECHANGE(r300, at); + r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; +- +- r300SetEarlyZState(ctx); + } + + static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +@@ -582,15 +571,19 @@ static void r300SetDepthState(GLcontext * ctx) + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; + } +- +- r300SetEarlyZState(ctx); + } + static void r300SetStencilState(GLcontext * ctx, GLboolean state) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -14136,7 +15708,21 @@ index 8095538..6796d36 100644 R300_STATECHANGE(r300, zs); if (state) { r300->hw.zs.cmd[R300_ZS_CNTL_0] |= -@@ -935,15 +940,25 @@ static void r300UpdateWindow(GLcontext * ctx) +@@ -735,7 +728,12 @@ static void r300ColorMask(GLcontext * ctx, + static void r300PointSize(GLcontext * ctx, GLfloat size) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); +- /* same size limits for AA, non-AA points */ ++ ++ /* We need to clamp to user defined range here, because ++ * the HW clamping happens only for per vertex point size. */ ++ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); ++ ++ /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); + + R300_STATECHANGE(r300, ps); +@@ -935,15 +933,25 @@ static void r300UpdateWindow(GLcontext * ctx) GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -14167,7 +15753,7 @@ index 8095538..6796d36 100644 R300_STATECHANGE(rmesa, vpt); rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); -@@ -962,6 +977,8 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y, +@@ -962,6 +970,8 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y, * values, or keep the originals hanging around. */ r300UpdateWindow(ctx); @@ -14176,7 +15762,7 @@ index 8095538..6796d36 100644 } static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) -@@ -994,64 +1011,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) +@@ -994,64 +1004,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) radeonUpdateScissor(ctx); } @@ -14241,7 +15827,28 @@ index 8095538..6796d36 100644 static void r300FetchStateParameter(GLcontext * ctx, const gl_state_index state[STATE_LENGTH], -@@ -1269,7 +1228,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +@@ -1114,7 +1066,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) + if (!fp) + return; + +- paramList = fp->mesa_program.Base.Parameters; ++ paramList = fp->Base.Base.Parameters; + + if (!paramList) + return; +@@ -1233,9 +1185,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) + { + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; +- struct r300_fragment_program *fp = (struct r300_fragment_program *) +- (char *)ctx->FragmentProgram._Current; +- struct r300_fragment_program_code *code = &fp->code; ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; ++ struct r300_fragment_program_code *code = &fp->code.r300; + + R300_STATECHANGE(r300, fpt); + +@@ -1269,15 +1220,15 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } r300->hw.fpt.cmd[R300_FPT_CMD_0] = @@ -14251,7 +15858,17 @@ index 8095538..6796d36 100644 } static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) -@@ -1320,7 +1280,7 @@ static GLuint translate_lod_bias(GLfloat bias) + { + int i; +- struct r500_fragment_program *fp = (struct r500_fragment_program *) +- (char *)ctx->FragmentProgram._Current; +- struct r500_fragment_program_code *code = &fp->code; ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; ++ struct r500_fragment_program_code *code = &fp->code.r500; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < code->inst_end + 1; i++) { +@@ -1320,7 +1271,7 @@ static GLuint translate_lod_bias(GLfloat bias) static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -14260,7 +15877,7 @@ index 8095538..6796d36 100644 r300ContextPtr r300 = R300_CONTEXT(ctx); int hw_tmu = 0; int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ -@@ -1354,21 +1314,16 @@ static void r300SetupTextures(GLcontext * ctx) +@@ -1354,21 +1305,16 @@ static void r300SetupTextures(GLcontext * ctx) /* We cannot let disabled tmu offsets pass DRM */ for (i = 0; i < mtu; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { @@ -14285,7 +15902,7 @@ index 8095538..6796d36 100644 } if (RADEON_DEBUG & DEBUG_STATE) -@@ -1379,29 +1334,28 @@ static void r300SetupTextures(GLcontext * ctx) +@@ -1379,29 +1325,28 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = @@ -14324,7 +15941,7 @@ index 8095538..6796d36 100644 WARN_ONCE("micro tiling enabled!\n"); } -@@ -1418,21 +1372,21 @@ static void r300SetupTextures(GLcontext * ctx) +@@ -1418,37 +1363,36 @@ static void r300SetupTextures(GLcontext * ctx) } r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = @@ -14354,25 +15971,246 @@ index 8095538..6796d36 100644 if (!fp) /* should only happenen once, just after context is created */ return; -@@ -1444,7 +1398,7 @@ static void r300SetupTextures(GLcontext * ctx) + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { +- if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { ++ if (fp->Base.UsesKill && last_hw_tmu < 0) { + // The KILL operation requires the first texture unit + // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER0_0, 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); } - r300SetupFragmentShaderTextures(ctx, tmu_mappings); - } else -@@ -1609,7 +1563,7 @@ static void r300SetupRSUnit(GLcontext * ctx) - r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; +- r300SetupFragmentShaderTextures(ctx, tmu_mappings); +- } else +- r500SetupFragmentShaderTextures(ctx, tmu_mappings); ++ } ++ r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", +@@ -1479,7 +1423,7 @@ static void r300SetupRSUnit(GLcontext * ctx) + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else +- RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); ++ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; +@@ -1543,6 +1487,7 @@ static void r300SetupRSUnit(GLcontext * ctx) + } + } + ++ /* We always route 4 texcoord components */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; +@@ -1552,36 +1497,30 @@ static void r300SetupRSUnit(GLcontext * ctx) + continue; + } + +- int swiz; +- +- /* with TCL we always seem to route 4 components */ +- if (hw_tcl_on) +- count = 4; +- else +- count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; +- +- switch(count) { +- case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; +- case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; +- default: +- case 1: +- case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; +- }; +- +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); +- rs_tex_count += count; ++ rs_tex_count += 4; ++ ++tex_ip; ++ ++fp_reg; ++ } ++ ++ if (InputsRead & FRAG_BIT_WPOS) { ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); ++ r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); ++ InputsRead &= ~FRAG_BIT_WPOS; ++ rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0); ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; +- rs_tex_count += 4; ++ rs_tex_count += 1; + ++tex_ip; + ++fp_reg; + } else { +@@ -1589,16 +1528,6 @@ static void r300SetupRSUnit(GLcontext * ctx) + } + } + +- if (InputsRead & FRAG_BIT_WPOS) { +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); +- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); +- InputsRead &= ~FRAG_BIT_WPOS; +- rs_tex_count += 4; +- ++tex_ip; +- ++fp_reg; +- } +- InputsRead &= ~FRAG_BIT_WPOS; +- + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); +@@ -1606,10 +1535,10 @@ static void r300SetupRSUnit(GLcontext * ctx) + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; +- r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; ++ r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; r300->hw.rc.cmd[2] |= high_rr - 1; - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr); -+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); ++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); -@@ -1787,7 +1741,7 @@ static void r500SetupRSUnit(GLcontext * ctx) +@@ -1630,7 +1559,7 @@ static void r500SetupRSUnit(GLcontext * ctx) + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else +- RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); ++ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; +@@ -1694,7 +1623,7 @@ static void r500SetupRSUnit(GLcontext * ctx) + } + } + +- ++ /* We always route 4 texcoord components */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; +@@ -1704,59 +1633,41 @@ static void r500SetupRSUnit(GLcontext * ctx) + continue; + } + +- int swiz = 0; +- +- /* with TCL we always seem to route 4 components */ +- if (hw_tcl_on) +- count = 4; +- else +- count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; +- +- if (count == 4) { +- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; +- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; +- swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; +- swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; +- } else if (count == 3) { +- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; +- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; +- swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; +- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; +- } else if (count == 2) { +- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; +- swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; +- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; +- } else if (count == 1) { +- swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; +- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; +- } else { +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; +- swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; +- swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; +- } ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); +- rs_tex_count += count; ++ rs_tex_count += 4; ++ ++tex_ip; ++ ++fp_reg; ++ } ++ ++ if (InputsRead & FRAG_BIT_WPOS) { ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); ++ ++ r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); ++ InputsRead &= ~FRAG_BIT_WPOS; ++ rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | +- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | +- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | +- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); ++ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | ++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | ++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; +- rs_tex_count += 4; ++ rs_tex_count += 1; + ++tex_ip; + ++fp_reg; + } else { +@@ -1764,19 +1675,6 @@ static void r500SetupRSUnit(GLcontext * ctx) + } + } + +- if (InputsRead & FRAG_BIT_WPOS) { +- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | +- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | +- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | +- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); +- +- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); +- InputsRead &= ~FRAG_BIT_WPOS; +- rs_tex_count += 4; +- ++tex_ip; +- ++fp_reg; +- } +- + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); +@@ -1787,7 +1685,7 @@ static void r500SetupRSUnit(GLcontext * ctx) r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); @@ -14381,7 +16219,32 @@ index 8095538..6796d36 100644 if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); -@@ -1984,6 +1938,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) +@@ -1900,7 +1798,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, + + static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) + { +- struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); ++ struct r300_vertex_shader_state *prog = &(rmesa->vertex_shader); + GLuint o_reg = 0; + GLuint i_reg = 0; + int i; +@@ -1909,11 +1807,11 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) + int program_end = 0; + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { +- if (rmesa->state.sw_tcl_inputs[i] != -1) { ++ if (rmesa->swtcl.sw_tcl_inputs[i] != -1) { + prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT); +- prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); +- prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); +- prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); ++ prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + program_end += 4; + i_reg++; + } +@@ -1984,6 +1882,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } @@ -14389,7 +16252,7 @@ index 8095538..6796d36 100644 static void r300SetupVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; -@@ -2013,6 +1968,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) +@@ -2013,6 +1912,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) */ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) { @@ -14397,7 +16260,7 @@ index 8095538..6796d36 100644 if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(cap), -@@ -2058,8 +2014,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +@@ -2058,8 +1958,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) case GL_POLYGON_OFFSET_FILL: r300SetPolygonOffsetState(ctx, state); break; @@ -14411,7 +16274,7 @@ index 8095538..6796d36 100644 break; } } -@@ -2078,7 +2038,7 @@ static void r300ResetHwState(r300ContextPtr r300) +@@ -2078,7 +1982,7 @@ static void r300ResetHwState(r300ContextPtr r300) if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); @@ -14420,7 +16283,7 @@ index 8095538..6796d36 100644 r300ColorMask(ctx, ctx->Color.ColorMask[RCOMP], -@@ -2100,8 +2060,6 @@ static void r300ResetHwState(r300ContextPtr r300) +@@ -2100,8 +2004,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300UpdateCulling(ctx); @@ -14429,7 +16292,7 @@ index 8095538..6796d36 100644 r300SetBlendState(ctx); r300SetLogicOpState(ctx); -@@ -2240,20 +2198,6 @@ static void r300ResetHwState(r300ContextPtr r300) +@@ -2240,20 +2142,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300BlendColor(ctx, ctx->Color.BlendColor); @@ -14450,7 +16313,7 @@ index 8095538..6796d36 100644 r300->hw.rb3d_dither_ctl.cmd[1] = 0; r300->hw.rb3d_dither_ctl.cmd[2] = 0; r300->hw.rb3d_dither_ctl.cmd[3] = 0; -@@ -2269,34 +2213,8 @@ static void r300ResetHwState(r300ContextPtr r300) +@@ -2269,41 +2157,15 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; @@ -14485,7 +16348,16 @@ index 8095538..6796d36 100644 r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; r300->hw.zstencil_format.cmd[3] = 0x00000003; r300->hw.zstencil_format.cmd[4] = 0x00000000; -@@ -2317,7 +2235,7 @@ static void r300ResetHwState(r300ContextPtr r300) + r300SetEarlyZState(ctx); + +- r300->hw.unk4F30.cmd[1] = 0; +- r300->hw.unk4F30.cmd[2] = 0; ++ r300->hw.zb_zmask.cmd[1] = 0; ++ r300->hw.zb_zmask.cmd[2] = 0; + + r300->hw.zb_hiz_offset.cmd[1] = 0; + +@@ -2317,7 +2179,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; } @@ -14494,7 +16366,7 @@ index 8095538..6796d36 100644 } void r300UpdateShaders(r300ContextPtr rmesa) -@@ -2328,8 +2246,8 @@ void r300UpdateShaders(r300ContextPtr rmesa) +@@ -2328,8 +2190,8 @@ void r300UpdateShaders(r300ContextPtr rmesa) ctx = rmesa->radeon.glCtx; @@ -14505,7 +16377,39 @@ index 8095538..6796d36 100644 for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { rmesa->temp_attrib[i] = -@@ -2408,10 +2326,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) +@@ -2383,24 +2245,18 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, + } + + +-static void r300SetupPixelShader(r300ContextPtr rmesa) ++static GLboolean r300SetupPixelShader(GLcontext *ctx) + { +- GLcontext *ctx = rmesa->radeon.glCtx; +- struct r300_fragment_program *fp = (struct r300_fragment_program *) +- (char *)ctx->FragmentProgram._Current; ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code; + int i, k; + +- if (!fp) /* should only happenen once, just after context is created */ +- return; ++ /* Program is not native, fallback to software */ ++ if (fp->error) ++ return GL_FALSE; + +- r300TranslateFragmentShader(rmesa, fp); +- if (!fp->translated) { +- fprintf(stderr, "%s: No valid fragment shader, exiting\n", +- __FUNCTION__); +- return; +- } +- code = &fp->code; ++ code = &fp->code.r300; + + r300SetupTextures(ctx); + +@@ -2408,10 +2264,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, fpi[1]); R300_STATECHANGE(rmesa, fpi[2]); R300_STATECHANGE(rmesa, fpi[3]); @@ -14520,7 +16424,7 @@ index 8095538..6796d36 100644 for (i = 0; i < code->alu.length; i++) { rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; -@@ -2442,7 +2360,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) +@@ -2442,15 +2298,17 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } R300_STATECHANGE(rmesa, fpp); @@ -14528,17 +16432,113 @@ index 8095538..6796d36 100644 + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); -@@ -2534,7 +2452,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) +- &fp->mesa_program.Base, code->constant[i]); ++ &fp->Base.Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); + } ++ ++ return GL_TRUE; + } + + #define bump_r500fp_count(ptr, new_count) do{\ +@@ -2467,27 +2325,21 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ + } while(0) + +-static void r500SetupPixelShader(r300ContextPtr rmesa) ++static GLboolean r500SetupPixelShader(GLcontext *ctx) + { +- GLcontext *ctx = rmesa->radeon.glCtx; +- struct r500_fragment_program *fp = (struct r500_fragment_program *) +- (char *)ctx->FragmentProgram._Current; ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + int i; + struct r500_fragment_program_code *code; + +- if (!fp) /* should only happenen once, just after context is created */ +- return; +- + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + +- r500TranslateFragmentShader(rmesa, fp); +- if (!fp->translated) { +- fprintf(stderr, "%s: No valid fragment shader, exiting\n", +- __FUNCTION__); +- return; +- } +- code = &fp->code; ++ /* Program is not native, fallback to software */ ++ if (fp->error) ++ return GL_FALSE; ++ ++ code = &fp->code.r500; + + r300SetupTextures(ctx); + +@@ -2519,7 +2371,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, +- &fp->mesa_program.Base, code->constant[i]); ++ &fp->Base.Base, code->constant[i]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); +@@ -2527,6 +2379,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + ++ return GL_TRUE; + } + + void r300UpdateShaderStates(r300ContextPtr rmesa) +@@ -2534,7 +2387,10 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) GLcontext *ctx; ctx = rmesa->radeon.glCtx; - r300UpdateTextureState(ctx); ++ /* should only happenen once, just after context is created */ ++ if (!ctx->FragmentProgram._Current) ++ return; ++ r300SetEarlyZState(ctx); /* w_fmt value is set to get best performance -@@ -2587,12 +2504,16 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) - _ae_invalidate_state(ctx, new_state); +@@ -2558,19 +2414,15 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) + rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } + +- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- r500SetupPixelShader(rmesa); +- else +- r300SetupPixelShader(rmesa); ++ r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + +- if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) +- r500SetupRSUnit(ctx); +- else +- r300SetupRSUnit(ctx); ++ if (!rmesa->vtbl.SetupPixelShader(ctx)) ++ return; ++ ++ rmesa->vtbl.SetupRSUnit(ctx); + + if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + r300SetupVertexProgram(rmesa); +- + } + + /** +@@ -2584,15 +2436,18 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); +- _ae_invalidate_state(ctx, new_state); if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { - r300UpdateDrawBuffer(ctx); @@ -14556,7 +16556,7 @@ index 8095538..6796d36 100644 } /** -@@ -2602,30 +2523,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) +@@ -2602,32 +2457,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) */ void r300InitState(r300ContextPtr r300) { @@ -14584,10 +16584,12 @@ index 8095538..6796d36 100644 - r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 && - ctx->Visual.depthBits == 24); - - memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); - +- memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); +- r300ResetHwState(r300); -@@ -2661,7 +2558,6 @@ void r300UpdateClipPlanes( GLcontext *ctx ) + } + +@@ -2661,7 +2490,6 @@ void r300UpdateClipPlanes( GLcontext *ctx ) */ void r300InitStateFuncs(struct dd_function_table *functions) { @@ -14595,7 +16597,7 @@ index 8095538..6796d36 100644 functions->UpdateState = r300InvalidateState; functions->AlphaFunc = r300AlphaFunc; -@@ -2697,4 +2593,8 @@ void r300InitStateFuncs(struct dd_function_table *functions) +@@ -2697,4 +2525,25 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->RenderMode = r300RenderMode; functions->ClipPlane = r300ClipPlane; @@ -14603,6 +16605,23 @@ index 8095538..6796d36 100644 + + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; ++} ++ ++void r300InitShaderFunctions(r300ContextPtr r300) ++{ ++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { ++ r300->vtbl.SetupRSUnit = r500SetupRSUnit; ++ r300->vtbl.SetupPixelShader = r500SetupPixelShader; ++ r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; ++ r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit; ++ r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; ++ } else { ++ r300->vtbl.SetupRSUnit = r300SetupRSUnit; ++ r300->vtbl.SetupPixelShader = r300SetupPixelShader; ++ r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; ++ r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit; ++ r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; ++ } } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 0589ab7..247a20e 100644 @@ -14645,22 +16664,51 @@ index 0589ab7..247a20e 100644 extern int future_hw_tcl_on; void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c -index d463ab3..f57516a 100644 +index ba3621b..a40d037 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c -@@ -56,26 +56,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -28,362 +28,303 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + /* + * Authors: + * Dave Airlie ++ * Maciej Cencora + */ + +-/* derived from r200 swtcl path */ +- +- +- +-#include "main/glheader.h" +-#include "main/mtypes.h" +-#include "main/colormac.h" +-#include "main/enums.h" +-#include "main/image.h" +-#include "main/imports.h" +-#include "main/light.h" +-#include "main/macros.h" +- +-#include "swrast/s_context.h" +-#include "swrast/s_fog.h" +-#include "swrast_setup/swrast_setup.h" +-#include "math/m_translate.h" + #include "tnl/tnl.h" +-#include "tnl/t_context.h" + #include "tnl/t_pipeline.h" + +-#include "r300_context.h" +-#include "r300_swtcl.h" #include "r300_state.h" - #include "r300_ioctl.h" +-#include "r300_ioctl.h" ++#include "r300_swtcl.h" #include "r300_emit.h" -#include "r300_mem.h" +- +-static void flush_last_swtcl_prim( r300ContextPtr rmesa ); +#include "r300_tex.h" --static void flush_last_swtcl_prim( r300ContextPtr rmesa ); -- - -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); -+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset); - void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); +-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ @@ -14681,54 +16729,412 @@ index d463ab3..f57516a 100644 + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ ++} while (0) ++ ++#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask) \ ++do { \ ++ attrs[num_attrs].attr = (_attr); \ ++ attrs[num_attrs].format = (_format); \ ++ attrs[num_attrs].dst_loc = (_dst_loc); \ ++ attrs[num_attrs].swizzle = (_swizzle); \ ++ attrs[num_attrs].write_mask = (_write_mask); \ ++ ++num_attrs; \ } while (0) ++static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten, GLuint vap_out_fmt_1) ++{ ++ r300ContextPtr rmesa = R300_CONTEXT( ctx ); ++ struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; ++ int i, j, reg_count; ++ uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; ++ uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; ++ ++ for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) ++ vir0[i] = vir1[i] = 0; ++ ++ for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) { ++ int tmp, data_format; ++ switch (attrs[i].format) { ++ case EMIT_1F: ++ data_format = R300_DATA_TYPE_FLOAT_1; ++ break; ++ case EMIT_2F: ++ data_format = R300_DATA_TYPE_FLOAT_2; ++ break; ++ case EMIT_3F: ++ data_format = R300_DATA_TYPE_FLOAT_3; ++ break; ++ case EMIT_4F: ++ data_format = R300_DATA_TYPE_FLOAT_4; ++ break; ++ case EMIT_4UB_4F_RGBA: ++ case EMIT_4UB_4F_ABGR: ++ data_format = R300_DATA_TYPE_BYTE | R300_NORMALIZE; ++ break; ++ default: ++ fprintf(stderr, "%s: Invalid data format type", __FUNCTION__); ++ _mesa_exit(-1); ++ break; ++ } ++ ++ tmp = data_format | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); ++ if (i % 2 == 0) { ++ vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; ++ vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); ++ } else { ++ vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; ++ vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; ++ ++j; ++ } ++ } ++ ++ reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1; ++ if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) { ++ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; ++ } else { ++ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; ++ } ++ ++ R300_STATECHANGE(rmesa, vir[0]); ++ R300_STATECHANGE(rmesa, vir[1]); ++ R300_STATECHANGE(rmesa, vof); ++ R300_STATECHANGE(rmesa, vic); ++ ++ if (rmesa->radeon.radeonScreen->kernel_mm) { ++ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; ++ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; ++ rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; ++ rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; ++ } else { ++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; ++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; ++ } ++ ++ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); ++ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); ++ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); ++ /** ++ * Can't use r300VAPOutputCntl1 function because it assumes ++ * that all texture coords have 4 components and that's the case ++ * for HW TCL path, but not for SW TCL. ++ */ ++ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_out_fmt_1; ++} ++ ++ static void r300SetVertexFormat( GLcontext *ctx ) -@@ -114,7 +111,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) - } + { + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; +- DECLARE_RENDERINPUTS(index_bitset); +- GLuint InputsRead = 0, OutputsWritten = 0; +- int vap_fmt_1 = 0; +- int offset = 0; +- int vte = 0; +- int fog_id; +- GLint inputs[VERT_ATTRIB_MAX]; +- GLint tab[VERT_ATTRIB_MAX]; +- int swizzle[VERT_ATTRIB_MAX][4]; +- GLuint i, nr; +- GLuint sz; +- +- DECLARE_RENDERINPUTS(render_inputs_bitset); +- RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); +- RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); +- RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); +- +- vte = rmesa->hw.vte.cmd[1]; +- vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); +- /* Important: +- */ +- if ( VB->NdcPtr != NULL ) { +- VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; +- vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; +- } +- else { +- VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; +- vte |= R300_VTX_W0_FMT; +- } ++ int first_free_tex = 0, vap_out_fmt_1 = 0; ++ GLuint InputsRead = 0; ++ GLuint OutputsWritten = 0; ++ int num_attrs = 0; ++ struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; - assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); +- assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); - rmesa->swtcl.vertex_attr_count = 0; ++ rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; - /* EMIT_ATTR's must be in order as they tell t_vertex.c how to - * build up a hardware vertex. -@@ -266,14 +263,27 @@ static void r300SetVertexFormat( GLcontext *ctx ) +- /* EMIT_ATTR's must be in order as they tell t_vertex.c how to +- * build up a hardware vertex. +- */ +- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { +- sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; ++ /* We always want non Ndc coords format */ ++ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; ++ ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; +- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); +- offset = sz; +- } else { +- offset = 4; +- EMIT_PAD(4 * sizeof(float)); +- } +-/* +- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { +- EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); +- offset += 1; ++ EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); ++ ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW); ++ rmesa->swtcl.coloroffset = 4; + } +-*/ +- if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { +- sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; +- rmesa->swtcl.coloroffset = offset; ++ ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; +- EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); +- offset += sz; ++#if MESA_LITTLE_ENDIAN ++ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); ++ ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); ++#else ++ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); ++ ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); ++#endif + } + +- rmesa->swtcl.specoffset = 0; +- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { +- sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; +- rmesa->swtcl.specoffset = offset; +- EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) { ++ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; ++#if MESA_LITTLE_ENDIAN ++ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); ++ ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); ++#else ++ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); ++ ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); ++#endif ++ rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; + } + +- fog_id = -1; +- if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) { +- /* find first free tex coord slot */ +- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { +- int i; +- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { +- if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { +- fog_id = i; +- break; +- } +- } +- } else { +- fog_id = 0; +- } +- +- if (fog_id == -1) { +- fprintf(stderr, "\tout of free texcoords to do fog\n"); +- _mesa_exit(-1); ++ if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { ++ VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; ++ OutputsWritten |= 1 << VERT_RESULT_BFC0; ++#if MESA_LITTLE_ENDIAN ++ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); ++ ADD_ATTR(VERT_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW); ++#else ++ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); ++ ADD_ATTR(VERT_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW); ++#endif ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) { ++ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); ++ OutputsWritten |= 1 << VERT_RESULT_BFC1; ++#if MESA_LITTLE_ENDIAN ++ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); ++ ADD_ATTR(VERT_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR3, swiz, MASK_XYZW); ++#else ++ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); ++ ADD_ATTR(VERT_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR3, swiz, MASK_XYZW); ++#endif + } ++ } + +- sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size; +- EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1); +- InputsRead |= 1 << VERT_ATTRIB_FOG; +- OutputsWritten |= 1 << VERT_RESULT_FOGC; +- vap_fmt_1 |= sz << (3 * fog_id); ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { ++ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); ++ InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; ++ OutputsWritten |= 1 << VERT_RESULT_PSIZ; ++ EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); ++ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X); + } + +- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { ++ /** ++ * Sending only one texcoord component may lead to lock up, ++ * so for all textures always output 4 texcoord components to RS. ++ */ ++ if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; +- ++ GLuint swiz, format; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { +- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { +- sz = VB->TexCoordPtr[i]->size; ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { ++ switch (VB->TexCoordPtr[i]->size) { ++ case 1: ++ format = EMIT_1F; ++ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); ++ break; ++ case 2: ++ format = EMIT_2F; ++ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); ++ break; ++ case 3: ++ format = EMIT_3F; ++ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); ++ break; ++ case 4: ++ format = EMIT_4F; ++ swiz = SWIZZLE_XYZW; ++ break; ++ default: ++ continue; ++ } + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); +- EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); +- vap_fmt_1 |= sz << (3 * i); ++ EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); ++ ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, MASK_XYZW); ++ vap_out_fmt_1 |= 4 << (i * 3); ++ ++first_free_tex; + } + } + } + + /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ +- if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { +- int first_free_tex = -1; +- if (fog_id >= 0) { +- first_free_tex = fog_id+1; +- } else { +- if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { +- int i; +- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { +- if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { +- first_free_tex = i; +- break; +- } +- } +- } else { +- first_free_tex = 0; +- } +- } +- +- if (first_free_tex == -1) { ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { ++ if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write w pos\n"); + _mesa_exit(-1); + } + +- sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); +- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); +- vap_fmt_1 |= sz << (3 * first_free_tex); +- } +- +- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { +- if (InputsRead & (1 << i)) { +- inputs[i] = nr++; +- } else { +- inputs[i] = -1; +- } ++ EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); ++ ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW); ++ vap_out_fmt_1 |= 4 << (first_free_tex * 3); ++ ++first_free_tex; + } + +- /* Fixed, apply to vir0 only */ +- if (InputsRead & (1 << VERT_ATTRIB_POS)) +- inputs[VERT_ATTRIB_POS] = 0; +- if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) +- inputs[VERT_ATTRIB_COLOR0] = 2; +- if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) +- inputs[VERT_ATTRIB_COLOR1] = 3; +- if (InputsRead & (1 << VERT_ATTRIB_FOG)) +- inputs[VERT_ATTRIB_FOG] = 6 + fog_id; +- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) +- if (InputsRead & (1 << i)) +- inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); +- +- for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { +- if (InputsRead & (1 << i)) { +- tab[nr++] = i; ++ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) { ++ if (first_free_tex >= ctx->Const.MaxTextureUnits) { ++ fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); ++ _mesa_exit(-1); + } +- } + +- for (i = 0; i < nr; i++) { +- int ci; +- +- swizzle[i][0] = SWIZZLE_ZERO; +- swizzle[i][1] = SWIZZLE_ZERO; +- swizzle[i][2] = SWIZZLE_ZERO; +- swizzle[i][3] = SWIZZLE_ONE; +- +- for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { +- swizzle[i][ci] = ci; +- } ++ InputsRead |= 1 << VERT_ATTRIB_FOG; ++ OutputsWritten |= 1 << VERT_RESULT_FOGC; ++ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); ++ EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); ++ ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_X); ++ vap_out_fmt_1 |= 1 << (first_free_tex * 3); } R300_NEWPRIM(rmesa); - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = -+ if (rmesa->radeon.radeonScreen->kernel_mm) { -+ R300_STATECHANGE(rmesa, vir[0]); -+ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; -+ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; -+ rmesa->hw.vir[0].cmd[0] |= -+ (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], -+ VB->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; -+ R300_STATECHANGE(rmesa, vir[1]); -+ rmesa->hw.vir[1].cmd[0] |= -+ (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, -+ nr) & 0x3FFF) << 16; -+ } else { -+ R300_STATECHANGE(rmesa, vir[0]); -+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - VB->AttribPtr, inputs, tab, nr); +- r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], +- VB->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = -+ R300_STATECHANGE(rmesa, vir[1]); -+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); -+ } - - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); -@@ -283,89 +293,20 @@ static void r300SetVertexFormat( GLcontext *ctx ) - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; +- r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, +- nr); ++ r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten, vap_out_fmt_1); +- R300_STATECHANGE(rmesa, vic); +- rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); +- rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); +- +- R300_STATECHANGE(rmesa, vof); +- rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); +- rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; +- - rmesa->swtcl.vertex_size = + rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, @@ -14739,17 +17145,18 @@ index d463ab3..f57516a 100644 NULL, 0 ); - rmesa->swtcl.vertex_size /= 4; +- +- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); +- + rmesa->radeon.swtcl.vertex_size /= 4; - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); +- R300_STATECHANGE(rmesa, vte); +- rmesa->hw.vte.cmd[1] = vte; +- rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; ++ RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); + } - R300_STATECHANGE(rmesa, vte); - rmesa->hw.vte.cmd[1] = vte; -- rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; --} -- -- -/* Flush vertices in the current dma region. - */ -static void flush_last_swtcl_prim( r300ContextPtr rmesa ) @@ -14816,11 +17223,38 @@ index d463ab3..f57516a 100644 - rmesa->swtcl.numverts += nverts; - return head; - } -+ rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size; - } - +-} +- static GLuint reduced_prim[] = { -@@ -405,11 +346,11 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); +- GL_POINTS, +- GL_LINES, +- GL_LINES, +- GL_LINES, +- GL_TRIANGLES, +- GL_TRIANGLES, +- GL_TRIANGLES, +- GL_TRIANGLES, +- GL_TRIANGLES, +- GL_TRIANGLES, ++ GL_POINTS, ++ GL_LINES, ++ GL_LINES, ++ GL_LINES, ++ GL_TRIANGLES, ++ GL_TRIANGLES, ++ GL_TRIANGLES, ++ GL_TRIANGLES, ++ GL_TRIANGLES, ++ GL_TRIANGLES, + }; + + static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); + static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); +-//static void r300ResetLineStipple( GLcontext *ctx ); + + /*********************************************************************** + * Emit primitives as inline vertices * +@@ -405,15 +346,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa @@ -14834,17 +17268,98 @@ index d463ab3..f57516a 100644 + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) #define VERTEX r300Vertex - #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) -@@ -468,7 +409,7 @@ static struct { +-#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) +-#define PRINT_VERTEX(x) + #undef TAG + #define TAG(x) r300_##x + #include "tnl_dd/t_dd_triemit.h" +@@ -433,9 +372,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); + * Build render functions from dd templates * + ***********************************************************************/ + +-#define R300_TWOSIDE_BIT 0x01 +-#define R300_UNFILLED_BIT 0x02 +-#define R300_MAX_TRIFUNC 0x04 ++#define R300_UNFILLED_BIT 0x01 ++#define R300_MAX_TRIFUNC 0x02 + + static struct { + tnl_points_func points; +@@ -446,9 +384,9 @@ static struct { + + #define DO_FALLBACK 0 + #define DO_UNFILLED (IND & R300_UNFILLED_BIT) +-#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) ++#define DO_TWOSIDE 0 + #define DO_FLAT 0 +-#define DO_OFFSET 0 ++#define DO_OFFSET 0 + #define DO_TRI 1 + #define DO_QUAD 1 + #define DO_LINE 1 +@@ -468,33 +406,39 @@ static struct { #define VERT_Y(_v) _v->v.y #define VERT_Z(_v) _v->v.z #define AREA_IS_CCW( a ) (a < 0) -#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) +- +-/* Only used to pull back colors into vertices (ie, we know color is +- * floating point). +- */ +-#define R300_COLOR( dst, src ) \ +-do { \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) ++ ++#define VERT_SET_RGBA( v, c ) \ ++do { \ ++ r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ + } while (0) + +-#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) +-#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] +-#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] +-#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] ++#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +-#define R300_SPEC( dst, src ) \ +-do { \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ +- UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ ++#define VERT_SET_SPEC( v0, c ) \ ++do { \ ++ if (specoffset) { \ ++ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \ ++ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \ ++ } \ + } while (0) + +-#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) +-#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) ++#define VERT_COPY_SPEC( v0, v1 ) \ ++do { \ ++ if (specoffset) { \ ++ v0->v.specular.red = v1->v.specular.red; \ ++ v0->v.specular.green = v1->v.specular.green; \ ++ v0->v.specular.blue = v1->v.specular.blue; \ ++ } \ ++} while (0) ++ ++#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] ++#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] + #define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] + #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] - /* Only used to pull back colors into vertices (ie, we know color is - * floating point). -@@ -514,7 +455,7 @@ do { \ +@@ -514,7 +458,7 @@ do { \ ***********************************************************************/ #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) @@ -14853,7 +17368,34 @@ index d463ab3..f57516a 100644 #undef TAG #define TAG(x) x #include "tnl_dd/t_dd_unfilled.h" -@@ -571,8 +512,8 @@ static void init_rast_tab( void ) +@@ -530,26 +474,15 @@ do { \ + #define TAG(x) x + #include "tnl_dd/t_dd_tritmp.h" + +-#define IND (R300_TWOSIDE_BIT) +-#define TAG(x) x##_twoside +-#include "tnl_dd/t_dd_tritmp.h" +- + #define IND (R300_UNFILLED_BIT) + #define TAG(x) x##_unfilled + #include "tnl_dd/t_dd_tritmp.h" + +-#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) +-#define TAG(x) x##_twoside_unfilled +-#include "tnl_dd/t_dd_tritmp.h" +- +- + + static void init_rast_tab( void ) + { + init(); +- init_twoside(); + init_unfilled(); +- init_twoside_unfilled(); + } + + /**********************************************************************/ +@@ -571,8 +504,8 @@ static void init_rast_tab( void ) #undef LOCAL_VARS #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ @@ -14864,8 +17406,11 @@ index d463ab3..f57516a 100644 const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ const GLboolean stipple = ctx->Line.StippleFlag; \ (void) elt; (void) stipple; -@@ -604,7 +545,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) - if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; +@@ -601,10 +534,9 @@ static void r300ChooseRenderState( GLcontext *ctx ) + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + +- if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; - if (index != rmesa->swtcl.RenderIndex) { @@ -14873,7 +17418,7 @@ index d463ab3..f57516a 100644 tnl->Driver.Render.Points = rast_tab[index].points; tnl->Driver.Render.Line = rast_tab[index].line; tnl->Driver.Render.ClippedLine = rast_tab[index].line; -@@ -621,7 +562,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) +@@ -621,27 +553,29 @@ static void r300ChooseRenderState( GLcontext *ctx ) tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; } @@ -14882,7 +17427,12 @@ index d463ab3..f57516a 100644 } } -@@ -634,14 +575,17 @@ static void r300RenderStart(GLcontext *ctx) + + static void r300RenderStart(GLcontext *ctx) + { +- r300ContextPtr rmesa = R300_CONTEXT( ctx ); ++ r300ContextPtr rmesa = R300_CONTEXT( ctx ); + r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); @@ -14896,26 +17446,28 @@ index d463ab3..f57516a 100644 - if (rmesa->dma.flush != 0 && - rmesa->dma.flush != flush_last_swtcl_prim) - rmesa->dma.flush( rmesa ); +- + /* investigate if we can put back flush optimisation if needed */ + if (rmesa->radeon.dma.flush != NULL) { + rmesa->radeon.dma.flush(ctx); + } - } -@@ -653,9 +597,9 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) + static void r300RenderFinish(GLcontext *ctx) +@@ -652,9 +586,9 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (rmesa->swtcl.hw_primitive != hwprim) { -+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) { - R300_NEWPRIM( rmesa ); +- R300_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hwprim; ++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) { ++ R300_NEWPRIM( rmesa ); + rmesa->radeon.swtcl.hw_primitive = hwprim; } } -@@ -663,7 +607,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +@@ -662,18 +596,16 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -14923,8 +17475,20 @@ index d463ab3..f57516a 100644 + rmesa->radeon.swtcl.render_primitive = prim; if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) - return; -@@ -702,10 +646,10 @@ void r300InitSwtcl(GLcontext *ctx) +- return; ++ return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + } + + static void r300ResetLineStipple(GLcontext *ctx) + { +- +- + } + + void r300InitSwtcl(GLcontext *ctx) +@@ -699,14 +631,13 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 48 * sizeof(GLfloat) ); @@ -14939,12 +17503,16 @@ index d463ab3..f57516a 100644 _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); -@@ -724,33 +668,52 @@ void r300DestroySwtcl(GLcontext *ctx) +- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); +@@ -716,33 +647,52 @@ void r300DestroySwtcl(GLcontext *ctx) { } -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) -+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) ++static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) { - int cmd_reserved = 0; - int cmd_written = 0; @@ -14970,7 +17538,8 @@ index d463ab3..f57516a 100644 + END_BATCH(); } - void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) ++static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) { - - int cmd_reserved = 0; @@ -14992,35 +17561,56 @@ index d463ab3..f57516a 100644 + +void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) +{ -+ r300ContextPtr rmesa = R300_CONTEXT(ctx); ++ r300ContextPtr rmesa = R300_CONTEXT(ctx); + -+ rcommonEnsureCmdBufSpace(&rmesa->radeon, ++ rcommonEnsureCmdBufSpace(&rmesa->radeon, + rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); -+ radeonEmitState(&rmesa->radeon); -+ r300EmitVertexAOS(rmesa, -+ rmesa->radeon.swtcl.vertex_size, -+ rmesa->radeon.dma.current, -+ current_offset); -+ -+ r300EmitVbufPrim(rmesa, ++ radeonEmitState(&rmesa->radeon); ++ r300EmitVertexAOS(rmesa, ++ rmesa->radeon.swtcl.vertex_size, ++ rmesa->radeon.dma.current, ++ current_offset); ++ ++ r300EmitVbufPrim(rmesa, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); -+ r300EmitCacheFlush(rmesa); -+ COMMIT_BATCH(); ++ r300EmitCacheFlush(rmesa); ++ COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h -index 55df53c..23b4ce3 100644 +index 55df53c..75c4193 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.h +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h -@@ -42,4 +42,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@@ -39,7 +39,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #include "swrast/swrast.h" + #include "r300_context.h" + ++#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) ++#define MASK_X R300_WRITE_ENA_X ++#define MASK_Y R300_WRITE_ENA_Y ++#define MASK_Z R300_WRITE_ENA_Z ++#define MASK_W R300_WRITE_ENA_W ++ ++/* ++ * Here are definitions of OVM locations of vertex attributes for non TCL hw ++ */ ++#define SWTCL_OVM_POS 0 ++#define SWTCL_OVM_COLOR0 2 ++#define SWTCL_OVM_COLOR1 3 ++#define SWTCL_OVM_COLOR2 4 ++#define SWTCL_OVM_COLOR3 5 ++#define SWTCL_OVM_TEX(n) ((n) + 6) ++#define SWTCL_OVM_POINT_SIZE 15 ++ ++ extern void r300InitSwtcl( GLcontext *ctx ); extern void r300DestroySwtcl( GLcontext *ctx ); +extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); #endif diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c -index 8ab382c..0cbb2bc 100644 +index 7c699ec..0af5bb4 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -15121,7 +17711,7 @@ index 8ab382c..0cbb2bc 100644 break; } -@@ -163,743 +168,20 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat +@@ -163,15 +168,15 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat */ switch (magf) { case GL_NEAREST: @@ -15135,10 +17725,12 @@ index 8ab382c..0cbb2bc 100644 } } --static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) -+static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4]) +-static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4]) ++static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { - t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); +@@ -182,729 +187,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4]) } /** @@ -15168,7 +17760,7 @@ index 8ab382c..0cbb2bc 100644 - - r300UpdateTexWrap(t); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); -- r300SetTexBorderColor(t, texObj->_BorderChan); +- r300SetTexBorderColor(t, texObj->BorderColor); - } - - return t; @@ -15868,7 +18460,7 @@ index 8ab382c..0cbb2bc 100644 * Changes variables and flags for a state update, which will happen at the * next UpdateTextureState */ -@@ -908,7 +190,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, +@@ -913,7 +195,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat * params) { @@ -15877,7 +18469,7 @@ index 8ab382c..0cbb2bc 100644 if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, -@@ -941,7 +223,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, +@@ -946,7 +228,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ @@ -15890,7 +18482,7 @@ index 8ab382c..0cbb2bc 100644 break; case GL_DEPTH_TEXTURE_MODE: -@@ -964,27 +250,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, +@@ -969,27 +255,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, } } @@ -15919,7 +18511,7 @@ index 8ab382c..0cbb2bc 100644 if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, -@@ -992,14 +261,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +@@ -997,14 +266,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) _mesa_lookup_enum_by_nr(texObj->Target)); } @@ -15950,7 +18542,7 @@ index 8ab382c..0cbb2bc 100644 _mesa_delete_texture_object(ctx, texObj); } -@@ -1008,8 +287,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +@@ -1013,8 +292,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) * Called via ctx->Driver.NewTextureObject. * Note: this function will be called during context creation to * allocate the default texture objects. @@ -15959,7 +18551,7 @@ index 8ab382c..0cbb2bc 100644 * Fixup MaxAnisotropy according to user preference. */ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, -@@ -1017,14 +294,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, +@@ -1022,14 +299,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, GLenum target) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -15984,13 +18576,13 @@ index 8ab382c..0cbb2bc 100644 + /* Initialize hardware state */ + r300UpdateTexWrap(t); + r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); -+ r300SetTexBorderColor(t, t->base._BorderChan); ++ r300SetTexBorderColor(t, t->base.BorderColor); + + return &t->base; } void r300InitTextureFuncs(struct dd_function_table *functions) -@@ -1032,22 +318,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions) +@@ -1037,22 +323,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions) /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. */ @@ -16061,7 +18653,7 @@ index b86d45b..8a653ea 100644 diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c deleted file mode 100644 -index b03eefa..0000000 +index 0fe51b0..0000000 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ /dev/null @@ -1,567 +0,0 @@ @@ -16373,7 +18965,7 @@ index b03eefa..0000000 - ASSERT(face < 6); - - /* Ensure we have a valid texture to upload */ -- if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { +- if ((hwlevel < 0) || (hwlevel >= R300_MAX_TEXTURE_LEVELS)) { - _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); - return; - } @@ -16633,7 +19225,7 @@ index b03eefa..0000000 - return 0; -} diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c -index e2329f0..cf4cad7 100644 +index cadec7f..2d7ad55 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -16852,7 +19444,7 @@ index e2329f0..cf4cad7 100644 + if (t->image_override && t->bo) + return; -- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); +- assert(numLevels <= R300_MAX_TEXTURE_LEVELS); + t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT) + | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT) + | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT) @@ -17157,7 +19749,7 @@ index e2329f0..cf4cad7 100644 } void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, -@@ -591,78 +325,164 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, +@@ -591,78 +325,163 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, r300ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); @@ -17298,7 +19890,6 @@ index e2329f0..cf4cad7 100644 + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } -+ fprintf(stderr,"settexbuf %dx%d@%d %d targ %x format %x\n", rb->width, rb->height, rb->cpp, rb->pitch, target, format); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; @@ -17363,6 +19954,385 @@ index e2329f0..cf4cad7 100644 - } + r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); } +diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c +index 292573d..8f0b70a 100644 +--- a/src/mesa/drivers/dri/r300/r500_fragprog.c ++++ b/src/mesa/drivers/dri/r300/r500_fragprog.c +@@ -27,10 +27,6 @@ + + #include "r500_fragprog.h" + +-#include "radeon_nqssadce.h" +-#include "radeon_program_alu.h" +- +- + static void reset_srcreg(struct prog_src_register* reg) + { + _mesa_bzero(reg, sizeof(*reg)); +@@ -58,12 +54,12 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t + * - introduce a temporary register when write masks are needed + * + */ +-static GLboolean transform_TEX( ++GLboolean r500_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) + { +- struct r500_fragment_program_compiler *compiler = +- (struct r500_fragment_program_compiler*)data; ++ struct r300_fragment_program_compiler *compiler = ++ (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; +@@ -188,121 +184,7 @@ static GLboolean transform_TEX( + return GL_TRUE; + } + +- +-static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) +-{ +- struct gl_fragment_program *mp = &fp->mesa_program; +- +- /* Ask Mesa nicely to fill in ParameterValues for us */ +- if (mp->Base.Parameters) +- _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +-} +- +- +-/** +- * Transform the program to support fragment.position. +- * +- * Introduce a small fragment at the start of the program that will be +- * the only code that directly reads the FRAG_ATTRIB_WPOS input. +- * All other code pieces that reference that input will be rewritten +- * to read from a newly allocated temporary. +- * +- * \todo if/when r5xx supports the radeon_program architecture, this is a +- * likely candidate for code sharing. +- */ +-static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +-{ +- GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; +- +- if (!(InputsRead & FRAG_BIT_WPOS)) +- return; +- +- static gl_state_index tokens[STATE_LENGTH] = { +- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 +- }; +- struct prog_instruction *fpi; +- GLuint window_index; +- int i = 0; +- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); +- +- _mesa_insert_instructions(compiler->program, 0, 3); +- fpi = compiler->program->Instructions; +- +- /* perspective divide */ +- fpi[i].Opcode = OPCODE_RCP; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_W; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_INPUT; +- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; +- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; +- i++; +- +- fpi[i].Opcode = OPCODE_MUL; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_INPUT; +- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; +- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; +- +- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[1].Index = tempregi; +- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; +- i++; +- +- /* viewport transformation */ +- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); +- +- fpi[i].Opcode = OPCODE_MAD; +- +- fpi[i].DstReg.File = PROGRAM_TEMPORARY; +- fpi[i].DstReg.Index = tempregi; +- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; +- fpi[i].DstReg.CondMask = COND_TR; +- +- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[0].Index = tempregi; +- fpi[i].SrcReg[0].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- +- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; +- fpi[i].SrcReg[1].Index = window_index; +- fpi[i].SrcReg[1].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- +- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; +- fpi[i].SrcReg[2].Index = window_index; +- fpi[i].SrcReg[2].Swizzle = +- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); +- i++; +- +- for (; i < compiler->program->NumInstructions; ++i) { +- int reg; +- for (reg = 0; reg < 3; reg++) { +- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && +- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { +- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; +- fpi[i].SrcReg[reg].Index = tempregi; +- } +- } +- } +-} +- +- +-static void nqssadce_init(struct nqssadce_state* s) +-{ +- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; +- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +-} +- +-static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) ++GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) + { + GLuint relevant; + int i; +@@ -367,8 +249,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +-static void nqssadce_build_swizzle(struct nqssadce_state *s, +- struct prog_dst_register dst, struct prog_src_register src) ++void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) + { + struct prog_instruction *inst; + GLuint negatebase[2] = { 0, 0 }; +@@ -397,124 +278,6 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, + } + } + +-static GLuint build_dtm(GLuint depthmode) +-{ +- switch(depthmode) { +- default: +- case GL_LUMINANCE: return 0; +- case GL_INTENSITY: return 1; +- case GL_ALPHA: return 2; +- } +-} +- +-static GLuint build_func(GLuint comparefunc) +-{ +- return comparefunc - GL_NEVER; +-} +- +- +-/** +- * Collect all external state that is relevant for compiling the given +- * fragment program. +- */ +-static void build_state( +- r300ContextPtr r300, +- struct r500_fragment_program *fp, +- struct r500_fragment_program_external_state *state) +-{ +- int unit; +- +- _mesa_bzero(state, sizeof(*state)); +- +- for(unit = 0; unit < 16; ++unit) { +- if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { +- struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; +- +- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); +- state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); +- } +- } +-} +- +-static void dump_program(struct r500_fragment_program_code *code); +- +-void r500TranslateFragmentShader(r300ContextPtr r300, +- struct r500_fragment_program *fp) +-{ +- struct r500_fragment_program_external_state state; +- +- build_state(r300, fp, &state); +- if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { +- /* TODO: cache compiled programs */ +- fp->translated = GL_FALSE; +- _mesa_memcpy(&fp->state, &state, sizeof(state)); +- } +- +- if (!fp->translated) { +- struct r500_fragment_program_compiler compiler; +- +- compiler.r300 = r300; +- compiler.fp = fp; +- compiler.code = &fp->code; +- compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Compiler: Initial program:\n"); +- _mesa_print_program(compiler.program); +- } +- +- insert_WPOS_trailer(&compiler); +- +- struct radeon_program_transformation transformations[] = { +- { &transform_TEX, &compiler }, +- { &radeonTransformALU, 0 }, +- { &radeonTransformDeriv, 0 }, +- { &radeonTransformTrigScale, 0 } +- }; +- radeonLocalTransform(r300->radeon.glCtx, compiler.program, +- 4, transformations); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Compiler: after native rewrite:\n"); +- _mesa_print_program(compiler.program); +- } +- +- struct radeon_nqssadce_descr nqssadce = { +- .Init = &nqssadce_init, +- .IsNativeSwizzle = &is_native_swizzle, +- .BuildSwizzle = &nqssadce_build_swizzle, +- .RewriteDepthOut = GL_TRUE +- }; +- radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- _mesa_printf("Compiler: after NqSSA-DCE:\n"); +- _mesa_print_program(compiler.program); +- } +- +- fp->translated = r500FragmentProgramEmit(&compiler); +- +- /* Subtle: Rescue any parameters that have been added during transformations */ +- _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); +- fp->mesa_program.Base.Parameters = compiler.program->Parameters; +- compiler.program->Parameters = 0; +- +- _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); +- +- r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); +- +- if (RADEON_DEBUG & DEBUG_PIXEL) { +- if (fp->translated) { +- _mesa_printf("Machine-readable code:\n"); +- dump_program(&fp->code); +- } +- } +- +- } +- +- update_params(r300, fp); +- +-} + + static char *toswiz(int swiz_val) { + switch(swiz_val) { +@@ -613,9 +376,9 @@ static char *to_texop(int val) + return NULL; + } + +-static void dump_program(struct r500_fragment_program_code *code) ++void r500FragmentProgramDump(union rX00_fragment_program_code *c) + { +- ++ struct r500_fragment_program_code *code = &c->r500; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; +diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h +index 1e45538..9ca2f9b 100644 +--- a/src/mesa/drivers/dri/r300/r500_fragprog.h ++++ b/src/mesa/drivers/dri/r300/r500_fragprog.h +@@ -33,30 +33,20 @@ + #ifndef __R500_FRAGPROG_H_ + #define __R500_FRAGPROG_H_ + +-#include "main/glheader.h" +-#include "main/macros.h" +-#include "main/enums.h" + #include "shader/prog_parameter.h" +-#include "shader/prog_print.h" +-#include "shader/program.h" + #include "shader/prog_instruction.h" + + #include "r300_context.h" +-#include "r300_state.h" +-#include "radeon_program.h" ++#include "radeon_nqssadce.h" + +-struct r500_fragment_program; ++extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); + +-extern void r500TranslateFragmentShader(r300ContextPtr r300, +- struct r500_fragment_program *fp); ++extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); + +-struct r500_fragment_program_compiler { +- r300ContextPtr r300; +- struct r500_fragment_program *fp; +- struct r500_fragment_program_code *code; +- struct gl_program *program; +-}; ++extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); + +-extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); ++extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); ++ ++extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + + #endif +diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +index 4631235..277f801 100644 +--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c ++++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +@@ -49,8 +49,8 @@ + + + #define PROG_CODE \ +- struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ +- struct r500_fragment_program_code *code = c->code ++ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ ++ struct r500_fragment_program_code *code = &c->code->r500 + + #define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ +@@ -72,7 +72,7 @@ static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex + } + + if (*hwindex >= code->const_nr) { +- if (*hwindex >= PFS_NUM_CONST_REGS) { ++ if (*hwindex >= R500_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } +@@ -299,9 +299,9 @@ static const struct radeon_pair_handler pair_handler = { + .MaxHwTemps = 128 + }; + +-GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) ++GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) + { +- struct r500_fragment_program_code *code = compiler->code; ++ struct r500_fragment_program_code *code = &compiler->code->r500; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c deleted file mode 100644 index 5267fe9..0000000 @@ -18611,7 +21581,7 @@ index a344837..0000000 - -#endif /* __RADEON_LOCK_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c -index f398404..49aa90d 100644 +index 4aa2319..906d36e 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -35,7 +35,7 @@ @@ -18623,6 +21593,14 @@ index f398404..49aa90d 100644 #include "shader/prog_print.h" +@@ -609,6 +609,7 @@ static void emit_all_tex(struct pair_state *s) + if (s->Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); ++ fflush(stdout); + } + s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + } diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c deleted file mode 100644 index 16f9fb9..0000000 @@ -20406,10 +23384,10 @@ index 0000000..9187cd7 +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h new file mode 100644 -index 0000000..9921d35 +index 0000000..6f1a0b4 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h -@@ -0,0 +1,70 @@ +@@ -0,0 +1,85 @@ +#ifndef RADEON_CS_WRAPPER_H +#define RADEON_CS_WRAPPER_H + @@ -20417,6 +23395,17 @@ index 0000000..9921d35 +#define RADEON_PARAM_DEVICE_ID 16 +#endif + ++#ifndef RADEON_INFO_DEVICE_ID ++#define RADEON_INFO_DEVICE_ID 0 ++#endif ++#ifndef RADEON_INFO_NUM_GB_PIPES ++#define RADEON_INFO_NUM_GB_PIPES 0 ++#endif ++ ++#ifndef DRM_RADEON_INFO ++#define DRM_RADEON_INFO 0x1 ++#endif ++ +#ifdef HAVE_LIBDRM_RADEON + +#include "radeon_bo.h" @@ -20439,12 +23428,16 @@ index 0000000..9921d35 +#define DRM_RADEON_GEM_INFO 0x1c + +struct drm_radeon_gem_info { -+ uint64_t gart_start; + uint64_t gart_size; -+ uint64_t vram_start; + uint64_t vram_size; + uint64_t vram_visible; +}; ++ ++struct drm_radeon_info { ++ uint32_t request; ++ uint32_t pad; ++ uint32_t value; ++}; +#endif + + @@ -20631,10 +23624,10 @@ index 0000000..4b5116c +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c new file mode 100644 -index 0000000..756c09f +index 0000000..8b5b892 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common.c -@@ -0,0 +1,1407 @@ +@@ -0,0 +1,1456 @@ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. @@ -21488,20 +24481,57 @@ index 0000000..756c09f + radeon_window_moved(radeon); + radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer); + ctx->Driver.Viewport = old_viewport; ++} + ++static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state) ++{ ++ int i, j, reg; ++ int dwords = (*state->check) (radeon->glCtx, state); ++ drm_r300_cmd_header_t cmd; + ++ fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); ++ ++ if (RADEON_DEBUG & DEBUG_VERBOSE) { ++ for (i = 0; i < dwords;) { ++ cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); ++ reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; ++ fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", ++ state->name, i, reg, cmd.packet0.count); ++ ++i; ++ for (j = 0; j < cmd.packet0.count && i < dwords; j++) { ++ fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", ++ state->name, i, reg, state->cmd[i]); ++ reg += 4; ++ ++i; ++ } ++ } ++ } +} -+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state ) -+{ -+ int i; -+ int dwords = (*state->check)(radeon->glCtx, state); + -+ fprintf(stderr, "emit %s %d/%d\n", state->name, state->cmd_size, dwords); ++static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state) ++{ ++ int i, j, reg, count; ++ int dwords = (*state->check) (radeon->glCtx, state); ++ uint32_t packet0; + -+ if (RADEON_DEBUG & DEBUG_VERBOSE) -+ for (i = 0 ; i < dwords; i++) -+ fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); ++ fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + ++ if (RADEON_DEBUG & DEBUG_VERBOSE) { ++ for (i = 0; i < dwords;) { ++ packet0 = state->cmd[i]; ++ reg = (packet0 & 0x1FFF) << 2; ++ count = ((packet0 & 0x3FFF0000) >> 16) + 1; ++ fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", ++ state->name, i, reg, count); ++ ++i; ++ for (j = 0; j < count && i < dwords; j++) { ++ fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", ++ state->name, i, reg, state->cmd[i]); ++ reg += 4; ++ ++i; ++ } ++ } ++ } +} + +static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) @@ -21519,7 +24549,10 @@ index 0000000..756c09f + dwords = (*atom->check) (radeon->glCtx, atom); + if (dwords) { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { -+ radeon_print_state_atom(radeon, atom); ++ if (radeon->radeonScreen->kernel_mm) ++ radeon_print_state_atom_kmm(radeon, atom); ++ else ++ radeon_print_state_atom(radeon, atom); + } + if (atom->emit) { + (*atom->emit)(radeon->glCtx, atom); @@ -21565,6 +24598,7 @@ index 0000000..756c09f + int i; + + for (i = 0; i < radeon->state.validated_bo_count; i++) { ++ radeon_bo_unref(radeon->state.bos[i].bo); + radeon->state.bos[i].bo = NULL; + radeon->state.bos[i].read_domains = 0; + radeon->state.bos[i].write_domain = 0; @@ -21575,6 +24609,7 @@ index 0000000..756c09f + +void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain) +{ ++ radeon_bo_ref(bo); + radeon->state.bos[radeon->state.validated_bo_count].bo = bo; + radeon->state.bos[radeon->state.validated_bo_count].read_domains = read_domains; + radeon->state.bos[radeon->state.validated_bo_count].write_domain = write_domain; @@ -21781,7 +24816,7 @@ index 0000000..756c09f + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); + } else { -+ struct drm_radeon_gem_info mminfo; ++ struct drm_radeon_gem_info mminfo = { 0 }; + + if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) + { @@ -21893,6 +24928,7 @@ index 0000000..756c09f + GL_CURRENT_BIT | + GL_DEPTH_BUFFER_BIT | + GL_ENABLE_BIT | ++ GL_POLYGON_BIT | + GL_STENCIL_BUFFER_BIT | + GL_TRANSFORM_BIT | + GL_CURRENT_BIT); @@ -21914,6 +24950,7 @@ index 0000000..756c09f + _mesa_Disable(GL_CLIP_PLANE3); + _mesa_Disable(GL_CLIP_PLANE4); + _mesa_Disable(GL_CLIP_PLANE5); ++ _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); + if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { + saved_fp_enable = GL_TRUE; + _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); @@ -21946,6 +24983,11 @@ index 0000000..756c09f + } + } + ++#if FEATURE_ARB_vertex_buffer_object ++ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); ++ _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); ++#endif ++ + radeon_meta_set_passthrough_transform(rmesa); + + for (i = 0; i < 4; i++) { @@ -22150,10 +25192,10 @@ index 0000000..c2fbb09 +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c new file mode 100644 -index 0000000..ba74c97 +index 0000000..3e71362 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c -@@ -0,0 +1,623 @@ +@@ -0,0 +1,625 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and @@ -22333,6 +25375,8 @@ index 0000000..ba74c97 + radeon->texture_depth = ( glVisual->rgbBits > 16 ) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + ++ radeon->texture_row_align = 32; ++ + return GL_TRUE; +} + @@ -22779,10 +25823,10 @@ index 0000000..ba74c97 + diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h new file mode 100644 -index 0000000..d32e5af +index 0000000..181688c --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h -@@ -0,0 +1,563 @@ +@@ -0,0 +1,564 @@ + +#ifndef COMMON_CONTEXT_H +#define COMMON_CONTEXT_H @@ -23206,6 +26250,7 @@ index 0000000..d32e5af + */ + int texture_depth; + float initialMaxAnisotropy; ++ uint32_t texture_row_align; + + struct radeon_dma dma; + struct radeon_hw_state hw; @@ -29115,10 +32160,10 @@ index 126d072..78ec119 100644 -} diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c new file mode 100644 -index 0000000..34d6261 +index 0000000..51538e3 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c -@@ -0,0 +1,386 @@ +@@ -0,0 +1,387 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * @@ -29207,10 +32252,11 @@ index 0000000..34d6261 + * \param curOffset points to the offset at which the image is to be stored + * and is updated by this function according to the size of the image. + */ -+static void compute_tex_image_offset(radeon_mipmap_tree *mt, ++static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt, + GLuint face, GLuint level, GLuint* curOffset) +{ + radeon_mipmap_level *lvl = &mt->levels[level]; ++ uint32_t row_align = rmesa->texture_row_align - 1; + + /* Find image size in bytes */ + if (mt->compressed) { @@ -29228,7 +32274,7 @@ index 0000000..34d6261 + lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; + lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; + } else { -+ lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31; ++ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; + lvl->size = lvl->rowstride * lvl->height * lvl->depth; + } + assert(lvl->size > 0); @@ -29252,7 +32298,7 @@ index 0000000..34d6261 + return size; +} + -+static void calculate_miptree_layout(radeon_mipmap_tree *mt) ++static void calculate_miptree_layout(radeonContextPtr rmesa, radeon_mipmap_tree *mt) +{ + GLuint curOffset; + GLuint numLevels; @@ -29270,7 +32316,7 @@ index 0000000..34d6261 + mt->levels[i].depth = minify(mt->depth0, i); + + for(face = 0; face < mt->faces; face++) -+ compute_tex_image_offset(mt, face, i, &curOffset); ++ compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + } + + /* Note the required size in memory */ @@ -29302,7 +32348,7 @@ index 0000000..34d6261 + mt->tilebits = tilebits; + mt->compressed = compressed; + -+ calculate_miptree_layout(mt); ++ calculate_miptree_layout(rmesa, mt); + + mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, + 0, mt->totalsize, 1024, @@ -29636,7 +32682,7 @@ index 1ec06bc..f30eb1c 100644 drm_clip_rect_t *boxes ); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c -index e964feb..49c7eae 100644 +index 8828533..56dbe74 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -29758,8 +32804,12 @@ index e964feb..49c7eae 100644 +radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) { int ret; - drm_radeon_getparam_t gp; -+ struct drm_radeon_info info; +- drm_radeon_getparam_t gp; +- +- gp.param = param; +- gp.value = value; ++ drm_radeon_getparam_t gp = { 0 }; ++ struct drm_radeon_info info = { 0 }; + + if (sPriv->drm_version.major >= 2) { + info.value = (uint64_t)value; @@ -29778,16 +32828,13 @@ index e964feb..49c7eae 100644 + gp.param = param; + gp.value = value; -- gp.param = param; -- gp.value = value; -- - ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + } return ret; } -@@ -330,6 +352,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = { +@@ -335,6 +357,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, radeonSetTexOffset, }; @@ -29800,7 +32847,7 @@ index e964feb..49c7eae 100644 #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) -@@ -344,6 +372,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = { +@@ -349,6 +377,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r200SetTexOffset, }; @@ -29813,7 +32860,7 @@ index e964feb..49c7eae 100644 #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) -@@ -351,137 +385,18 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { +@@ -356,137 +390,18 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r300SetTexOffset, }; @@ -29959,7 +33006,19 @@ index e964feb..49c7eae 100644 case PCI_CHIP_RADEON_LY: case PCI_CHIP_RADEON_LZ: case PCI_CHIP_RADEON_QY: -@@ -819,9 +734,161 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) +@@ -561,11 +476,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + screen->chip_family = CHIP_FAMILY_RS300; + break; + +- /* 9500 with 1 pipe verified by: Reid Linnemann */ + case PCI_CHIP_R300_AD: +- screen->chip_family = CHIP_FAMILY_RV350; +- screen->chip_flags = RADEON_CHIPSET_TCL; +- break; + case PCI_CHIP_R300_AE: + case PCI_CHIP_R300_AF: + case PCI_CHIP_R300_AG: +@@ -824,9 +735,145 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", @@ -30012,19 +33071,6 @@ index e964feb..49c7eae 100644 + { + int ret; + -+#ifdef RADEON_PARAM_KERNEL_MM -+ ret = radeonGetParam(sPriv, RADEON_PARAM_KERNEL_MM, &screen->kernel_mm); -+ -+ if (ret && ret != -EINVAL) { -+ FREE( screen ); -+ fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret); -+ return NULL; -+ } -+ -+ if (ret == -EINVAL) -+ screen->kernel_mm = 0; -+#endif -+ + ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET, + &screen->gart_buffer_offset); + @@ -30058,62 +33104,59 @@ index e964feb..49c7eae 100644 + screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); + } + -+ if (!screen->kernel_mm) { -+ screen->mmio.handle = dri_priv->registerHandle; -+ screen->mmio.size = dri_priv->registerSize; -+ if ( drmMap( sPriv->fd, -+ screen->mmio.handle, -+ screen->mmio.size, -+ &screen->mmio.map ) ) { -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); -+ return NULL; -+ } ++ screen->mmio.handle = dri_priv->registerHandle; ++ screen->mmio.size = dri_priv->registerSize; ++ if ( drmMap( sPriv->fd, ++ screen->mmio.handle, ++ screen->mmio.size, ++ &screen->mmio.map ) ) { ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); ++ return NULL; ++ } + -+ RADEONMMIO = screen->mmio.map; ++ RADEONMMIO = screen->mmio.map; + -+ screen->status.handle = dri_priv->statusHandle; -+ screen->status.size = dri_priv->statusSize; -+ if ( drmMap( sPriv->fd, -+ screen->status.handle, -+ screen->status.size, -+ &screen->status.map ) ) { -+ drmUnmap( screen->mmio.map, screen->mmio.size ); -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); -+ return NULL; -+ } -+ screen->scratch = (__volatile__ uint32_t *) -+ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); ++ screen->status.handle = dri_priv->statusHandle; ++ screen->status.size = dri_priv->statusSize; ++ if ( drmMap( sPriv->fd, ++ screen->status.handle, ++ screen->status.size, ++ &screen->status.map ) ) { ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ screen->scratch = (__volatile__ uint32_t *) ++ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + -+ screen->buffers = drmMapBufs( sPriv->fd ); -+ if ( !screen->buffers ) { ++ screen->buffers = drmMapBufs( sPriv->fd ); ++ if ( !screen->buffers ) { ++ drmUnmap( screen->status.map, screen->status.size ); ++ drmUnmap( screen->mmio.map, screen->mmio.size ); ++ FREE( screen ); ++ __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); ++ return NULL; ++ } ++ ++ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { ++ screen->gartTextures.handle = dri_priv->gartTexHandle; ++ screen->gartTextures.size = dri_priv->gartTexMapSize; ++ if ( drmMap( sPriv->fd, ++ screen->gartTextures.handle, ++ screen->gartTextures.size, ++ (drmAddressPtr)&screen->gartTextures.map ) ) { ++ drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); -+ __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); ++ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); + return NULL; -+ } -+ -+ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { -+ screen->gartTextures.handle = dri_priv->gartTexHandle; -+ screen->gartTextures.size = dri_priv->gartTexMapSize; -+ if ( drmMap( sPriv->fd, -+ screen->gartTextures.handle, -+ screen->gartTextures.size, -+ (drmAddressPtr)&screen->gartTextures.map ) ) { -+ drmUnmapBufs( screen->buffers ); -+ drmUnmap( screen->status.map, screen->status.size ); -+ drmUnmap( screen->mmio.map, screen->mmio.size ); -+ FREE( screen ); -+ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); -+ return NULL; -+ } -+ -+ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; -+ } -+ } ++ } + ++ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; ++ } + + ret = radeon_set_screen_flags(screen, dri_priv->deviceID); + if (ret == -1) @@ -30122,7 +33165,7 @@ index e964feb..49c7eae 100644 if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && sPriv->ddx_version.minor < 2) { fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); -@@ -846,10 +913,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) +@@ -851,8 +898,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->cpp = dri_priv->bpp / 8; screen->AGPMode = dri_priv->AGPMode; @@ -30130,12 +33173,9 @@ index e964feb..49c7eae 100644 - &temp); + ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp); if (ret) { -- if (screen->chip_family < CHIP_FAMILY_RS600) -+ if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm) + if (screen->chip_family < CHIP_FAMILY_RS600) screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; - else { - FREE( screen ); -@@ -861,8 +927,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) +@@ -866,8 +912,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } if (screen->chip_family >= CHIP_FAMILY_R300) { @@ -30145,39 +33185,41 @@ index e964feb..49c7eae 100644 if (ret) { fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); switch (screen->chip_family) { -@@ -951,26 +1016,158 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) - screen->extensions[i++] = &driMediaStreamCounterExtension.base; +@@ -893,6 +938,17 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + } else { + screen->num_gb_pipes = temp; + } ++ ++ /* pipe overrides */ ++ switch (dri_priv->deviceID) { ++ case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann */ ++ case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ ++ case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ ++ screen->num_gb_pipes = 1; ++ break; ++ default: ++ break; ++ } } -+ if (!screen->kernel_mm) { -+#if !RADEON_COMMON -+ screen->extensions[i++] = &radeonTexOffsetExtension.base; -+#endif -+ -+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) -+ if (IS_R200_CLASS(screen)) -+ screen->extensions[i++] = &r200AllocateExtension.base; -+ -+ screen->extensions[i++] = &r200texOffsetExtension.base; -+#endif -+ -+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) -+ screen->extensions[i++] = &r300texOffsetExtension.base; -+#endif -+ } -+ -+ screen->extensions[i++] = NULL; -+ sPriv->extensions = screen->extensions; -+ -+ screen->driScreen = sPriv; -+ screen->sarea_priv_offset = dri_priv->sarea_priv_offset; + if ( sPriv->drm_version.minor >= 10 ) { +@@ -962,7 +1018,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + if (IS_R200_CLASS(screen)) +- screen->extensions[i++] = &r200AllocateExtension.base; ++ screen->extensions[i++] = &r200AllocateExtension.base; + + screen->extensions[i++] = &r200texOffsetExtension.base; + #endif +@@ -976,6 +1032,154 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) + + screen->driScreen = sPriv; + screen->sarea_priv_offset = dri_priv->sarea_priv_offset; + screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + + screen->sarea_priv_offset); + -+ if (screen->kernel_mm) -+ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); -+ else -+ screen->bom = radeon_bo_manager_legacy_ctor(screen); ++ screen->bom = radeon_bo_manager_legacy_ctor(screen); + if (screen->bom == NULL) { + free(screen); + return NULL; @@ -30192,7 +33234,7 @@ index e964feb..49c7eae 100644 + radeonScreenPtr screen; + int i; + int ret; -+ uint32_t device_id; ++ uint32_t device_id = 0; + uint32_t temp = 0; + + /* Allocate the private area */ @@ -30215,6 +33257,15 @@ index e964feb..49c7eae 100644 + screen->kernel_mm = 1; + screen->chip_flags = 0; + ++ /* if we have kms we can support all of these */ ++ screen->drmSupportsCubeMapsR200 = 1; ++ screen->drmSupportsBlendColor = 1; ++ screen->drmSupportsTriPerf = 1; ++ screen->drmSupportsFragShader = 1; ++ screen->drmSupportsPointSprites = 1; ++ screen->drmSupportsCubeMapsR100 = 1; ++ screen->drmSupportsVertexProgram = 1; ++ + ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR, &screen->irq); + + ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id); @@ -30255,6 +33306,18 @@ index e964feb..49c7eae 100644 + } else { + screen->num_gb_pipes = temp; + } ++ ++ /* pipe overrides */ ++ switch (device_id) { ++ case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann */ ++ case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ ++ case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ ++ screen->num_gb_pipes = 1; ++ break; ++ default: ++ break; ++ } ++ + } + + if (screen->chip_family <= CHIP_FAMILY_RS200) @@ -30277,29 +33340,25 @@ index e964feb..49c7eae 100644 + screen->extensions[i++] = &driMediaStreamCounterExtension.base; + } + - #if !RADEON_COMMON -- screen->extensions[i++] = &radeonTexOffsetExtension.base; ++#if !RADEON_COMMON + screen->extensions[i++] = &radeonTexBufferExtension.base; - #endif - - #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) - screen->extensions[i++] = &r200AllocateExtension.base; - -- screen->extensions[i++] = &r200texOffsetExtension.base; ++#endif ++ ++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) ++ if (IS_R200_CLASS(screen)) ++ screen->extensions[i++] = &r200AllocateExtension.base; ++ + screen->extensions[i++] = &r200TexBufferExtension.base; - #endif - - #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) -- screen->extensions[i++] = &r300texOffsetExtension.base; ++#endif ++ ++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + screen->extensions[i++] = &r300TexBufferExtension.base; - #endif - - screen->extensions[i++] = NULL; - sPriv->extensions = screen->extensions; - - screen->driScreen = sPriv; -- screen->sarea_priv_offset = dri_priv->sarea_priv_offset; ++#endif ++ ++ screen->extensions[i++] = NULL; ++ sPriv->extensions = screen->extensions; ++ ++ screen->driScreen = sPriv; + screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); + if (screen->bom == NULL) { + free(screen); @@ -30308,7 +33367,7 @@ index e964feb..49c7eae 100644 return screen; } -@@ -979,23 +1176,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) +@@ -984,23 +1188,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) static void radeonDestroyScreen( __DRIscreenPrivate *sPriv ) { @@ -30354,7 +33413,7 @@ index e964feb..49c7eae 100644 } -@@ -1004,16 +1210,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) +@@ -1009,16 +1222,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) static GLboolean radeonInitDriver( __DRIscreenPrivate *sPriv ) { @@ -30382,7 +33441,7 @@ index e964feb..49c7eae 100644 /** * Create the Mesa framebuffer and renderbuffers for a given window/drawable. * -@@ -1026,101 +1237,111 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, +@@ -1031,101 +1249,111 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, const __GLcontextModes *mesaVis, GLboolean isPixmap ) { @@ -30574,7 +33633,7 @@ index e964feb..49c7eae 100644 /** * Choose the appropriate CreateContext function based on the chipset. * Eventually, all drivers will go through this process. -@@ -1131,25 +1352,21 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, +@@ -1136,25 +1364,21 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); @@ -30610,7 +33669,7 @@ index e964feb..49c7eae 100644 /** -@@ -1211,13 +1428,103 @@ radeonInitScreen(__DRIscreenPrivate *psp) +@@ -1216,13 +1440,103 @@ radeonInitScreen(__DRIscreenPrivate *psp) if (!radeonInitDriver(psp)) return NULL; @@ -30664,7 +33723,7 @@ index e964feb..49c7eae 100644 + driInitSingleExtension( NULL, ATI_fs_extension ); + driInitExtensions( NULL, point_extensions, GL_FALSE ); +#endif - ++ + if (!radeonInitDriver(psp)) { + return NULL; + } @@ -30704,7 +33763,7 @@ index e964feb..49c7eae 100644 + else + configs = driConcatConfigs(configs, new_configs); + } -+ + + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); @@ -30716,7 +33775,7 @@ index e964feb..49c7eae 100644 /** * Get information about previous buffer swaps. -@@ -1225,31 +1532,26 @@ radeonInitScreen(__DRIscreenPrivate *psp) +@@ -1230,31 +1544,26 @@ radeonInitScreen(__DRIscreenPrivate *psp) static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) { @@ -30758,7 +33817,7 @@ index e964feb..49c7eae 100644 const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, -@@ -1266,23 +1568,7 @@ const struct __DriverAPIRec driDriverAPI = { +@@ -1271,23 +1580,7 @@ const struct __DriverAPIRec driDriverAPI = { .WaitForSBC = NULL, .SwapBuffersMSC = NULL, .CopySubBuffer = radeonCopySubBuffer, @@ -31457,7 +34516,7 @@ index 9abe086..ea6a2e7 100644 #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c -index 32bcff3..28eea44 100644 +index b656100..d9a7ef6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -47,6 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -32306,11 +35365,11 @@ index 32bcff3..28eea44 100644 + if (rmesa->radeon.dma.current) + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, + RADEON_GEM_DOMAIN_GTT, 0); - --void radeonValidateState( GLcontext *ctx ) ++ + return radeon_revalidate_bos(ctx); +} -+ + +-void radeonValidateState( GLcontext *ctx ) +GLboolean radeonValidateState( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); @@ -32339,16 +35398,18 @@ index 32bcff3..28eea44 100644 /* Need an event driven matrix update? */ if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) -@@ -2295,7 +2141,7 @@ void radeonValidateState( GLcontext *ctx ) +@@ -2295,7 +2141,9 @@ void radeonValidateState( GLcontext *ctx ) } - rmesa->NewGLState = 0; + rmesa->radeon.NewGLState = 0; ++ ++ return GL_TRUE; } -@@ -2306,7 +2152,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) +@@ -2306,7 +2154,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) _vbo_InvalidateState( ctx, new_state ); _tnl_InvalidateState( ctx, new_state ); _ae_invalidate_state( ctx, new_state ); @@ -32357,7 +35418,7 @@ index 32bcff3..28eea44 100644 } -@@ -2330,16 +2176,17 @@ static GLboolean check_material( GLcontext *ctx ) +@@ -2330,16 +2178,17 @@ static GLboolean check_material( GLcontext *ctx ) static void radeonWrapRunPipeline( GLcontext *ctx ) { @@ -32419,7 +35480,7 @@ index 2171879..a7c8eef 100644 diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c -index 57dc380..174a7e1 100644 +index 57dc380..c00f59f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -38,39 +38,140 @@ @@ -32607,7 +35668,7 @@ index 57dc380..174a7e1 100644 } -@@ -146,81 +247,373 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) +@@ -146,81 +247,384 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) @@ -32735,8 +35796,9 @@ index 57dc380..174a7e1 100644 + + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -+ } -+ + } + +- rmesa->state.color.clear = 0x00000000; + OUT_BATCH(atom->cmd[CTX_CMD_2]); + + if (!rrb || !rrb->bo) { @@ -32821,14 +35883,24 @@ index 57dc380..174a7e1 100644 + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); + OUT_BATCH(cbpitch); - } - -- rmesa->state.color.clear = 0x00000000; ++ } ++ + // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { + // OUT_BATCH_TABLE((atom->cmd + 14), 4); + // } + + END_BATCH(); ++ BEGIN_BATCH_NO_AUTOSTATE(4); ++ OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); ++ OUT_BATCH(0); ++ OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); ++ if (rrb) { ++ OUT_BATCH(((rrb->width - 1) << RADEON_RE_WIDTH_SHIFT) | ++ ((rrb->height - 1) << RADEON_RE_HEIGHT_SHIFT)); ++ } else { ++ OUT_BATCH(0); ++ } ++ END_BATCH(); +} + +static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) @@ -33030,7 +36102,7 @@ index 57dc380..174a7e1 100644 ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 ); -@@ -233,20 +626,29 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -233,20 +637,29 @@ void radeonInitState( radeonContextPtr rmesa ) ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); @@ -33070,7 +36142,7 @@ index 57dc380..174a7e1 100644 } ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); -@@ -268,43 +670,43 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -268,43 +681,43 @@ void radeonInitState( radeonContextPtr rmesa ) ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); @@ -33145,7 +36217,7 @@ index 57dc380..174a7e1 100644 rmesa->hw.grd.cmd[GRD_CMD_0] = cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); rmesa->hw.fog.cmd[FOG_CMD_0] = -@@ -331,6 +733,22 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -331,6 +744,22 @@ void radeonInitState( radeonContextPtr rmesa ) cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 ); } @@ -33168,7 +36240,7 @@ index 57dc380..174a7e1 100644 rmesa->last_ReallyEnabled = -1; /* Initial Harware state: -@@ -352,19 +770,7 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -352,19 +781,7 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO ); @@ -33189,7 +36261,7 @@ index 57dc380..174a7e1 100644 RADEON_STENCIL_TEST_ALWAYS | RADEON_STENCIL_FAIL_KEEP | RADEON_STENCIL_ZPASS_KEEP | -@@ -374,7 +780,7 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -374,7 +791,7 @@ void radeonInitState( radeonContextPtr rmesa ) if (rmesa->using_hyperz) { rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE | RADEON_Z_DECOMPRESSION_ENABLE; @@ -33198,7 +36270,7 @@ index 57dc380..174a7e1 100644 /* works for q3, but slight rendering errors with glxgears ? */ /* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ /* need this otherwise get lots of lockups with q3 ??? */ -@@ -386,10 +792,9 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -386,10 +803,9 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_ANTI_ALIAS_NONE); rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE | @@ -33210,7 +36282,7 @@ index 57dc380..174a7e1 100644 case DRI_CONF_DITHER_XERRORDIFFRESET: rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT; break; -@@ -397,31 +802,18 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -397,31 +813,18 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE; break; } @@ -33247,7 +36319,7 @@ index 57dc380..174a7e1 100644 rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW | RADEON_BFACE_SOLID | RADEON_FFACE_SOLID | -@@ -444,7 +836,7 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -444,7 +847,7 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_VC_NO_SWAP; #endif @@ -33256,7 +36328,7 @@ index 57dc380..174a7e1 100644 rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS; } -@@ -491,8 +883,8 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -491,8 +894,8 @@ void radeonInitState( radeonContextPtr rmesa ) (2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); /* Initialize the texture offset to the start of the card texture heap */ @@ -33267,7 +36339,7 @@ index 57dc380..174a7e1 100644 rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] = -@@ -513,15 +905,15 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -513,15 +916,15 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] = @@ -33288,7 +36360,7 @@ index 57dc380..174a7e1 100644 } /* Can only add ST1 at the time of doing some multitex but can keep -@@ -613,5 +1005,7 @@ void radeonInitState( radeonContextPtr rmesa ) +@@ -613,5 +1016,7 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; @@ -33972,7 +37044,7 @@ index 779e9ae..df6708f 100644 if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c -index b0aec21..21509c6 100644 +index f2b6deb..2549d5c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -33998,7 +37070,7 @@ index b0aec21..21509c6 100644 switch ( minf ) { case GL_NEAREST: case GL_NEAREST_MIPMAP_NEAREST: -@@ -244,433 +248,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) +@@ -249,433 +253,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } @@ -34034,7 +37106,7 @@ index b0aec21..21509c6 100644 - radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); - radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); - radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); -- radeonSetTexBorderColor( t, texObj->_BorderChan ); +- radeonSetTexBorderColor( t, texObj->BorderColor ); - } - - return t; @@ -34433,7 +37505,7 @@ index b0aec21..21509c6 100644 GLuint unit = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; -@@ -701,7 +285,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target, +@@ -706,7 +290,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target, * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping * [0.0,4.0] to [0,127]. */ @@ -34442,7 +37514,7 @@ index b0aec21..21509c6 100644 0.0 : -1.0; bias = CLAMP( *param, min, 4.0 ); if ( bias == 0 ) { -@@ -734,7 +318,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, +@@ -739,7 +323,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat *params ) { @@ -34451,7 +37523,7 @@ index b0aec21..21509c6 100644 if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, -@@ -762,57 +346,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, +@@ -767,57 +351,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: @@ -34531,7 +37603,7 @@ index b0aec21..21509c6 100644 /* Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } -@@ -832,7 +410,7 @@ static void radeonTexGen( GLcontext *ctx, +@@ -837,7 +415,7 @@ static void radeonTexGen( GLcontext *ctx, GLenum pname, const GLfloat *params ) { @@ -34540,7 +37612,7 @@ index b0aec21..21509c6 100644 GLuint unit = ctx->Texture.CurrentUnit; rmesa->recheck_texgen[unit] = GL_TRUE; } -@@ -846,29 +424,40 @@ static void radeonTexGen( GLcontext *ctx, +@@ -851,29 +429,40 @@ static void radeonTexGen( GLcontext *ctx, static struct gl_texture_object * radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) { @@ -34567,7 +37639,7 @@ index b0aec21..21509c6 100644 + radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT ); + radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); + radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter ); -+ radeonSetTexBorderColor( t, t->base._BorderChan ); ++ radeonSetTexBorderColor( t, t->base.BorderColor ); + return &t->base; } @@ -34592,7 +37664,7 @@ index b0aec21..21509c6 100644 functions->TexEnv = radeonTexEnv; functions->TexParameter = radeonTexParameter; -@@ -877,5 +466,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) +@@ -882,5 +471,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; diff --git a/sources b/sources index 4fc5f8c..322d52f 100644 --- a/sources +++ b/sources @@ -1,3 +1,3 @@ 6ae05158e678f4594343f32c2ca50515 gl-manpages-1.0.1.tar.bz2 a5ec51ed9f0a55dc3462d90d52ff899c xdriinfo-1.0.2.tar.bz2 -2469ad7640d26d1aed61452cfe62fd49 mesa-20090322.tar.bz2 +d7ff7c44fe42f2639845a2975804792d mesa-20090428.tar.bz2