pingou / rpms / mesa

Forked from rpms/mesa 6 years ago
Clone
Blob Blame History Raw
diff --git a/Makefile b/Makefile
index ebc5254..ee40b75 100644
--- a/Makefile
+++ b/Makefile
@@ -174,10 +174,10 @@ ultrix-gcc:
 
 # Rules for making release tarballs
 
-DIRECTORY = Mesa-7.3
-LIB_NAME = MesaLib-7.3
-DEMO_NAME = MesaDemos-7.3
-GLUT_NAME = MesaGLUT-7.3
+DIRECTORY = Mesa-7.4
+LIB_NAME = MesaLib-7.4
+DEMO_NAME = MesaDemos-7.4
+GLUT_NAME = MesaGLUT-7.4
 
 MAIN_FILES = \
 	$(DIRECTORY)/Makefile*						\
diff --git a/configure.ac b/configure.ac
index 33c1072..73caf00 100644
--- a/configure.ac
+++ b/configure.ac
@@ -46,10 +46,14 @@ esac
 MKDEP_OPTIONS=-fdepend
 dnl Ask gcc where it's keeping its secret headers
 if test "x$GCC" = xyes; then
-    GCC_INCLUDES=`$CC -print-file-name=include`
-    if test "x$GCC_INCLUDES" != x; then
-        MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES"
-    fi
+    for dir in include include-fixed; do
+        GCC_INCLUDES=`$CC -print-file-name=$dir`
+        if test "x$GCC_INCLUDES" != x && \
+           test "$GCC_INCLUDES" != "$dir" && \
+           test -d "$GCC_INCLUDES"; then
+            MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES"
+        fi
+    done
 fi
 AC_SUBST([MKDEP_OPTIONS])
 
@@ -362,7 +366,7 @@ default_driver="xlib"
 case "$host_os" in
 linux*)
     case "$host_cpu" in
-    i*86|x86_64|powerpc*) default_driver="dri";;
+    i*86|x86_64|powerpc*|sparc*) default_driver="dri";;
     esac
     ;;
 *freebsd* | dragonfly*)
diff --git a/docs/relnotes-7.3.html b/docs/relnotes-7.3.html
index c083fcb..df89884 100644
--- a/docs/relnotes-7.3.html
+++ b/docs/relnotes-7.3.html
@@ -22,13 +22,21 @@ Some drivers don't support all the features required in OpenGL 2.1.
 </p>
 <p>
 See the <a href="install.html">Compiling/Installing page</a> for prerequisites
-for DRI ardware acceleration.
+for DRI hardware acceleration.
 </p>
 
 
 <h2>MD5 checksums</h2>
 <pre>
-tbd
+8ed03191432b22d118d88d6db497f304  MesaLib-7.3.tar.gz
+781e7811a6ed5c97b2b8defefc8ffbc9  MesaLib-7.3.tar.bz2
+3ccba9a1734ed6d4b3389e1535d90fbf  MesaLib-7.3.zip
+d312e974b31043b13b61bac5fbf00b87  MesaDemos-7.3.tar.gz
+3f0741394069bdf2329565a387396cda  MesaDemos-7.3.tar.bz2
+4d0887fd4c66a824295cdd619f6d34cb  MesaDemos-7.3.zip
+2d7661b66022bcb8878728f3d5bd33ab  MesaGLUT-7.3.tar.gz
+abe8036a724c1a483bdad6b5a55ddc1a  MesaGLUT-7.3.tar.bz2
+5f247819b47e2a7c62d07a6afe5262fb  MesaGLUT-7.3.zip
 </pre>
 
 
diff --git a/docs/relnotes-7.4.html b/docs/relnotes-7.4.html
new file mode 100644
index 0000000..8ad23e5
--- /dev/null
+++ b/docs/relnotes-7.4.html
@@ -0,0 +1,79 @@
+<HTML>
+
+<TITLE>Mesa Release Notes</TITLE>
+
+<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>
+
+<BODY>
+
+<body bgcolor="#eeeeee">
+
+<H1>Mesa 7.4 Release Notes / date TBD</H1>
+
+<p>
+Mesa 7.4 is a stable development release fixing bugs since the 7.3 release.
+</p>
+<p>
+Mesa 7.4 implements the OpenGL 2.1 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 2.1.
+</p>
+<p>
+See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+for DRI hardware acceleration.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+tbd
+</pre>
+
+
+<h2>New features</h2>
+<ul>
+<li>Added MESA_GLX_FORCE_DIRECT env var for Xlib/software driver
+<li>GLSL version 1.20 is returned by the GL_SHADING_LANGUAGE_VERSION query
+</ul>
+
+
+<h2>Bug fixes</h2>
+<ul>
+<li>glGetActiveUniform() returned wrong size for some array types
+<li>Fixed some error checking in glUniform()
+<li>Fixed a potential glTexImage('proxy target') segfault
+<li>Fixed bad reference counting for 1D/2D texture arrays
+<li>Fixed VBO + glPush/PopClientAttrib() bug #19835
+<li>Assorted i965 driver bug fixes
+<li>Fixed a Windows compilation failure in s_triangle.c
+<li>Fixed a GLSL array indexing bug
+<li>Fixes for building on Haiku
+</ul>
+
+<h2>Changes</h2>
+<ul>
+</ul>
+
+
+
+<h2>Driver Status</h2>
+
+<pre>
+Driver			Status
+----------------------	----------------------
+DRI drivers		varies with the driver
+XMesa/GLX (on Xlib)	implements OpenGL 2.1
+OSMesa (off-screen)	implements OpenGL 2.1
+Windows/Win32		implements OpenGL 2.1
+Glide (3dfx Voodoo1/2)	implements OpenGL 1.3
+SVGA			unsupported
+Wind River UGL		unsupported
+DJGPP			unsupported
+GGI			unsupported
+BeOS			unsupported
+Allegro			unsupported
+D3D			unsupported
+</pre>
+
+</body>
+</html>
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 020e485..61c6a20 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -20,6 +20,7 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>
 
 <UL>
+<LI><A HREF="relnotes-7.4.html">7.4 release notes</A>
 <LI><A HREF="relnotes-7.3.html">7.3 release notes</A>
 <LI><A HREF="relnotes-7.2.html">7.2 release notes</A>
 <LI><A HREF="relnotes-7.1.html">7.1 release notes</A>
diff --git a/docs/xlibdriver.html b/docs/xlibdriver.html
index d95f4d5..029e2b1 100644
--- a/docs/xlibdriver.html
+++ b/docs/xlibdriver.html
@@ -169,6 +169,20 @@ the Gamma FAQ</a>
 </p>
 
 
+<H2>Direct Rendering Flag</H2>
+<p>
+Some applications won't run with indirect rendering contexts (which is
+what the Xlib driver supports).
+To force the glXIsDirect() query to return True, set the MESA_GLX_FORCE_DIRECT
+environment variable.
+For example:
+</p>
+<pre>
+	$ export MESA_GLX_FORCE_DIRECT=1
+</pre>
+
+
+
 <H2>Overlay Planes</H2>
 <p>
 Hardware overlay planes are supported by the Xlib driver.  To
@@ -268,6 +282,8 @@ This extension was added in Mesa 2.6
    MESA_BACK_BUFFER - specifies how to implement the back color buffer (X only)
    MESA_PRIVATE_CMAP - force aux/tk libraries to use private colormaps (X only)
    MESA_GAMMA - gamma correction coefficients (X only)
+   MESA_GLX_FORCE_DIRECT - report that the driver is direct rendering, even
+      though it's not.
 </pre>
 
 
diff --git a/include/GL/gl.h b/include/GL/gl.h
index 3891a71..6b4f3f5 100644
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -1,6 +1,6 @@
 /*
  * Mesa 3-D graphics library
- * Version:  6.5.1
+ * Version:  7.4
  *
  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
  *
diff --git a/progs/glsl/multitex.c b/progs/glsl/multitex.c
index 096d40f..b4be463 100644
--- a/progs/glsl/multitex.c
+++ b/progs/glsl/multitex.c
@@ -47,9 +47,12 @@ static const char *TexFiles[2] =
 
 static GLuint Program;
 
-static GLfloat Xrot = -90.0, Yrot = .0, Zrot = 0.0;
+static GLfloat Xrot = 0.0, Yrot = .0, Zrot = 0.0;
 static GLfloat EyeDist = 10;
 static GLboolean Anim = GL_TRUE;
+static GLboolean UseArrays = GL_TRUE;
+
+static GLint VertCoord_attr = -1, TexCoord0_attr = -1, TexCoord1_attr = -1;
 
 
 /* value[0] = tex unit */
@@ -60,32 +63,62 @@ static struct uniform_info Uniforms[] = {
 };
 
 
+static const GLfloat Tex0Coords[4][2] = {
+   { 0.0, 0.0 }, { 2.0, 0.0 }, { 2.0, 2.0 }, { 0.0, 2.0 }
+};
+
+static const GLfloat Tex1Coords[4][2] = {
+   { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 1.0 }, { 0.0, 1.0 }
+};
+
+static const GLfloat VertCoords[4][2] = {
+   { -3.0, -3.0 }, { 3.0, -3.0 }, { 3.0, 3.0 }, { -3.0, 3.0 }
+};
+
+
 static void
-DrawPolygon(GLfloat size)
+DrawPolygonArray(void)
 {
-   glPushMatrix();
-   glRotatef(90, 1, 0, 0);
-   glNormal3f(0, 0, 1);
-   glBegin(GL_POLYGON);
+   if (VertCoord_attr >= 0) {
+      glVertexAttribPointer_func(VertCoord_attr, 2, GL_FLOAT, GL_FALSE,
+                                 0, VertCoords);
+      glEnableVertexAttribArray_func(VertCoord_attr);
+   }
+   else {
+      glVertexPointer(2, GL_FLOAT, 0, VertCoords);
+      glEnableClientState(GL_VERTEX_ARRAY);  /* client-side array state */
+   }
 
-   glMultiTexCoord2f(GL_TEXTURE0, 0, 0);
-   glMultiTexCoord2f(GL_TEXTURE1, 0, 0);
-   glVertex2f(-size, -size);
+   glVertexAttribPointer_func(TexCoord0_attr, 2, GL_FLOAT, GL_FALSE,
+                              0, Tex0Coords);
+   glEnableVertexAttribArray_func(TexCoord0_attr);
 
-   glMultiTexCoord2f(GL_TEXTURE0, 2, 0);
-   glMultiTexCoord2f(GL_TEXTURE1, 1, 0);
-   glVertex2f( size, -size);
+   glVertexAttribPointer_func(TexCoord1_attr, 2, GL_FLOAT, GL_FALSE,
+                              0, Tex1Coords);
+   glEnableVertexAttribArray_func(TexCoord1_attr);
 
-   glMultiTexCoord2f(GL_TEXTURE0, 2, 2);
-   glMultiTexCoord2f(GL_TEXTURE1, 1, 1);
-   glVertex2f( size,  size);
+   glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+}
 
-   glMultiTexCoord2f(GL_TEXTURE0, 0, 2);
-   glMultiTexCoord2f(GL_TEXTURE1, 0, 1);
-   glVertex2f(-size,  size);
+
+static void
+DrawPolygonVert(void)
+{
+   GLuint i;
+
+   glBegin(GL_TRIANGLE_FAN);
+
+   for (i = 0; i < 4; i++) {
+      glVertexAttrib2fv_func(TexCoord0_attr, Tex0Coords[i]);
+      glVertexAttrib2fv_func(TexCoord1_attr, Tex1Coords[i]);
+
+      if (VertCoord_attr >= 0)
+         glVertexAttrib2fv_func(VertCoord_attr, VertCoords[i]);
+      else
+         glVertex2fv(VertCoords[i]);
+   }
 
    glEnd();
-   glPopMatrix();
 }
 
 
@@ -100,7 +133,10 @@ draw(void)
       glRotatef(Yrot, 0, 1, 0);
       glRotatef(Xrot, 1, 0, 0);
 
-      DrawPolygon(3.0);
+      if (UseArrays)
+         DrawPolygonArray();
+      else
+         DrawPolygonVert();
 
    glPopMatrix();
 
@@ -123,8 +159,11 @@ key(unsigned char k, int x, int y)
    (void) x;
    (void) y;
    switch (k) {
-   case ' ':
    case 'a':
+      UseArrays = !UseArrays;
+      printf("Arrays: %d\n", UseArrays);
+      break;
+   case ' ':
       Anim = !Anim;
       if (Anim)
          glutIdleFunc(idle);
@@ -232,6 +271,13 @@ CreateProgram(const char *vertProgFile, const char *fragProgFile,
 
    InitUniforms(program, uniforms);
 
+   TexCoord0_attr = glGetAttribLocation_func(program, "TexCoord0");
+   TexCoord1_attr = glGetAttribLocation_func(program, "TexCoord1");
+   VertCoord_attr = glGetAttribLocation_func(program, "VertCoord");
+   printf("TexCoord0_attr = %d\n", TexCoord0_attr);
+   printf("TexCoord1_attr = %d\n", TexCoord1_attr);
+   printf("VertCoord_attr = %d\n", VertCoord_attr);
+
    return program;
 }
 
diff --git a/progs/glsl/multitex.vert b/progs/glsl/multitex.vert
index 5518ca1..4fae3b7 100644
--- a/progs/glsl/multitex.vert
+++ b/progs/glsl/multitex.vert
@@ -2,9 +2,13 @@
 // Brian Paul
 
 
+attribute vec4 TexCoord0, TexCoord1;
+attribute vec4 VertCoord;
+
 void main() 
 {
-   gl_TexCoord[0] = gl_MultiTexCoord0;
-   gl_TexCoord[1] = gl_MultiTexCoord1;
-   gl_Position = ftransform();
+   gl_TexCoord[0] = TexCoord0;
+   gl_TexCoord[1] = TexCoord1;
+   // note: may use gl_Vertex or VertCoord here for testing:
+   gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
 }
diff --git a/progs/util/extfuncs.h b/progs/util/extfuncs.h
index cf6b29d..070414e 100644
--- a/progs/util/extfuncs.h
+++ b/progs/util/extfuncs.h
@@ -46,6 +46,13 @@ static PFNGLVERTEXATTRIB1FPROC glVertexAttrib1f_func = NULL;
 static PFNGLVERTEXATTRIB2FPROC glVertexAttrib2f_func = NULL;
 static PFNGLVERTEXATTRIB3FPROC glVertexAttrib3f_func = NULL;
 static PFNGLVERTEXATTRIB4FPROC glVertexAttrib4f_func = NULL;
+static PFNGLVERTEXATTRIB1FVPROC glVertexAttrib1fv_func = NULL;
+static PFNGLVERTEXATTRIB2FVPROC glVertexAttrib2fv_func = NULL;
+static PFNGLVERTEXATTRIB3FVPROC glVertexAttrib3fv_func = NULL;
+static PFNGLVERTEXATTRIB4FVPROC glVertexAttrib4fv_func = NULL;
+static PFNGLVERTEXATTRIBPOINTERPROC glVertexAttribPointer_func = NULL;
+static PFNGLENABLEVERTEXATTRIBARRAYPROC glEnableVertexAttribArray_func = NULL;
+static PFNGLDISABLEVERTEXATTRIBARRAYPROC glDisableVertexAttribArray_func = NULL;
 
 /* OpenGL 2.1 */
 static PFNGLUNIFORMMATRIX2X3FVPROC glUniformMatrix2x3fv_func = NULL;
@@ -126,6 +133,14 @@ GetExtensionFuncs(void)
    glVertexAttrib2f_func = (PFNGLVERTEXATTRIB2FPROC) glutGetProcAddress("glVertexAttrib2f");
    glVertexAttrib3f_func = (PFNGLVERTEXATTRIB3FPROC) glutGetProcAddress("glVertexAttrib3f");
    glVertexAttrib4f_func = (PFNGLVERTEXATTRIB4FPROC) glutGetProcAddress("glVertexAttrib4f");
+   glVertexAttrib1fv_func = (PFNGLVERTEXATTRIB1FVPROC) glutGetProcAddress("glVertexAttrib1fv");
+   glVertexAttrib2fv_func = (PFNGLVERTEXATTRIB2FVPROC) glutGetProcAddress("glVertexAttrib2fv");
+   glVertexAttrib3fv_func = (PFNGLVERTEXATTRIB3FVPROC) glutGetProcAddress("glVertexAttrib3fv");
+   glVertexAttrib4fv_func = (PFNGLVERTEXATTRIB4FVPROC) glutGetProcAddress("glVertexAttrib4fv");
+
+   glVertexAttribPointer_func = (PFNGLVERTEXATTRIBPOINTERPROC) glutGetProcAddress("glVertexAttribPointer");
+   glEnableVertexAttribArray_func = (PFNGLENABLEVERTEXATTRIBARRAYPROC) glutGetProcAddress("glEnableVertexAttribArray");
+   glDisableVertexAttribArray_func = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) glutGetProcAddress("glDisableVertexAttribArray");
 
    /* OpenGL 2.1 */
    glUniformMatrix2x3fv_func = (PFNGLUNIFORMMATRIX2X3FVPROC) glutGetProcAddress("glUniformMatrix2x3fv");
diff --git a/progs/xdemos/glxpixmap.c b/progs/xdemos/glxpixmap.c
index 9db4df2..e1abd22 100644
--- a/progs/xdemos/glxpixmap.c
+++ b/progs/xdemos/glxpixmap.c
@@ -178,6 +178,7 @@ int main( int argc, char *argv[] )
    glColor3f( 0.0, 1.0, 1.0 );
    glRectf( -0.75, -0.75, 0.75, 0.75 );
    glFlush();
+   glXWaitGL();
 
    XMapWindow( dpy, win );
 
diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
index 2bee677..639aa19 100644
--- a/src/glx/x11/dri2_glx.c
+++ b/src/glx/x11/dri2_glx.c
@@ -60,6 +60,9 @@ struct __GLXDRIdisplayPrivateRec {
     int driMajor;
     int driMinor;
     int driPatch;
+
+    unsigned long configureSeqno;
+    Bool (*oldConfigProc)(Display *, XEvent *, xEvent *);
 };
 
 struct __GLXDRIcontextPrivateRec {
@@ -73,6 +76,10 @@ struct __GLXDRIdrawablePrivateRec {
     __DRIbuffer buffers[5];
     int bufferCount;
     int width, height;
+    unsigned long configureSeqno;
+    int have_back;
+    int have_front;
+    int have_fake_front;
 };
 
 static void dri2DestroyContext(__GLXDRIcontext *context,
@@ -166,6 +173,7 @@ static __GLXDRIdrawable *dri2CreateDrawable(__GLXscreenConfigs *psc,
     pdraw->base.xDrawable = xDrawable;
     pdraw->base.drawable = drawable;
     pdraw->base.psc = psc;
+    pdraw->configureSeqno = ~0;
 
     DRI2CreateDrawable(psc->dpy, xDrawable);
 
@@ -190,6 +198,10 @@ static void dri2CopySubBuffer(__GLXDRIdrawable *pdraw,
     XRectangle xrect;
     XserverRegion region;
 
+    /* Check we have the right attachments */
+    if (!(priv->have_front && priv->have_back))
+    	return;
+
     xrect.x = x;
     xrect.y = priv->height - y - height;
     xrect.width = width;
@@ -208,6 +220,47 @@ static void dri2SwapBuffers(__GLXDRIdrawable *pdraw)
     dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
 }
 
+static void dri2WaitX(__GLXDRIdrawable *pdraw)
+{
+    __GLXDRIdrawablePrivate *priv = (__GLXDRIdrawablePrivate *) pdraw;
+    XRectangle xrect;
+    XserverRegion region;
+
+    /* Check we have the right attachments */
+    if (!(priv->have_fake_front && priv->have_front))
+    	return;
+
+    xrect.x = 0;
+    xrect.y = 0;
+    xrect.width = priv->width;
+    xrect.height = priv->height;
+
+    region = XFixesCreateRegion(pdraw->psc->dpy, &xrect, 1);
+    DRI2CopyRegion(pdraw->psc->dpy, pdraw->drawable, region,
+		   DRI2BufferFakeFrontLeft, DRI2BufferFrontLeft);
+    XFixesDestroyRegion(pdraw->psc->dpy, region);
+}
+
+static void dri2WaitGL(__GLXDRIdrawable *pdraw)
+{
+    __GLXDRIdrawablePrivate *priv = (__GLXDRIdrawablePrivate *) pdraw;
+    XRectangle xrect;
+    XserverRegion region;
+
+    if (!(priv->have_fake_front && priv->have_front))
+    	return;
+
+    xrect.x = 0;
+    xrect.y = 0;
+    xrect.width = priv->width;
+    xrect.height = priv->height;
+
+    region = XFixesCreateRegion(pdraw->psc->dpy, &xrect, 1);
+    DRI2CopyRegion(pdraw->psc->dpy, pdraw->drawable, region,
+		   DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft);
+    XFixesDestroyRegion(pdraw->psc->dpy, region);
+}
+
 static void dri2DestroyScreen(__GLXscreenConfigs *psc)
 {
     /* Free the direct rendering per screen data */
@@ -223,9 +276,30 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
 	       int *out_count, void *loaderPrivate)
 {
     __GLXDRIdrawablePrivate *pdraw = loaderPrivate;
+    __GLXdisplayPrivate *dpyPriv = __glXInitialize(pdraw->base.psc->dpy);
+    __GLXDRIdisplayPrivate *pdp = (__GLXDRIdisplayPrivate *)dpyPriv->dri2Display;
     DRI2Buffer *buffers;
     int i;
 
+    /**
+     * Check if a ConfigureNotify has come in since we last asked for the
+     * buffers associated with this drawable.  If not, we can assume that they're
+     * the same set at glViewport time, and save a synchronous round-trip to the
+     * X Server.
+     */
+    if (pdraw->configureSeqno == pdp->configureSeqno &&
+	count == pdraw->bufferCount) {
+	for (i = 0; i < count; i++) {
+	    if (pdraw->buffers[i].attachment != attachments[i])
+		break;
+	}
+	if (i == count) {
+	    *out_count = pdraw->bufferCount;
+	    return pdraw->buffers;
+	}
+    }
+    pdraw->configureSeqno = pdp->configureSeqno;
+
     buffers = DRI2GetBuffers(pdraw->base.psc->dpy, pdraw->base.xDrawable,
 			     width, height, attachments, count, out_count);
     if (buffers == NULL)
@@ -233,6 +307,10 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
 
     pdraw->width = *width;
     pdraw->height = *height;
+    pdraw->bufferCount = *out_count;
+    pdraw->have_front = 0;
+    pdraw->have_fake_front = 0;
+    pdraw->have_back = 0;
 
     /* This assumes the DRI2 buffer attachment tokens matches the
      * __DRIbuffer tokens. */
@@ -242,6 +320,12 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
 	pdraw->buffers[i].pitch = buffers[i].pitch;
 	pdraw->buffers[i].cpp = buffers[i].cpp;
 	pdraw->buffers[i].flags = buffers[i].flags;
+	if (pdraw->buffers[i].attachment == __DRI_BUFFER_FRONT_LEFT)
+	    pdraw->have_front = 1;
+	if (pdraw->buffers[i].attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)
+	    pdraw->have_fake_front = 1;
+	if (pdraw->buffers[i].attachment == __DRI_BUFFER_BACK_LEFT)
+	    pdraw->have_back = 1;
     }
 
     Xfree(buffers);
@@ -332,6 +416,8 @@ static __GLXDRIscreen *dri2CreateScreen(__GLXscreenConfigs *psc, int screen,
     psp->createContext = dri2CreateContext;
     psp->createDrawable = dri2CreateDrawable;
     psp->swapBuffers = dri2SwapBuffers;
+    psp->waitGL = dri2WaitGL;
+    psp->waitX = dri2WaitX;
 
     /* DRI2 suports SubBuffer through DRI2CopyRegion, so it's always
      * available.*/
@@ -359,6 +445,28 @@ static void dri2DestroyDisplay(__GLXDRIdisplay *dpy)
     Xfree(dpy);
 }
 
+/**
+ * Makes a note on receiving ConfigureNotify that we need to re-check the
+ * DRI2 buffers, as window sizes may have resulted in reallocation.
+ */
+static Bool dri2ConfigureNotifyProc(Display *dpy, XEvent *re, xEvent *event)
+{
+    __GLXdisplayPrivate *dpyPriv = __glXInitialize(dpy);
+    __GLXDRIdisplayPrivate *pdp;
+    Bool ret;
+
+    /* We should always be able to find our pdp, as it only gets torn down
+     * when the Display is torn down.
+     */
+    pdp = (__GLXDRIdisplayPrivate *)dpyPriv->dri2Display;
+
+    ret = pdp->oldConfigProc(dpy, re, event);
+
+    pdp->configureSeqno = re->xconfigure.serial;
+
+    return ret;
+}
+
 /*
  * Allocate, initialize and return a __DRIdisplayPrivate object.
  * This is called from __glXInitialize() when we are given a new
@@ -381,6 +489,9 @@ _X_HIDDEN __GLXDRIdisplay *dri2CreateDisplay(Display *dpy)
 	return NULL;
     }
 
+    pdp->oldConfigProc = XESetWireToEvent(dpy, ConfigureNotify,
+					  dri2ConfigureNotifyProc);
+
     pdp->driPatch = 0;
 
     pdp->base.destroyDisplay = dri2DestroyDisplay;
diff --git a/src/glx/x11/dri_glx.c b/src/glx/x11/dri_glx.c
index 44724d2..3089aa1 100644
--- a/src/glx/x11/dri_glx.c
+++ b/src/glx/x11/dri_glx.c
@@ -655,6 +655,8 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
     psp->createContext = driCreateContext;
     psp->createDrawable = driCreateDrawable;
     psp->swapBuffers = driSwapBuffers;
+    psp->waitX = NULL;
+    psp->waitGL = NULL;
 
     return psp;
 }
diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h
index 16f6074..d37b3ce 100644
--- a/src/glx/x11/glxclient.h
+++ b/src/glx/x11/glxclient.h
@@ -139,6 +139,8 @@ struct __GLXDRIscreenRec {
     void (*swapBuffers)(__GLXDRIdrawable *pdraw);
     void (*copySubBuffer)(__GLXDRIdrawable *pdraw,
 			  int x, int y, int width, int height);
+    void (*waitX)(__GLXDRIdrawable *pdraw);
+    void (*waitGL)(__GLXDRIdrawable *pdraw);
 };
 
 struct __GLXDRIcontextRec {
@@ -602,6 +604,7 @@ extern void __glXSendLargeCommand(__GLXcontext *, const GLvoid *, GLint,
 				  const GLvoid *, GLint);
 
 /* Initialize the GLX extension for dpy */
+extern __GLXdisplayPrivate * __glXGetPrivateFromDisplay(Display *dpy);
 extern __GLXdisplayPrivate *__glXInitialize(Display*);
 
 /************************************************************************/
diff --git a/src/glx/x11/glxcmds.c b/src/glx/x11/glxcmds.c
index c68b6ac..fc0e593 100644
--- a/src/glx/x11/glxcmds.c
+++ b/src/glx/x11/glxcmds.c
@@ -611,11 +611,15 @@ PUBLIC void glXWaitGL(void)
 
 #ifdef GLX_DIRECT_RENDERING
     if (gc->driContext) {
-/* This bit of ugliness unwraps the glFinish function */
-#ifdef glFinish
-#undef glFinish
-#endif
-	glFinish();
+    	int screen;
+    	__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, gc->currentDrawable, &screen);
+
+    	if ( pdraw != NULL ) {
+	    __GLXscreenConfigs * const psc = GetGLXScreenConfigs(dpy, screen);
+	    glFlush();
+	    if (psc->driScreen->waitGL != NULL)
+	    	(*psc->driScreen->waitGL)(pdraw);
+	}
 	return;
     }
 #endif
@@ -647,7 +651,15 @@ PUBLIC void glXWaitX(void)
 
 #ifdef GLX_DIRECT_RENDERING
     if (gc->driContext) {
-	XSync(dpy, False);
+    	int screen;
+    	__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, gc->currentDrawable, &screen);
+
+    	if ( pdraw != NULL ) {
+	    __GLXscreenConfigs * const psc = GetGLXScreenConfigs(dpy, screen);
+	    if (psc->driScreen->waitX != NULL)
+	    	(*psc->driScreen->waitX)(pdraw);
+	} else
+	    XSync(dpy, False);
 	return;
     }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index ded0796..f990bde 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -60,16 +60,16 @@ struct brw_wm_prog_key {
    GLuint aa_dest_stencil_reg:3;
    GLuint dest_depth_reg:3;
    GLuint nr_depth_regs:3;
-   GLuint projtex_mask:8;
-   GLuint shadowtex_mask:8;
    GLuint computes_depth:1;	/* could be derived from program string */
    GLuint source_depth_to_render_target:1;
    GLuint flat_shade:1;
    GLuint runtime_check_aads_emit:1;
    
-   GLuint yuvtex_mask:8;
-   GLuint yuvtex_swap_mask:8;	/* UV swaped */
-   GLuint pad1:16;
+   GLuint projtex_mask:16;
+   GLuint shadowtex_mask:16;
+   GLuint yuvtex_mask:16;
+   GLuint yuvtex_swap_mask:16;	/* UV swapped */
+   //   GLuint pad1:16;
 
    GLuint program_string_id:32;
    GLuint origin_x, origin_y;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 205a716..fca7b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -379,14 +379,22 @@ static void pass0_precalc_mov( struct brw_wm_compile *c,
 {
    const struct prog_dst_register *dst = &inst->DstReg;
    GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_wm_ref *refs[4];
    GLuint i;
 
    /* Get the effect of a MOV by manipulating our register table:
+    * First get all refs, then assign refs.  This ensures that "in-place"
+    * swizzles such as:
+    *   MOV t, t.xxyx
+    * are handled correctly.  Previously, these two steps were done in
+    * one loop and the above case was incorrectly handled.
     */
    for (i = 0; i < 4; i++) {
-      if (writemask & (1<<i)) {	    
-	 pass0_set_fpreg_ref( c, dst->File, dst->Index, i, 
-			      get_new_ref(c, inst->SrcReg[0], i, NULL));
+      refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
+   }
+   for (i = 0; i < 4; i++) {
+      if (writemask & (1 << i)) {	    
+         pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 048286c..348da34 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -168,6 +168,7 @@ struct intel_context
 
       GLint saved_vp_x, saved_vp_y;
       GLsizei saved_vp_width, saved_vp_height;
+      GLenum saved_matrix_mode;
    } meta;
 
    GLint refcount;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
index cf2f32d..5e32288 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -181,6 +181,7 @@ intel_meta_set_passthrough_transform(struct intel_context *intel)
    intel->meta.saved_vp_y = ctx->Viewport.Y;
    intel->meta.saved_vp_width = ctx->Viewport.Width;
    intel->meta.saved_vp_height = ctx->Viewport.Height;
+   intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode;
 
    _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
 
@@ -202,6 +203,8 @@ intel_meta_restore_transform(struct intel_context *intel)
    _mesa_MatrixMode(GL_MODELVIEW);
    _mesa_PopMatrix();
 
+   _mesa_MatrixMode(intel->meta.saved_matrix_mode);
+
    _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y,
 		  intel->meta.saved_vp_width, intel->meta.saved_vp_height);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index 0e83afa..bb36649 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -92,7 +92,7 @@ intel_texture_drawpixels(GLcontext * ctx,
       return GL_FALSE;
    }
 
-   /* We don't have a way to generate fragments with stencil values which *
+   /* We don't have a way to generate fragments with stencil values which
     * will set the resulting stencil value.
     */
    if (format == GL_STENCIL_INDEX)
@@ -225,6 +225,10 @@ intel_stencil_drawpixels(GLcontext * ctx,
       return GL_FALSE;
    }
 
+   /* We don't support stencil testing/ops here */
+   if (ctx->Stencil.Enabled)
+      return GL_FALSE;
+
    /* We use FBOs for our wrapping of the depthbuffer into a color
     * destination.
     */
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 55a73ea..f6bd1eb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -247,6 +247,9 @@
 #define PCI_CHIP_RS350_7835		0x7835
 #define PCI_CHIP_RS690_791E             0x791E
 #define PCI_CHIP_RS690_791F             0x791F
+#define PCI_CHIP_RS600_793F             0x793F
+#define PCI_CHIP_RS600_7941             0x7941
+#define PCI_CHIP_RS600_7942             0x7942
 #define PCI_CHIP_RS740_796C             0x796C
 #define PCI_CHIP_RS740_796D             0x796D
 #define PCI_CHIP_RS740_796E             0x796E
@@ -270,6 +273,7 @@ enum {
    CHIP_FAMILY_R420,
    CHIP_FAMILY_RV410,
    CHIP_FAMILY_RS400,
+   CHIP_FAMILY_RS600,
    CHIP_FAMILY_RS690,
    CHIP_FAMILY_RS740,
    CHIP_FAMILY_RV515,
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 5f32dd5..81337da 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -680,6 +680,12 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       screen->chip_family = CHIP_FAMILY_RS400;
       break;
 
+   case PCI_CHIP_RS600_793F:
+   case PCI_CHIP_RS600_7941:
+   case PCI_CHIP_RS600_7942:
+      screen->chip_family = CHIP_FAMILY_RS600;
+      break;
+
    case PCI_CHIP_RS690_791E:
    case PCI_CHIP_RS690_791F:
       screen->chip_family = CHIP_FAMILY_RS690;
@@ -838,7 +844,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
                          &temp);
    if (ret) {
-       if (screen->chip_family < CHIP_FAMILY_RS690)
+       if (screen->chip_family < CHIP_FAMILY_RS600)
 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
        else {
            FREE( screen );
@@ -849,7 +855,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
        screen->fbLocation = (temp & 0xffff) << 16;
    }
 
-   if (screen->chip_family >= CHIP_FAMILY_RV515) {
+   if (screen->chip_family >= CHIP_FAMILY_R300) {
        ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
 			     &temp);
        if (ret) {
diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c
index ea35852..73fde86 100644
--- a/src/mesa/drivers/x11/fakeglx.c
+++ b/src/mesa/drivers/x11/fakeglx.c
@@ -1,8 +1,9 @@
 /*
  * Mesa 3-D graphics library
- * Version:  7.1
+ * Version:  7.5
  *
- * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1392,6 +1393,25 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list )
 }
 
 
+/**
+ * Init basic fields of a new fake_glx_context.
+ * If the MESA_GLX_FORCE_DIRECT env var is set, the context will be marked as
+ * a direct rendering context.  Some apps won't run without this.
+ */
+static void
+init_glx_context(struct fake_glx_context *glxCtx, Display *dpy)
+{
+   GLboolean direct = _mesa_getenv("MESA_GLX_FORCE_DIRECT") ? GL_TRUE : GL_FALSE;
+   glxCtx->xmesaContext->direct = direct;
+   glxCtx->glxContext.isDirect = direct;
+   glxCtx->glxContext.currentDpy = dpy;
+   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+
+   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+}
+
+
+
 static GLXContext
 Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
                        GLXContext share_list, Bool direct )
@@ -1430,12 +1450,7 @@ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
       return NULL;
    }
 
-   glxCtx->xmesaContext->direct = GL_FALSE;
-   glxCtx->glxContext.isDirect = GL_FALSE;
-   glxCtx->glxContext.currentDpy = dpy;
-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
-
-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+   init_glx_context(glxCtx, dpy);
 
    return (GLXContext) glxCtx;
 }
@@ -2441,12 +2456,7 @@ Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config,
       return NULL;
    }
 
-   glxCtx->xmesaContext->direct = GL_FALSE;
-   glxCtx->glxContext.isDirect = GL_FALSE;
-   glxCtx->glxContext.currentDpy = dpy;
-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
-
-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+   init_glx_context(glxCtx, dpy);
 
    return (GLXContext) glxCtx;
 }
@@ -2664,12 +2674,7 @@ Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int re
       return NULL;
    }
 
-   glxCtx->xmesaContext->direct = GL_FALSE;
-   glxCtx->glxContext.isDirect = GL_FALSE;
-   glxCtx->glxContext.currentDpy = dpy;
-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
-
-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+   init_glx_context(glxCtx, dpy);
 
    return (GLXContext) glxCtx;
 }
diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h
index 27ccd2e..8594a90 100644
--- a/src/mesa/glapi/glthread.h
+++ b/src/mesa/glapi/glthread.h
@@ -225,9 +225,21 @@ typedef xmutex_rec _glthread_Mutex;
  */
 #ifdef BEOS_THREADS
 
+/* Problem with OS.h and this file on haiku */
+#ifndef __HAIKU__
 #include <kernel/OS.h>
+#endif
+
 #include <support/TLS.h>
 
+/* Only these two typedefs are required here;
+ * they are declared manually because of the OS.h problem.
+ */
+#ifdef __HAIKU__
+typedef int32 thread_id;
+typedef int32 sem_id;
+#endif
+
 typedef struct {
    int32        key;
    int          initMagic;
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 825c841..f77d331 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1275,9 +1275,6 @@ adjust_buffer_object_ref_counts(struct gl_array_attrib *array, GLint step)
       array->ArrayObj->TexCoord[i].BufferObj->RefCount += step;
    for (i = 0; i < VERT_ATTRIB_MAX; i++)
       array->ArrayObj->VertexAttrib[i].BufferObj->RefCount += step;
-
-   array->ArrayBufferObj->RefCount += step;
-   array->ElementArrayBufferObj->RefCount += step;
 }
 
 
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 59fe8e2..df90610 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -794,6 +794,13 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids)
 
          ASSERT(bufObj->Name == ids[i]);
 
+         if (bufObj->Pointer) {
+            /* if mapped, unmap it now */
+            ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+            bufObj->Access = GL_READ_WRITE_ARB;
+            bufObj->Pointer = NULL;
+         }
+
          unbind(ctx, &ctx->Array.ArrayObj->Vertex.BufferObj, bufObj);
          unbind(ctx, &ctx->Array.ArrayObj->Normal.BufferObj, bufObj);
          unbind(ctx, &ctx->Array.ArrayObj->Color.BufferObj, bufObj);
@@ -944,8 +951,10 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
    }
    
    if (bufObj->Pointer) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferDataARB(buffer is mapped)" );
-      return;
+      /* Unmap the existing buffer.  We'll replace it now.  Not an error. */
+      ctx->Driver.UnmapBuffer(ctx, target, bufObj);
+      bufObj->Access = GL_READ_WRITE_ARB;
+      bufObj->Pointer = NULL;
    }  
 
    ASSERT(ctx->Driver.BufferData);
@@ -1061,11 +1070,8 @@ _mesa_UnmapBufferARB(GLenum target)
       return GL_FALSE;
    }
 
-   if (ctx->Driver.UnmapBuffer) {
-      status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
-   }
-
-   bufObj->Access = GL_READ_WRITE_ARB; /* initial value, OK? */
+   status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+   bufObj->Access = GL_READ_WRITE_ARB;
    bufObj->Pointer = NULL;
 
    return status;
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index cf1198c..f74576e 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -191,7 +191,7 @@
 #define MAX_PROGRAM_CALL_DEPTH 8
 #define MAX_PROGRAM_TEMPS 128
 #define MAX_PROGRAM_ADDRESS_REGS 2
-#define MAX_UNIFORMS 256   /**< number of vec4 uniforms */
+#define MAX_UNIFORMS 1024  /**< number of vec4 uniforms */
 #define MAX_VARYING 8      /**< number of float[4] vectors */
 #define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS
 #define MAX_PROGRAM_INPUTS 32
diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c
index becef81..d9d1152 100644
--- a/src/mesa/main/dlopen.c
+++ b/src/mesa/main/dlopen.c
@@ -36,6 +36,15 @@
 #include <dlfcn.h>
 #endif
 
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+#if defined(__HAIKU__)
+/* for NULL */
+#include <stdio.h>
+#endif
+
 
 /**
  * Wrapper for dlopen().
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 9522f04..97cfa26 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -204,7 +204,7 @@ _mesa_enable_sw_extensions(GLcontext *ctx)
    ctx->Extensions.ARB_shading_language_100 = GL_TRUE;
 #endif
 #if FEATURE_ARB_shading_language_120
-   ctx->Extensions.ARB_shading_language_120 = GL_FALSE; /* not quite done */
+   ctx->Extensions.ARB_shading_language_120 = GL_TRUE;
 #endif
    ctx->Extensions.ARB_shadow = GL_TRUE;
    ctx->Extensions.ARB_texture_border_clamp = GL_TRUE;
@@ -427,7 +427,7 @@ _mesa_enable_2_1_extensions(GLcontext *ctx)
    ctx->Extensions.EXT_texture_sRGB = GL_TRUE;
 #endif
 #ifdef FEATURE_ARB_shading_language_120
-   ctx->Extensions.ARB_shading_language_120 = GL_FALSE; /* not quite done */
+   ctx->Extensions.ARB_shading_language_120 = GL_TRUE;
 #endif
 }
 
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 94bf5de..e1008d7 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -84,7 +84,7 @@ compute_version(const GLcontext *ctx)
                               ctx->Extensions.ARB_texture_non_power_of_two &&
                               ctx->Extensions.EXT_blend_equation_separate);
    const GLboolean ver_2_1 = (ver_2_0 &&
-                              /*ctx->Extensions.ARB_shading_language_120 &&*/
+                              ctx->Extensions.ARB_shading_language_120 &&
                               ctx->Extensions.EXT_pixel_buffer_object &&
                               ctx->Extensions.EXT_texture_sRGB);
    if (ver_2_1)
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 13b571d..d71d59c 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -325,7 +325,8 @@ static INLINE int iround(float f)
 }
 #define IROUND(x)  iround(x)
 #elif defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) && \
-			(!defined(__BEOS__) || (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)))
+			(!(defined(__BEOS__) || defined(__HAIKU__))  || \
+			(__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)))
 static INLINE int iround(float f)
 {
    int r;
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 6fe54c7..5284b7d 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -173,13 +173,16 @@ update_arrays( GLcontext *ctx )
 }
 
 
+/**
+ * Update the following fields:
+ *   ctx->VertexProgram._Enabled
+ *   ctx->FragmentProgram._Enabled
+ *   ctx->ATIFragmentShader._Enabled
+ * This needs to be done before texture state validation.
+ */
 static void
-update_program(GLcontext *ctx)
+update_program_enables(GLcontext *ctx)
 {
-   const struct gl_shader_program *shProg = ctx->Shader.CurrentProgram;
-   const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
-   const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
-
    /* These _Enabled flags indicate if the program is enabled AND valid. */
    ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled
       && ctx->VertexProgram.Current->Base.Instructions;
@@ -187,6 +190,29 @@ update_program(GLcontext *ctx)
       && ctx->FragmentProgram.Current->Base.Instructions;
    ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled
       && ctx->ATIFragmentShader.Current->Instructions[0];
+}
+
+
+/**
+ * Update vertex/fragment program state.  In particular, update these fields:
+ *   ctx->VertexProgram._Current
+ *   ctx->VertexProgram._TnlProgram,
+ * These point to the highest priority enabled vertex/fragment program or are
+ * NULL if fixed-function processing is to be done.
+ *
+ * This function needs to be called after texture state validation in case
+ * we're generating a fragment program from fixed-function texture state.
+ *
+ * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex
+ * or fragment program is being used.
+ */
+static GLbitfield
+update_program(GLcontext *ctx)
+{
+   const struct gl_shader_program *shProg = ctx->Shader.CurrentProgram;
+   const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
+   const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
+   GLbitfield new_state = 0x0;
 
    /*
     * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current
@@ -256,15 +282,23 @@ update_program(GLcontext *ctx)
 
    /* Let the driver know what's happening:
     */
-   if (ctx->FragmentProgram._Current != prevFP && ctx->Driver.BindProgram) {
-      ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
-                              (struct gl_program *) ctx->FragmentProgram._Current);
+   if (ctx->FragmentProgram._Current != prevFP) {
+      new_state |= _NEW_PROGRAM;
+      if (ctx->Driver.BindProgram) {
+         ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                          (struct gl_program *) ctx->FragmentProgram._Current);
+      }
    }
    
-   if (ctx->VertexProgram._Current != prevVP && ctx->Driver.BindProgram) {
-      ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
-                              (struct gl_program *) ctx->VertexProgram._Current);
+   if (ctx->VertexProgram._Current != prevVP) {
+      new_state |= _NEW_PROGRAM;
+      if (ctx->Driver.BindProgram) {
+         ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+                            (struct gl_program *) ctx->VertexProgram._Current);
+      }
    }
+
+   return new_state;
 }
 
 
@@ -425,10 +459,29 @@ _mesa_update_state_locked( GLcontext *ctx )
 {
    GLbitfield new_state = ctx->NewState;
    GLbitfield prog_flags = _NEW_PROGRAM;
+   GLbitfield new_prog_state = 0x0;
 
    if (MESA_VERBOSE & VERBOSE_STATE)
       _mesa_print_state("_mesa_update_state", new_state);
 
+   /* Determine which state flags affect vertex/fragment program state */
+   if (ctx->FragmentProgram._MaintainTexEnvProgram) {
+      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+   }
+   if (ctx->VertexProgram._MaintainTnlProgram) {
+      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
+                     _NEW_TRANSFORM | _NEW_POINT |
+                     _NEW_FOG | _NEW_LIGHT |
+                     _MESA_NEW_NEED_EYE_COORDS);
+   }
+
+   /*
+    * Now update derived state info
+    */
+
+   if (new_state & prog_flags)
+      update_program_enables( ctx );
+
    if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
       _mesa_update_modelview_project( ctx, new_state );
 
@@ -488,19 +541,13 @@ _mesa_update_state_locked( GLcontext *ctx )
    if (new_state & _MESA_NEW_NEED_EYE_COORDS) 
       _mesa_update_tnl_spaces( ctx, new_state );
 
-   if (ctx->FragmentProgram._MaintainTexEnvProgram) {
-      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
-   }
-   if (ctx->VertexProgram._MaintainTnlProgram) {
-      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
-                     _NEW_TRANSFORM | _NEW_POINT |
-                     _NEW_FOG | _NEW_LIGHT |
-                     _MESA_NEW_NEED_EYE_COORDS);
+   if (new_state & prog_flags) {
+      /* When we generate programs from fixed-function vertex/fragment state
+       * this call may generate/bind a new program.  If so, we need to
+       * propagate the _NEW_PROGRAM flag to the driver.
+       */
+      new_prog_state |= update_program( ctx );
    }
-   if (new_state & prog_flags)
-      update_program( ctx );
-
-
 
    /*
     * Give the driver a chance to act upon the new_state flags.
@@ -511,7 +558,8 @@ _mesa_update_state_locked( GLcontext *ctx )
     * Set ctx->NewState to zero to avoid recursion if
     * Driver.UpdateState() has to call FLUSH_VERTICES().  (fixed?)
     */
-   new_state = ctx->NewState;
+ out:
+   new_state = ctx->NewState | new_prog_state;
    ctx->NewState = 0;
    ctx->Driver.UpdateState(ctx, new_state);
    ctx->Array.NewState = 0;
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 9e968ba..a437b9c 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2603,7 +2603,7 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat,
                               1, border)) {
          /* when error, clear all proxy texture image parameters */
          if (texImage)
-            clear_teximage_fields(ctx->Texture.ProxyTex[TEXTURE_2D_INDEX]->Image[0][level]);
+            clear_teximage_fields(texImage);
       }
       else {
          /* no error, set the tex image parameters */
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 7848f0b..c937dac 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -979,11 +979,11 @@ _mesa_BindTexture( GLenum target, GLuint texName )
          ASSERT(texUnit->CurrentRect);
          break;
       case GL_TEXTURE_1D_ARRAY_EXT:
-         texUnit->Current1DArray = newTexObj;
+         _mesa_reference_texobj(&texUnit->Current1DArray, newTexObj);
          ASSERT(texUnit->Current1DArray);
          break;
       case GL_TEXTURE_2D_ARRAY_EXT:
-         texUnit->Current2DArray = newTexObj;
+         _mesa_reference_texobj(&texUnit->Current2DArray, newTexObj);
          ASSERT(texUnit->Current2DArray);
          break;
       default:
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 9bfb7e0..7b5d904 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -498,25 +498,27 @@ update_texture_state( GLcontext *ctx )
       texUnit->_ReallyEnabled = 0;
       texUnit->_GenFlags = 0;
 
-      /* Get the bitmask of texture enables.
+      /* Get the bitmask of texture target enables.
        * enableBits will be a mask of the TEXTURE_*_BIT flags indicating
        * which texture targets are enabled (fixed function) or referenced
        * by a fragment shader/program.  When multiple flags are set, we'll
        * settle on the one with highest priority (see texture_override below).
        */
-      if (fprog || vprog) {
-         enableBits = 0x0;
-         if (fprog)
-            enableBits |= fprog->Base.TexturesUsed[unit];
-         if (vprog)
-            enableBits |= vprog->Base.TexturesUsed[unit];
+      enableBits = 0x0;
+      if (vprog) {
+         enableBits |= vprog->Base.TexturesUsed[unit];
+      }
+      if (fprog) {
+         enableBits |= fprog->Base.TexturesUsed[unit];
       }
       else {
-         if (!texUnit->Enabled)
-            continue;
-         enableBits = texUnit->Enabled;
+         /* fixed-function fragment program */
+         enableBits |= texUnit->Enabled;
       }
 
+      if (enableBits == 0x0)
+         continue;
+
       ASSERT(texUnit->Current1D);
       ASSERT(texUnit->Current2D);
       ASSERT(texUnit->Current3D);
diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h
index 3d874c8..84dcb26 100644
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -1,6 +1,6 @@
 /*
  * Mesa 3-D graphics library
- * Version:  7.3
+ * Version:  7.4
  *
  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
  *
@@ -29,9 +29,9 @@
 
 /* Mesa version */
 #define MESA_MAJOR 7
-#define MESA_MINOR 3
+#define MESA_MINOR 4
 #define MESA_PATCH 0
-#define MESA_VERSION_STRING "7.3"
+#define MESA_VERSION_STRING "7.4"
 
 /* To make version comparison easy */
 #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h
index 268afc5..0ef0251 100644
--- a/src/mesa/shader/prog_instruction.h
+++ b/src/mesa/shader/prog_instruction.h
@@ -240,12 +240,21 @@ typedef enum prog_opcode {
 
 
 /**
+ * Number of bits for the src/dst register Index field.
+ * This limits the size of temp/uniform register files.
+ */
+#define INST_INDEX_BITS 10
+
+
+/**
  * Instruction source register.
  */
 struct prog_src_register
 {
    GLuint File:4;	/**< One of the PROGRAM_* register file values. */
-   GLint Index:9;	/**< May be negative for relative addressing. */
+   GLint Index:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
+                                     * May be negative for relative addressing.
+                                     */
    GLuint Swizzle:12;
    GLuint RelAddr:1;
 
@@ -289,7 +298,7 @@ struct prog_src_register
 struct prog_dst_register
 {
    GLuint File:4;      /**< One of the PROGRAM_* register file values */
-   GLuint Index:8;
+   GLuint Index:INST_INDEX_BITS;  /**< Unsigned, never negative */
    GLuint WriteMask:4;
    GLuint RelAddr:1;
 
@@ -322,8 +331,7 @@ struct prog_dst_register
     */
    GLuint CondSrc:1;
    /*@}*/
-
-   GLuint pad:30;
+   GLuint pad:28;
 };
 
 
diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c
index 0ec13a4..b67a87f 100644
--- a/src/mesa/shader/prog_print.c
+++ b/src/mesa/shader/prog_print.c
@@ -85,6 +85,9 @@ file_string(enum register_file f, gl_prog_print_mode mode)
 static const char *
 arb_input_attrib_string(GLint index, GLenum progType)
 {
+   /*
+    * These strings should match the VERT_ATTRIB_x and FRAG_ATTRIB_x tokens.
+    */
    const char *vertAttribs[] = {
       "vertex.position",
       "vertex.weight",
@@ -159,6 +162,9 @@ arb_input_attrib_string(GLint index, GLenum progType)
 static const char *
 arb_output_attrib_string(GLint index, GLenum progType)
 {
+   /*
+    * These strings should match the VERT_RESULT_x and FRAG_RESULT_x tokens.
+    */
    const char *vertResults[] = {
       "result.position",
       "result.color.primary",
@@ -183,7 +189,12 @@ arb_output_attrib_string(GLint index, GLenum progType)
    };
    const char *fragResults[] = {
       "result.color",
-      "result.depth"
+      "result.color(half)",
+      "result.depth",
+      "result.color[0]",
+      "result.color[1]",
+      "result.color[2]",
+      "result.color[3]"
    };
 
    if (progType == GL_VERTEX_PROGRAM_ARB) {
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
index d114828..828db38 100644
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -53,6 +53,15 @@ _mesa_init_program(GLcontext *ctx)
 {
    GLuint i;
 
+   /*
+    * If this assertion fails, we need to increase the field
+    * size for register indexes.
+    */
+   ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4
+          <= (1 << INST_INDEX_BITS));
+   ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4
+          <= (1 << INST_INDEX_BITS));
+
    ctx->Program.ErrorPos = -1;
    ctx->Program.ErrorString = _mesa_strdup("");
 
diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
index b3d66c5..28b668d 100644
--- a/src/mesa/shader/shader_api.c
+++ b/src/mesa/shader/shader_api.c
@@ -1,8 +1,9 @@
 /*
  * Mesa 3-D graphics library
- * Version:  7.2
+ * Version:  7.5
  *
  * Copyright (C) 2004-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -801,6 +802,27 @@ is_integer_type(GLenum type)
 }
 
 
+static GLboolean
+is_sampler_type(GLenum type)
+{
+   switch (type) {
+   case GL_SAMPLER_1D:
+   case GL_SAMPLER_2D:
+   case GL_SAMPLER_3D:
+   case GL_SAMPLER_CUBE:
+   case GL_SAMPLER_1D_SHADOW:
+   case GL_SAMPLER_2D_SHADOW:
+   case GL_SAMPLER_2D_RECT_ARB:
+   case GL_SAMPLER_2D_RECT_SHADOW_ARB:
+   case GL_SAMPLER_1D_ARRAY_EXT:
+   case GL_SAMPLER_2D_ARRAY_EXT:
+      return GL_TRUE;
+   default:
+      return GL_FALSE;
+   }
+}
+
+
 static void
 _mesa_get_active_attrib(GLcontext *ctx, GLuint program, GLuint index,
                         GLsizei maxLength, GLsizei *length, GLint *size,
@@ -866,6 +888,7 @@ _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index,
 {
    const struct gl_shader_program *shProg;
    const struct gl_program *prog;
+   const struct gl_program_parameter *param;
    GLint progPos;
 
    shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
@@ -891,14 +914,30 @@ _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index,
    if (!prog || progPos < 0)
       return; /* should never happen */
 
-   if (nameOut)
-      copy_string(nameOut, maxLength, length,
-                  prog->Parameters->Parameters[progPos].Name);
-   if (size)
-      *size = prog->Parameters->Parameters[progPos].Size
-         / sizeof_glsl_type(prog->Parameters->Parameters[progPos].DataType);
-   if (type)
-      *type = prog->Parameters->Parameters[progPos].DataType;
+   ASSERT(progPos < prog->Parameters->NumParameters);
+   param = &prog->Parameters->Parameters[progPos];
+
+   if (nameOut) {
+      copy_string(nameOut, maxLength, length, param->Name);
+   }
+
+   if (size) {
+      GLint typeSize = sizeof_glsl_type(param->DataType);
+      if (param->Size > typeSize) {
+         /* This is an array.
+          * Array elements are placed on vector[4] boundaries so they're
+          * a multiple of four floats.  We round typeSize up to next multiple
+          * of four to get the right size below.
+          */
+         typeSize = (typeSize + 3) & ~3;
+      }
+      /* Note that the returned size is in units of the <type>, not bytes */
+      *size = param->Size / typeSize;
+   }
+
+   if (type) {
+      *type = param->DataType;
+   }
 }
 
 
@@ -1135,24 +1174,30 @@ get_uniform_rows_cols(const struct gl_program_parameter *p,
 }
 
 
-#define MAX_UNIFORM_ELEMENTS 16
-
 /**
- * Helper for GetUniformfv(), GetUniformiv()
- * Returns number of elements written to 'params' output.
+ * Helper for get_uniform[fi]v() functions.
+ * Given a shader program name and uniform location, return the vertex or
+ * fragment gl_program containing the uniform and its parameter position.
  */
-static GLuint
-get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
-              GLfloat *params)
+static void
+lookup_uniform_parameter(GLcontext *ctx, GLuint program, GLint location,
+                         struct gl_program **progOut, GLint *paramPosOut)
 {
    struct gl_shader_program *shProg
       = _mesa_lookup_shader_program_err(ctx, program, "glGetUniform[if]v");
-   if (shProg) {
-      if (shProg->Uniforms &&
-          location >= 0 && location < (GLint) shProg->Uniforms->NumUniforms) {
-         GLint progPos;
-         const struct gl_program *prog = NULL;
+   struct gl_program *prog = NULL;
+   GLint progPos = -1;
+
+   /* if shProg is NULL, we'll have already recorded an error */
 
+   if (shProg) {
+      if (!shProg->Uniforms ||
+          location < 0 ||
+          location >= (GLint) shProg->Uniforms->NumUniforms) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,  "glGetUniformfv(location)");
+      }
+      else {
+         /* OK, find the gl_program and program parameter location */
          progPos = shProg->Uniforms->Uniforms[location].VertPos;
          if (progPos >= 0) {
             prog = &shProg->VertexProgram->Base;
@@ -1163,33 +1208,11 @@ get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
                prog = &shProg->FragmentProgram->Base;
             }
          }
-
-         ASSERT(prog);
-         if (prog) {
-            const struct gl_program_parameter *p =
-               &prog->Parameters->Parameters[progPos];
-            GLint rows, cols, i, j, k;
-
-            /* See uniformiv() below */                    
-            assert(p->Size <= MAX_UNIFORM_ELEMENTS);
-
-            get_uniform_rows_cols(p, &rows, &cols);
-
-            k = 0;
-            for (i = 0; i < rows; i++) {
-               for (j = 0; j < cols; j++ ) {
-                  params[k++] = prog->Parameters->ParameterValues[progPos+i][j];
-               }
-            }
-
-            return p->Size;
-         }
-      }
-      else {
-         _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)");
       }
    }
-   return 0;
+
+   *progOut = prog;
+   *paramPosOut = progPos;
 }
 
 
@@ -1200,23 +1223,54 @@ static void
 _mesa_get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
                     GLfloat *params)
 {
-   (void) get_uniformfv(ctx, program, location, params);
+   struct gl_program *prog;
+   GLint paramPos;
+
+   lookup_uniform_parameter(ctx, program, location, &prog, &paramPos);
+
+   if (prog) {
+      const struct gl_program_parameter *p =
+         &prog->Parameters->Parameters[paramPos];
+      GLint rows, cols, i, j, k;
+
+      get_uniform_rows_cols(p, &rows, &cols);
+
+      k = 0;
+      for (i = 0; i < rows; i++) {
+         for (j = 0; j < cols; j++ ) {
+            params[k++] = prog->Parameters->ParameterValues[paramPos+i][j];
+         }
+      }
+   }
 }
 
 
 /**
  * Called via ctx->Driver.GetUniformiv().
+ * \sa _mesa_get_uniformfv, only difference is a cast.
  */
 static void
 _mesa_get_uniformiv(GLcontext *ctx, GLuint program, GLint location,
                     GLint *params)
 {
-   GLfloat fparams[MAX_UNIFORM_ELEMENTS];
-   GLuint n = get_uniformfv(ctx, program, location, fparams);
-   GLuint i;
-   assert(n <= MAX_UNIFORM_ELEMENTS);
-   for (i = 0; i < n; i++) {
-      params[i] = (GLint) fparams[i];
+   struct gl_program *prog;
+   GLint paramPos;
+
+   lookup_uniform_parameter(ctx, program, location, &prog, &paramPos);
+
+   if (prog) {
+      const struct gl_program_parameter *p =
+         &prog->Parameters->Parameters[paramPos];
+      GLint rows, cols, i, j, k;
+
+      get_uniform_rows_cols(p, &rows, &cols);
+
+      k = 0;
+      for (i = 0; i < rows; i++) {
+         for (j = 0; j < cols; j++ ) {
+            params[k++] = (GLint) prog->Parameters->ParameterValues[paramPos+i][j];
+         }
+      }
    }
 }
 
@@ -1401,7 +1455,8 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
          return;
       }
       if (!shProg->LinkStatus) {
-         _mesa_error(ctx, GL_INVALID_OPERATION, "glUseProgram");
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glUseProgram(program %u not linked)", program);
          return;
       }
    }
@@ -1447,27 +1502,6 @@ _mesa_update_shader_textures_used(struct gl_program *prog)
 }
 
 
-static GLboolean
-is_sampler_type(GLenum type)
-{
-   switch (type) {
-   case GL_SAMPLER_1D:
-   case GL_SAMPLER_2D:
-   case GL_SAMPLER_3D:
-   case GL_SAMPLER_CUBE:
-   case GL_SAMPLER_1D_SHADOW:
-   case GL_SAMPLER_2D_SHADOW:
-   case GL_SAMPLER_2D_RECT_ARB:
-   case GL_SAMPLER_2D_RECT_SHADOW_ARB:
-   case GL_SAMPLER_1D_ARRAY_EXT:
-   case GL_SAMPLER_2D_ARRAY_EXT:
-      return GL_TRUE;
-   default:
-      return GL_FALSE;
-   }
-}
-
-
 /**
  * Check if the type given by userType is allowed to set a uniform of the
  * target type.  Generally, equivalence is required, but setting Boolean
@@ -1506,10 +1540,10 @@ compatible_types(GLenum userType, GLenum targetType)
  * \param program  the program whose uniform to update
  * \param index  the index of the program parameter for the uniform
  * \param offset  additional parameter slot offset (for arrays)
- * \param type  the datatype of the uniform
+ * \param type  the incoming datatype of 'values'
  * \param count  the number of uniforms to set
- * \param elems  number of elements per uniform
- * \param values  the new values
+ * \param elems  number of elements per uniform (1, 2, 3 or 4)
+ * \param values  the new values, of datatype 'type'
  */
 static void
 set_program_uniform(GLcontext *ctx, struct gl_program *program,
@@ -1519,8 +1553,12 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
 {
    struct gl_program_parameter *param =
       &program->Parameters->Parameters[index];
+   const GLboolean isUniformBool = is_boolean_type(param->DataType);
+   const GLboolean areIntValues = is_integer_type(type);
 
    assert(offset >= 0);
+   assert(elems >= 1);
+   assert(elems <= 4);
 
    if (!compatible_types(type, param->DataType)) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)");
@@ -1535,27 +1573,36 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
    if (param->Type == PROGRAM_SAMPLER) {
       /* This controls which texture unit which is used by a sampler */
       GLuint texUnit, sampler;
+      GLint i;
 
       /* data type for setting samplers must be int */
-      if (type != GL_INT || count != 1) {
+      if (type != GL_INT) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glUniform(only glUniform1i can be used "
                      "to set sampler uniforms)");
          return;
       }
 
-      sampler = (GLuint) program->Parameters->ParameterValues[index][0];
-      texUnit = ((GLuint *) values)[0];
+      /* XXX arrays of samplers haven't been tested much, but it's not a
+       * common thing...
+       */
+      for (i = 0; i < count; i++) {
+         sampler = (GLuint) program->Parameters->ParameterValues[index + i][0];
+         texUnit = ((GLuint *) values)[i];
+
+         /* check that the sampler (tex unit index) is legal */
+         if (texUnit >= ctx->Const.MaxTextureImageUnits) {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "glUniform1(invalid sampler/tex unit index)");
+            return;
+         }
 
-      /* check that the sampler (tex unit index) is legal */
-      if (texUnit >= ctx->Const.MaxTextureImageUnits) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glUniform1(invalid sampler/tex unit index)");
-         return;
+         /* This maps a sampler to a texture unit: */
+         if (sampler < MAX_SAMPLERS) {
+            program->SamplerUnits[sampler] = texUnit;
+         }
       }
 
-      /* This maps a sampler to a texture unit: */
-      program->SamplerUnits[sampler] = texUnit;
       _mesa_update_shader_textures_used(program);
 
       FLUSH_VERTICES(ctx, _NEW_TEXTURE);
@@ -1563,20 +1610,36 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
    else {
       /* ordinary uniform variable */
       GLsizei k, i;
-      GLint slots = (param->Size + 3) / 4;
+      const GLint slots = (param->Size + 3) / 4;
+      const GLint typeSize = sizeof_glsl_type(param->DataType);
 
-      if (count * elems > (GLint) param->Size) {
-         _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(count too large)");
-         return;
+      if (param->Size > typeSize) {
+         /* an array */
+         /* we'll ignore extra data below */
+      }
+      else {
+         /* non-array: count must be one */
+         if (count != 1) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glUniform(uniform is not an array)");
+            return;
+         }
       }
 
-      if (count > slots)
-         count = slots;
-
+      /* loop over number of array elements */
       for (k = 0; k < count; k++) {
-         GLfloat *uniformVal =
-            program->Parameters->ParameterValues[index + offset + k];
-         if (is_integer_type(type)) {
+         GLfloat *uniformVal;
+
+         if (offset + k >= slots) {
+            /* Extra array data is ignored */
+            break;
+         }
+
+         /* uniformVal (the destination) is always float[4] */
+         uniformVal = program->Parameters->ParameterValues[index + offset + k];
+
+         if (areIntValues) {
+            /* convert user's ints to floats */
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
                uniformVal[i] = (GLfloat) iValues[i];
@@ -1590,7 +1653,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
          }
 
          /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
-         if (is_boolean_type(param->DataType)) {
+         if (isUniformBool) {
             for (i = 0; i < elems; i++) {
                uniformVal[i] = uniformVal[i] ? 1.0 : 0.0;
             }
@@ -1619,6 +1682,11 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
    if (location == -1)
       return;   /* The standard specifies this as a no-op */
 
+   if (location < -1) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)");
+      return;
+   }
+
    split_location_offset(&location, &offset);
 
    if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) {
@@ -1758,6 +1826,11 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,
    if (location == -1)
       return;   /* The standard specifies this as a no-op */
 
+   if (location < -1) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniformMatrix(location)");
+      return;
+   }
+
    split_location_offset(&location, &offset);
 
    if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) {
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index 11340d2..cfdb868 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -3662,7 +3662,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper)
       if (lhs && rhs) {
          /* convert lhs swizzle into writemask */
          const GLuint swizzle = root_swizzle(lhs->Store);
-         GLuint writemask, newSwizzle;
+         GLuint writemask, newSwizzle = 0x0;
          if (!swizzle_to_writemask(A, swizzle, &writemask, &newSwizzle)) {
             /* Non-simple writemask, need to swizzle right hand side in
              * order to put components into the right place.
diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c
index 818b90b..26a0598 100644
--- a/src/mesa/shader/slang/slang_compile.c
+++ b/src/mesa/shader/slang/slang_compile.c
@@ -1450,7 +1450,7 @@ parse_expression(slang_parse_ctx * C, slang_output_ctx * O,
       case OP_CALL:
          {
             GLboolean array_constructor = GL_FALSE;
-            GLint array_constructor_size;
+            GLint array_constructor_size = 0;
 
             op->type = SLANG_OPER_CALL;
             op->a_id = parse_identifier(C);
diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
index ea446fa..80ccc79 100644
--- a/src/mesa/shader/slang/slang_emit.c
+++ b/src/mesa/shader/slang/slang_emit.c
@@ -164,7 +164,7 @@ _slang_var_swizzle(GLint size, GLint comp)
 {
    switch (size) {
    case 1:
-      return MAKE_SWIZZLE4(comp, comp, comp, comp);
+      return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
    case 2:
       return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
    case 3:
@@ -451,7 +451,7 @@ emit_arl_load(slang_emit_info *emitInfo,
    struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
    inst->SrcReg[0].File = file;
    inst->SrcReg[0].Index = index;
-   inst->SrcReg[0].Swizzle = swizzle;
+   inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
    inst->DstReg.File = PROGRAM_ADDRESS;
    inst->DstReg.Index = 0;
    inst->DstReg.WriteMask = WRITEMASK_X;
@@ -873,6 +873,7 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
 
    if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
       slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
+      n->Store = NULL;
       return NULL;
    }
 
@@ -902,6 +903,7 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
       slang_ir_storage tempStore;
 
       if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
+         n->Store = NULL;
          return NULL;
          /* out of temps */
       }
@@ -1358,6 +1360,7 @@ emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
 
 #if PEEPHOLE_OPTIMIZATIONS
    if (inst &&
+       (n->Children[1]->Opcode != IR_SWIZZLE) &&
        _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
        (inst->DstReg.File == n->Children[1]->Store->File) &&
        (inst->DstReg.Index == n->Children[1]->Store->Index) &&
@@ -1374,13 +1377,9 @@ emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
        * becomes:
        *   MUL a, x, y;
        */
-      if (n->Children[1]->Opcode != IR_SWIZZLE)
-         _slang_free_temp(emitInfo->vt, n->Children[1]->Store);
-      *n->Children[1]->Store = *n->Children[0]->Store;
 
       /* fixup the previous instruction (which stored the RHS result) */
       assert(n->Children[0]->Store->Index >= 0);
-
       storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
       return inst;
    }
@@ -1813,6 +1812,25 @@ emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
 }
 
 
+/**
+ * Size of a swizzle: the count of leading components that come before
+ * the first NIL/undefined component, or 4 when no component is NIL.
+ *  swizzle_size(".zzxx") = 4
+ *  swizzle_size(".xy??") = 2
+ *  swizzle_size(".w???") = 1
+ */
+static GLuint
+swizzle_size(GLuint swizzle)
+{
+   GLuint size = 0;
+   /* stop at the first NIL component; anything after it is ignored */
+   while (size < 4 && GET_SWZ(swizzle, size) != SWIZZLE_NIL) {
+      size++;
+   }
+   return size;
+}
+
+
 static struct prog_instruction *
 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
 {
@@ -1820,14 +1838,25 @@ emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
 
    inst = emit(emitInfo, n->Children[0]);
 
-#if 0
-   assert(n->Store->Parent);
-   /* Apply this node's swizzle to parent's storage */
-   GLuint swizzle = n->Store->Swizzle;
-   _slang_copy_ir_storage(n->Store, n->Store->Parent);
-   n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
+   if (!n->Store->Parent) {
+      /* this covers a case such as "(b ? p : q).x" */
+      n->Store->Parent = n->Children[0]->Store;
+      assert(n->Store->Parent);
+   }
+
+   {
+      const GLuint swizzle = n->Store->Swizzle;
+      /* new storage is parent storage with updated Swizzle + Size fields */
+      _slang_copy_ir_storage(n->Store, n->Store->Parent);
+      /* Apply this node's swizzle to parent's storage */
+      n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
+      /* Update size */
+      n->Store->Size = swizzle_size(n->Store->Swizzle);
+   }
+
    assert(!n->Store->Parent);
-#endif
+   assert(n->Store->Index >= 0);
+
    return inst;
 }
 
@@ -2120,6 +2149,10 @@ emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
       /* mark var as used */
       _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
    }
+   else if (n->Store->File == PROGRAM_INPUT) {
+      assert(n->Store->Index >= 0);
+      emitInfo->prog->InputsRead |= (1 << n->Store->Index);
+   }
 
    if (n->Store->Index < 0) {
       /* probably ran out of registers */
@@ -2424,7 +2457,9 @@ _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
       maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
    }
    if (prog->Parameters->NumParameters > maxUniforms) {
-      slang_info_log_error(log, "Constant/uniform register limit exceeded");
+      slang_info_log_error(log, "Constant/uniform register limit exceeded "
+                           "(max=%u vec4)", maxUniforms);
+
       return GL_FALSE;
    }
 
diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c
index c6d5cc0..b8427ca 100644
--- a/src/mesa/shader/slang/slang_link.c
+++ b/src/mesa/shader/slang/slang_link.c
@@ -318,7 +318,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
 {
    GLint attribMap[MAX_VERTEX_ATTRIBS];
    GLuint i, j;
-   GLbitfield usedAttributes;
+   GLbitfield usedAttributes; /* generics only, not legacy attributes */
 
    assert(origProg != linkedProg);
    assert(origProg->Target == GL_VERTEX_PROGRAM_ARB);
@@ -342,6 +342,15 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
       usedAttributes |= (1 << attr);
    }
 
+   /* If gl_Vertex is used, that actually counts against the limit
+    * on generic vertex attributes.  This avoids the ambiguity of
+    * whether glVertexAttrib4fv(0, v) sets legacy attribute 0 (vert pos)
+    * or generic attribute[0].  If gl_Vertex is used, we want the former.
+    */
+   if (origProg->InputsRead & VERT_BIT_POS) {
+      usedAttributes |= 0x1;
+   }
+
    /* initialize the generic attribute map entries to -1 */
    for (i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
       attribMap[i] = -1;
@@ -384,7 +393,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
                    * Start at 1 since generic attribute 0 always aliases
                    * glVertex/position.
                    */
-                  for (attr = 1; attr < MAX_VERTEX_ATTRIBS; attr++) {
+                  for (attr = 0; attr < MAX_VERTEX_ATTRIBS; attr++) {
                      if (((1 << attr) & usedAttributes) == 0)
                         break;
                   }
@@ -486,8 +495,33 @@ _slang_update_inputs_outputs(struct gl_program *prog)
             maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1));
          }
       }
+
       if (inst->DstReg.File == PROGRAM_OUTPUT) {
          prog->OutputsWritten |= 1 << inst->DstReg.Index;
+         if (inst->DstReg.RelAddr) {
+            /* If the output attribute is indexed with relative addressing
+             * we know that it must be a varying or texcoord such as
+             * gl_TexCoord[i] = v;  In this case, mark all the texcoords
+             * or varying outputs as being written.  It's not an error if
+             * a vertex shader writes varying vars that aren't used by the
+             * fragment shader.  But it is an error for a fragment shader
+             * to use varyings that are not written by the vertex shader.
+             */
+            if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+               if (inst->DstReg.Index == VERT_RESULT_TEX0) {
+                  /* mark all texcoord outputs as written */
+                  const GLbitfield mask =
+                     ((1 << MAX_TEXTURE_COORD_UNITS) - 1) << VERT_RESULT_TEX0;
+                  prog->OutputsWritten |= mask;
+               }
+               else if (inst->DstReg.Index == VERT_RESULT_VAR0) {
+                  /* mark all generic varying outputs as written */
+                  const GLbitfield mask =
+                     ((1 << MAX_VARYING) - 1) << VERT_RESULT_VAR0;
+                  prog->OutputsWritten |= mask;
+               }
+            }
+         }
       }
       else if (inst->DstReg.File == PROGRAM_ADDRESS) {
          maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1);
diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c
index de0c939..a4ebacc 100644
--- a/src/mesa/shader/slang/slang_vartable.c
+++ b/src/mesa/shader/slang/slang_vartable.c
@@ -4,6 +4,7 @@
 #include "shader/prog_print.h"
 #include "slang_compile.h"
 #include "slang_compile_variable.h"
+#include "slang_emit.h"
 #include "slang_mem.h"
 #include "slang_vartable.h"
 #include "slang_ir.h"
@@ -72,9 +73,8 @@ _slang_delete_var_table(slang_var_table *vt)
 
 
 /**
- * Create new table, put at head, return ptr to it.
- * XXX we should take a maxTemps parameter to indicate how many temporaries
- * are available for the current shader/program target.
+ * Create new table on top of vartable stack.
+ * Used when we enter a {} block.
  */
 void
 _slang_push_var_table(slang_var_table *vt)
@@ -95,7 +95,8 @@ _slang_push_var_table(slang_var_table *vt)
 
 
 /**
- * Destroy given table, return ptr to Parent
+ * Pop top entry from variable table.
+ * Used when we leave a {} block.
  */
 void
 _slang_pop_var_table(slang_var_table *vt)
@@ -125,10 +126,12 @@ _slang_pop_var_table(slang_var_table *vt)
       else
          comp = 0;
 
-      assert(store->Index >= 0);
-      for (j = 0; j < store->Size; j++) {
-         assert(t->Temps[store->Index * 4 + j + comp] == VAR);
-         t->Temps[store->Index * 4 + j + comp] = FREE;
+      /* store->Index may be -1 if we run out of registers */
+      if (store->Index >= 0) {
+         for (j = 0; j < store->Size; j++) {
+            assert(t->Temps[store->Index * 4 + j + comp] == VAR);
+            t->Temps[store->Index * 4 + j + comp] = FREE;
+         }
       }
       store->Index = -1;
    }
@@ -156,7 +159,7 @@ _slang_pop_var_table(slang_var_table *vt)
 
 
 /**
- * Add a new variable to the given symbol table.
+ * Add a new variable to the given var/symbol table.
  */
 void
 _slang_add_variable(slang_var_table *vt, slang_variable *v)
@@ -214,6 +217,7 @@ alloc_reg(slang_var_table *vt, GLint size, GLboolean isTemp)
    for (i = 0; i <= vt->MaxRegisters * 4 - size; i += step) {
       GLuint found = 0;
       for (j = 0; j < (GLuint) size; j++) {
+         assert(i + j < 4 * MAX_PROGRAM_TEMPS);
          if (i + j < vt->MaxRegisters * 4 && t->Temps[i + j] == FREE) {
             found++;
          }
@@ -225,13 +229,17 @@ alloc_reg(slang_var_table *vt, GLint size, GLboolean isTemp)
          /* found block of size free regs */
          if (size > 1)
             assert(i % 4 == 0);
-         for (j = 0; j < (GLuint) size; j++)
+         for (j = 0; j < (GLuint) size; j++) {
+            assert(i + j < 4 * MAX_PROGRAM_TEMPS);
             t->Temps[i + j] = isTemp ? TEMP : VAR;
+         }
          assert(i < MAX_PROGRAM_TEMPS * 4);
          t->ValSize[i] = size;
          return i;
       }
    }
+
+   /* if we get here, we ran out of registers */
    return -1;
 }
 
@@ -259,21 +267,7 @@ _slang_alloc_var(slang_var_table *vt, slang_ir_storage *store)
       return GL_FALSE;
 
    store->Index = i / 4;
-   if (store->Size == 1) {
-      const GLuint comp = i % 4;
-      store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
-   }
-   else if (store->Size == 2) {
-      store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
-                                     SWIZZLE_NIL, SWIZZLE_NIL);
-   }
-   else if (store->Size == 3) {
-      store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
-                                     SWIZZLE_Z, SWIZZLE_NIL);
-   }
-   else {
-      store->Swizzle = SWIZZLE_NOOP;
-   }
+   store->Swizzle = _slang_var_swizzle(store->Size, i % 4);
 
    if (dbg)
       printf("Alloc var storage sz %d at %d.%s (level %d) store %p\n",
@@ -301,20 +295,7 @@ _slang_alloc_temp(slang_var_table *vt, slang_ir_storage *store)
    assert(store->Index < 0);
 
    store->Index = i / 4;
-   if (store->Size == 1) {
-      const GLuint comp = i % 4;
-      store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
-   }
-   else {
-      /* XXX improve swizzled for size=2/3, use for writemask... */
-#if 1
-      if (store->Size == 2) {
-         store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
-                                        SWIZZLE_NIL, SWIZZLE_NIL);
-      }
-#endif
-      store->Swizzle = SWIZZLE_NOOP;
-   }
+   store->Swizzle = _slang_var_swizzle(store->Size, i % 4);
 
    if (dbg) printf("Alloc temp sz %d at %d.%s (level %d) store %p\n",
                    store->Size, store->Index,
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index 525cf9d..c0bda32 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -40,20 +40,27 @@ static void
 fetch_texel_lod( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
                  GLuint unit, GLfloat color[4] )
 {
-   GLchan rgba[4];
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
 
-   if (texObj)
+   if (texObj) {
+      SWcontext *swrast = SWRAST_CONTEXT(ctx);
+      GLchan rgba[4];
+
       lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod);
 
-   /* XXX use a float-valued TextureSample routine here!!! */
-   swrast->TextureSample[unit](ctx, texObj, 1, (const GLfloat (*)[4]) texcoord,
-                               &lambda, &rgba);
-   color[0] = CHAN_TO_FLOAT(rgba[0]);
-   color[1] = CHAN_TO_FLOAT(rgba[1]);
-   color[2] = CHAN_TO_FLOAT(rgba[2]);
-   color[3] = CHAN_TO_FLOAT(rgba[3]);
+      /* XXX use a float-valued TextureSample routine here!!! */
+      swrast->TextureSample[unit](ctx, texObj, 1,
+                                  (const GLfloat (*)[4]) texcoord,
+                                  &lambda, &rgba);
+      color[0] = CHAN_TO_FLOAT(rgba[0]);
+      color[1] = CHAN_TO_FLOAT(rgba[1]);
+      color[2] = CHAN_TO_FLOAT(rgba[2]);
+      color[3] = CHAN_TO_FLOAT(rgba[3]);
+   }
+   else {
+      color[0] = color[1] = color[2] = 0.0F;
+      color[3] = 1.0F;
+   }
 }
 
 
@@ -69,13 +76,14 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
 {
    SWcontext *swrast = SWRAST_CONTEXT(ctx);
    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
-   GLfloat lambda;
-   GLchan rgba[4];
 
    if (texObj) {
-      const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
+      const struct gl_texture_image *texImg =
+         texObj->Image[0][texObj->BaseLevel];
       const GLfloat texW = (GLfloat) texImg->WidthScale;
       const GLfloat texH = (GLfloat) texImg->HeightScale;
+      GLfloat lambda;
+      GLchan rgba[4];
 
       lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
                                       texdx[1], texdy[1], /* dt/dx, dt/dy */
@@ -85,14 +93,20 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
                                       1.0F / texcoord[3]) + lodBias;
 
       lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod);
-   }
 
-   swrast->TextureSample[unit](ctx, texObj, 1, (const GLfloat (*)[4]) texcoord,
-                               &lambda, &rgba);
-   color[0] = CHAN_TO_FLOAT(rgba[0]);
-   color[1] = CHAN_TO_FLOAT(rgba[1]);
-   color[2] = CHAN_TO_FLOAT(rgba[2]);
-   color[3] = CHAN_TO_FLOAT(rgba[3]);
+      /* XXX use a float-valued TextureSample routine here!!! */
+      swrast->TextureSample[unit](ctx, texObj, 1,
+                                  (const GLfloat (*)[4]) texcoord,
+                                  &lambda, &rgba);
+      color[0] = CHAN_TO_FLOAT(rgba[0]);
+      color[1] = CHAN_TO_FLOAT(rgba[1]);
+      color[2] = CHAN_TO_FLOAT(rgba[2]);
+      color[3] = CHAN_TO_FLOAT(rgba[3]);
+   }
+   else {
+      color[0] = color[1] = color[2] = 0.0F;
+      color[3] = 1.0F;
+   }
 }
 
 
diff --git a/src/mesa/swrast/s_triangle.c b/src/mesa/swrast/s_triangle.c
index a2e8433..a501f42 100644
--- a/src/mesa/swrast/s_triangle.c
+++ b/src/mesa/swrast/s_triangle.c
@@ -265,9 +265,6 @@ affine_span(GLcontext *ctx, SWspan *span,
    GLchan sample[4];  /* the filtered texture sample */
    const GLuint texEnableSave = ctx->Texture._EnabledUnits;
 
-   /* Disable tex units so they're not re-applied in swrast_write_rgba_span */
-   ctx->Texture._EnabledUnits = 0x0;
-
    /* Instead of defining a function for each mode, a test is done
     * between the outer and inner loops. This is to reduce code size
     * and complexity. Observe that an optimizing compiler kills
@@ -396,6 +393,9 @@ affine_span(GLcontext *ctx, SWspan *span,
    GLuint i;
    GLchan *dest = span->array->rgba[0];
 
+   /* Disable tex units so they're not re-applied in swrast_write_rgba_span */
+   ctx->Texture._EnabledUnits = 0x0;
+
    span->intTex[0] -= FIXED_HALF;
    span->intTex[1] -= FIXED_HALF;
    switch (info->filter) {
diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
index 5e2a582..19ec556 100644
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -109,24 +109,28 @@ _tnl_InvalidateState( GLcontext *ctx, GLuint new_state )
 
    tnl->pipeline.new_state |= new_state;
 
-   /* Calculate tnl->render_inputs:
+   /* Calculate tnl->render_inputs.  This bitmask indicates which vertex
+    * attributes need to be emitted to the rasterizer.
     */
    if (ctx->Visual.rgbMode) {
       GLuint i;
 
       RENDERINPUTS_ZERO( tnl->render_inputs_bitset );
       RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS );
+
       if (!fp || (fp->Base.InputsRead & FRAG_BIT_COL0)) {
          RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0 );
       }
+
+      if (NEED_SECONDARY_COLOR(ctx))
+         RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 );
+
       for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
-         if (ctx->Texture._EnabledCoordUnits & (1 << i)) {
+         if (ctx->Texture._EnabledCoordUnits & (1 << i) ||
+             (fp && fp->Base.InputsRead & FRAG_BIT_TEX(i))) {
             RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) );
          }
       }
-
-      if (NEED_SECONDARY_COLOR(ctx))
-         RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 );
    }
    else {
       RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS );
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index d48f523..f6daa25 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -148,11 +148,14 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
          /* Note: the exec->vtx.current[i] pointers point into the
           * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
           */
+         if (exec->vtx.attrptr[i]) {
+
 	 COPY_CLEAN_4V(current, 
 		       exec->vtx.attrsz[i], 
 		       exec->vtx.attrptr[i]);
 
-	 
+	 }
+
 	 /* Given that we explicitly state size here, there is no need
 	  * for the COPY_CLEAN above, could just copy 16 bytes and be
 	  * done.  The only problem is when Mesa accesses ctx->Current
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 92356ba..ad8b6e8 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -175,7 +175,20 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
          exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr];
       }
       map = vbo->map_vp_arb;
+
+      /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
+       * In that case we effectively need to route the data from
+       * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
+       */
+      if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
+          (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
+         exec->vtx.inputs[16] = exec->vtx.inputs[0];
+         exec->vtx.attrsz[16] = exec->vtx.attrsz[0];
+         exec->vtx.attrsz[0] = 0;
+      }
       break;
+   default:
+      assert(0);
    }
 
    /* Make all active attributes (including edgeflag) available as
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index ed82f09..ed40b5c 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -110,6 +110,9 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
    GLuint data = node->buffer_offset;
    const GLuint *map;
    GLuint attr;
+   GLubyte node_attrsz[VBO_ATTRIB_MAX];  /* copy of node->attrsz[] */
+
+   memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
 
    /* Install the default (ie Current) attributes first, then overlay
     * all active ones.
@@ -135,13 +138,26 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
          save->inputs[attr + 16] = &vbo->generic_currval[attr];
       }
       map = vbo->map_vp_arb;
+
+      /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
+       * In that case we effectively need to route the data from
+       * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
+       */
+      if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
+          (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
+         save->inputs[16] = save->inputs[0];
+         node_attrsz[16] = node_attrsz[0];
+         node_attrsz[0] = 0;
+      }
       break;
+   default:
+      assert(0);
    }
 
    for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
       GLuint src = map[attr];
 
-      if (node->attrsz[src]) {
+      if (node_attrsz[src]) {
          /* override the default array set above */
          save->inputs[attr] = &arrays[attr];
 
diff --git a/src/mesa/x86/gen_matypes.c b/src/mesa/x86/gen_matypes.c
index afb4b11..8c690b4 100644
--- a/src/mesa/x86/gen_matypes.c
+++ b/src/mesa/x86/gen_matypes.c
@@ -61,7 +61,7 @@ do {									\
    printf( "\n" );							\
 } while (0)
 
-#if defined(__BEOS__) || defined(_LP64)
+#if defined(__BEOS__) || defined(__HAIKU__) || defined(_LP64)
 #define OFFSET( s, t, m )						\
    printf( "#define %s\t%ld\n", s, offsetof( t, m ) );
 #else
@@ -69,7 +69,7 @@ do {									\
    printf( "#define %s\t%d\n", s, offsetof( t, m ) );
 #endif
 
-#if defined(__BEOS__) || defined(_LP64)
+#if defined(__BEOS__) || defined(__HAIKU__) || defined(_LP64)
 #define SIZEOF( s, t )							\
    printf( "#define %s\t%ld\n", s, sizeof(t) );
 #else