Blob Blame History Raw
commit 68888189cf8d460ef6f8f2f1431a6ffe9fcd8134
Author: Alex Deucher <alex@samba.(none)>
Date:   Wed Feb 27 03:09:28 2008 -0500

    RADEON: fix textured video uploads

commit 7cfad216a3f1486be194c972b9b6bcf070358586
Author: Alex Deucher <alex@samba.(none)>
Date:   Wed Feb 27 02:13:10 2008 -0500

    RADEON: fix clipping for textured video

commit a2dca1d68d751def34ef3c6f836574173737bf76
Author: Alex Deucher <alex@samba.(none)>
Date:   Tue Feb 26 16:38:29 2008 -0500

    RADEON: Convert textured video to use pipelined uploads

commit b4fa1ce9d2da04d94521a82d3c2e95f0fe985ccc
Author: Alex Deucher <alex@samba.(none)>
Date:   Tue Feb 26 14:45:25 2008 -0500

    R300/R500: fix up clipping
    
    Based on patch from Peter Zubaj.

commit 44e527a117ab0a363135ff066c7f7e0c12e3dc89
Author: Alex Deucher <alex@samba.(none)>
Date:   Tue Feb 26 14:33:24 2008 -0500

    R300: fix coordinate clamping in render code
    
    Based on Peter's fix for textured video

commit 00ec17ad53d7ad43f19c9b723794ac1b8ef86826
Author: Peter Zubaj <pzubaj at marticonet.sk>
Date:   Tue Feb 26 14:26:14 2008 -0500

    R300: Fix clamping and pitch for textured video

commit 4207793f9b6c891cb17ba715223d2002668885e3
Author: Alex Deucher <alex@samba.(none)>
Date:   Tue Feb 26 13:16:22 2008 -0500

    R300/R500: set the number of graphics pipes properly
    
    This should fix the checkerboard issues on r300/r350 cards.

commit 823d8bf49e4c0a8c839354179942c5a1944c178d
Author: Alex Deucher <alex@samba.(none)>
Date:   Tue Feb 26 12:25:14 2008 -0500

    R300/R500: clean up magic numbers in textured video

commit 8ea75b268f11794f4a7e7bac52cb256490ed3fd2
Author: Dave Airlie <airlied@linux.ie>
Date:   Tue Feb 26 16:29:19 2008 +1000

    regs: fix spelling properly

commit f2816064a6c2c4c35ccba74b9aa80547e25c012e
Author: Dave Airlie <airlied@linux.ie>
Date:   Tue Feb 26 16:28:24 2008 +1000

    regs: fix spelling mistake
    
    pointed out by plaes on irc

commit 9d2ca30b90607085578dde1f314db663bd5f82ec
Author: Alex Deucher <alex@samba.(none)>
Date:   Mon Feb 25 17:34:00 2008 -0500

    R300/R500: clean up magic numbers in render code

commit 153ad6fcf704cbf9f811d9986cd4baf04e82c9d2
Author: Dave Airlie <airlied@linux.ie>
Date:   Mon Feb 25 07:10:48 2008 +1000

    fixup check for EXA composite pointed out by Alan Swanson

commit 85e470e64f629de72e361c77770e2e29998d1bf4
Merge: 27ddb39... 1b84c76...
Author: Alex Deucher <alex@samba.(none)>
Date:   Sun Feb 24 05:37:22 2008 -0500

    Merge master and fix conflicts
    
    Merge branch 'master' of ssh://agd5f@git.freedesktop.org/git/xorg/driver/xf86-video-ati
    
    Conflicts:
    
    	src/radeon_commonfuncs.c

commit 27ddb39b12a0b54e099fd5274c4c91f08e2d2822
Author: Alex Deucher <alex@samba.(none)>
Date:   Sun Feb 24 05:30:11 2008 -0500

    R300: clean up magic numbers in RADEONInit3DEngine

commit d4c20f33ad6a1f88615cd7e09ad3638896873f9e
Author: Alex Deucher <alex@samba.(none)>
Date:   Sun Feb 24 04:46:10 2008 -0500

    R300: replace magic numbers in cache flush

commit e52f1c8d2647b81d891ec0728dd582941a76c83f
Author: Alex Deucher <alex@samba.(none)>
Date:   Sun Feb 24 04:43:18 2008 -0500

    R300: fill in some more 3D bitfields

commit 1b84c76f27c8d24cb42beae26abf000721901c1c
Author: Dave Airlie <airlied@redhat.com>
Date:   Sun Feb 24 19:20:36 2008 +1100

    rs690: initial textured video support

commit 68158124366db883a901e960fe5f05f8df5efa42
Author: Dave Airlie <airlied@redhat.com>
Date:   Tue Feb 19 19:51:18 2008 +1100

    rs690: initial rs690 rotate
    
    Suffers from the same clipping problem as r500.

commit e614bb6965588bf09dcb87f5e08e67120ec9847f
Author: Dave Airlie <airlied@redhat.com>
Date:   Sun Feb 24 01:46:05 2008 -0500

    r500: convert fragprog to use register values

commit 6ce9ee47c75620b2e5d211c5d59d17271a6a7b19
Author: Dave Airlie <airlied@redhat.com>
Date:   Sat Feb 23 22:49:34 2008 -0500

    r500: add textured video Xv adapter support

commit 9aaf8b33b22b6ba112869558ae54e021b9487ad2
Author: Dave Airlie <airlied@redhat.com>
Date:   Sat Feb 23 22:16:25 2008 -0500

    r500: initial rotate support - not fully working yet.
    
    Just an example of how to set up and run the r500 3D engine for rotation.
    This rotates for me, but I get some strange clipping at the bottom of my screen.

commit 05dc3e4fc19d056ce99a7b110665adab2ca1ea21
Author: Adam Jackson <ajax@redhat.com>
Date:   Sat Feb 23 20:29:51 2008 -0500

    Clarify R500 US setup.

commit 080606ad528972623e1ed1124d8efe7705a73446
Author: Adam Jackson <ajax@redhat.com>
Date:   Sat Feb 23 20:21:17 2008 -0500

    Add the R500 US index/data pair.

commit edfb3b6bbf0ee17ace8e6ba704a6f54e249fec63
Author: Alex Deucher <alex@samba.(none)>
Date:   Sat Feb 23 19:59:33 2008 -0500

    RADEON: no textured video yet on XPRESS chips
    
    Still need to sort out the VAP and PVS stuff

commit 4146bfe5d00e40a86d17826fac50d04b2469621d
Author: Alex Deucher <alex@samba.(none)>
Date:   Sat Feb 23 19:21:52 2008 -0500

    R500: fix typo in new r5xx regs

commit d9be9f34b0d3313e7b22b2a8bb0a8924ad3116bf
Author: Alex Deucher <alex@samba.(none)>
Date:   Sat Feb 23 19:06:30 2008 -0500

    RADEON: add textured video support for r1xx-r4xx radeons
    
    Based on the kdrive ati video code by Eric Anholt.
    R3xx/R4xx still have some clipping issues in certain situations

commit 9dc4acad79196e9d5d94dd710773bfa83456d47f
Author: Alex Deucher <alex@cube.(none)>
Date:   Sat Feb 23 18:29:00 2008 -0500

    RS6xx: gpio entry for DDIA varies depending on the number of DFP ports

commit ed0a93edf28155308e7ab9d8705581bb38455ea0
Author: Adam Jackson <ajax@redhat.com>
Date:   Sat Feb 23 15:02:17 2008 -0500

    Fix R500_US_CONFIG.

commit bc2bd6f841b51aeed3b6b4a47dbe758c200bc5a6
Author: Adam Jackson <ajax@redhat.com>
Date:   Sat Feb 23 14:34:18 2008 -0500

    Add R500 unified shader register block.

commit a7b5c3bb74fc4de5e38a75ac31656445ce823464
Author: Alex Deucher <alex@cube.(none)>
Date:   Fri Feb 22 19:35:11 2008 -0500

    RS6xx: fix DDC on DDIA output (usually HDMI port)

commit 3327a681e21101cc6f6e162f4e29f9937b69ccc3
Author: Alex Deucher <alex@cube.(none)>
Date:   Fri Feb 22 17:05:56 2008 -0500

    ATOM: properly set up DDIA output on RS6xx boards

commit 1d0e9ab8b9451101b1b91943546f6c5833c21b3f
Author: Michel Dänzer <michel@tungstengraphics.com>
Date:   Wed Feb 20 10:21:49 2008 +0100

    radeon: Fix typo flagged by gcc -Wall.

commit b5bd442b60dbc72fe4c1e928ab864aeb0fd7a3cb
Author: Alex Deucher <alex@botch2.(none)>
Date:   Tue Feb 19 20:47:40 2008 -0500

    R100: fix render accel for transforms
    
    Not sure why we had a separate broken path for r100 vertex
    submission.

commit a0a73208a21546ac120fb9a463261836c9ea7b55
Author: Alex Deucher <alex@botch2.(none)>
Date:   Tue Feb 19 20:11:19 2008 -0500

    RADEON: restore clock gating and CP clock errata on VT switch
    
    This may help people with hangs on resume

commit b77e2aff7453a9f370beba37ca3c25b92b3f97ff
Author: Alex Deucher <alex@botch2.(none)>
Date:   Tue Feb 19 19:55:41 2008 -0500

    RADEON: fix DDC types 5 and 6

commit af82172a82f2bdf96e571def659a1c70f92dfdbf
Author: Alex Deucher <alex@botch2.(none)>
Date:   Tue Feb 19 19:39:35 2008 -0500

    RADEON: update man page with supported chips
diff --git a/man/radeon.man b/man/radeon.man
index b4ade32..86be965 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -53,7 +53,7 @@ Radeon 9100 IGP
 Radeon 9200 IGP
 .TP 12
 .B RS400
-Radeon XPRESS 200/200M IGP (2d only)
+Radeon XPRESS 200/200M IGP
 .TP 12
 .B RV280
 Radeon 9200PRO/9200/9200SE, M9+
@@ -90,6 +90,30 @@ Radeon X800, M28 PCIE
 .TP 12
 .B R480/R481
 Radeon X850 PCIE/AGP
+.TP 12
+.B RV515
+Radeon X1300/X1400/X1500
+.TP 12
+.B R520
+Radeon X1800
+.TP 12
+.B RV530/RV560
+Radeon X1600/X1650/X1700
+.TP 12
+.B RV570/R580
+Radeon X1900/X1950
+.TP 12
+.B RS600/RS690
+Radeon X1200
+.TP 12
+.B R600
+Radeon HD 2900
+.TP 12
+.B RV610/RV630
+Radeon HD 2400/2600
+.TP 12
+.B RV670
+Radeon HD 3850/3870
 
 .SH CONFIGURATION DETAILS
 Please refer to __xconfigfile__(__filemansuffix__) for general configuration
diff --git a/src/Makefile.am b/src/Makefile.am
index a146df3..e0799a5 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -114,6 +114,7 @@ radeon_drv_la_SOURCES = \
 	radeon_driver.c radeon_video.c radeon_bios.c radeon_mm_i2c.c \
 	radeon_vip.c radeon_misc.c radeon_probe.c \
 	legacy_crtc.c legacy_output.c \
+	radeon_textured_video.c \
 	radeon_crtc.c radeon_output.c radeon_modes.c radeon_tv.c \
 	$(RADEON_ATOMBIOS_SOURCES) radeon_atombios.c radeon_atomwrapper.c \
 	$(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c atombios_crtc.c
diff --git a/src/atombios_output.c b/src/atombios_output.c
index 6c638b1..07d212f 100644
--- a/src/atombios_output.c
+++ b/src/atombios_output.c
@@ -235,6 +235,35 @@ atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode)
 }
 
 static int
+atombios_ddia_setup(xf86OutputPtr output, DisplayModePtr mode)
+{
+    RADEONInfoPtr info       = RADEONPTR(output->scrn);
+    DVO_ENCODER_CONTROL_PS_ALLOCATION disp_data;
+    AtomBiosArgRec data;
+    unsigned char *space;
+
+    disp_data.sDVOEncoder.ucAction = ATOM_ENABLE;
+    disp_data.sDVOEncoder.usPixelClock = mode->Clock / 10;
+
+    if (mode->Clock > 165000)
+	disp_data.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute = PANEL_ENCODER_MISC_DUAL;
+    else
+	disp_data.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute = 0;
+
+    data.exec.index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+    data.exec.dataSpace = (void *)&space;
+    data.exec.pspace = &disp_data;
+
+    if (RHDAtomBiosFunc(info->atomBIOS->scrnIndex, info->atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) {
+	ErrorF("DDIA setup success\n");
+	return ATOM_SUCCESS;
+    }
+
+    ErrorF("DDIA setup failed\n");
+    return ATOM_NOT_IMPLEMENTED;
+}
+
+static int
 atombios_output_tmds1_setup(xf86OutputPtr output, DisplayModePtr mode)
 {
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
@@ -536,6 +565,7 @@ atombios_output_mode_set(xf86OutputPtr output,
 			 DisplayModePtr adjusted_mode)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
+    RADEONInfoPtr info       = RADEONPTR(output->scrn);
 
     atombios_output_scaler_setup(output, mode);
     atombios_set_output_crtc_source(output);
@@ -551,9 +581,12 @@ atombios_output_mode_set(xf86OutputPtr output,
     } else if (radeon_output->MonType == MT_DFP) {
        if (radeon_output->devices & ATOM_DEVICE_DFP1_SUPPORT)
 	   atombios_output_tmds1_setup(output, adjusted_mode);
-       else if (radeon_output->devices & ATOM_DEVICE_DFP2_SUPPORT)
-	   atombios_external_tmds_setup(output, adjusted_mode);
-       else if (radeon_output->devices & ATOM_DEVICE_DFP3_SUPPORT)
+       else if (radeon_output->devices & ATOM_DEVICE_DFP2_SUPPORT) {
+	   if (info->IsIGP)
+	       atombios_ddia_setup(output, adjusted_mode);
+	   else
+	       atombios_external_tmds_setup(output, adjusted_mode);
+       } else if (radeon_output->devices & ATOM_DEVICE_DFP3_SUPPORT)
 	   atombios_output_tmds2_setup(output, adjusted_mode);
     } else if (radeon_output->MonType == MT_LCD) {
 	if (radeon_output->devices & ATOM_DEVICE_LCD1_SUPPORT)
diff --git a/src/radeon.h b/src/radeon.h
index 7d63f28..aba3c0f 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -184,6 +184,8 @@ typedef enum {
 				   * for something else.
 				   */
 
+#define xFixedToFloat(f) (((float) (f)) / 65536)
+
 #define RADEON_LOGLEVEL_DEBUG 4
 
 /* for Xv, outputs */
diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c
index ddd332f..88c220b 100644
--- a/src/radeon_atombios.c
+++ b/src/radeon_atombios.c
@@ -1759,7 +1759,15 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn)
 	    (i == ATOM_DEVICE_TV2_INDEX) ||
 	    (i == ATOM_DEVICE_CV_INDEX))
 	    info->BiosConnector[i].ddc_i2c.valid = FALSE;
-	else
+	else if ((i == ATOM_DEVICE_DFP3_INDEX) && info->IsIGP) {
+	    /* DDIA port uses non-standard gpio entry */
+	    if (info->BiosConnector[ATOM_DEVICE_DFP2_INDEX].valid)
+		info->BiosConnector[i].ddc_i2c =
+		    RADEONLookupGPIOLineForDDC(pScrn, ci.sucI2cId.sbfAccess.bfI2C_LineMux + 2);
+	    else
+		info->BiosConnector[i].ddc_i2c =
+		    RADEONLookupGPIOLineForDDC(pScrn, ci.sucI2cId.sbfAccess.bfI2C_LineMux + 1);
+	} else
 	    info->BiosConnector[i].ddc_i2c =
 		RADEONLookupGPIOLineForDDC(pScrn, ci.sucI2cId.sbfAccess.bfI2C_LineMux);
 
@@ -1772,7 +1780,7 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn)
 		info->BiosConnector[i].TMDSType = TMDS_EXT;
 	} else if (i == ATOM_DEVICE_DFP3_INDEX) {
 	    if (info->IsIGP)
-		info->BiosConnector[i].TMDSType = TMDS_EXT;
+		info->BiosConnector[i].TMDSType = TMDS_DDIA;
 	    else
 		info->BiosConnector[i].TMDSType = TMDS_LVTMA;
 	} else
diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 8c4b598..0250aef 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -30,6 +30,8 @@
 #include "config.h"
 #endif
 
+#include "ati_pciids_gen.h"
+
 #if defined(ACCEL_MMIO) && defined(ACCEL_CP)
 #error Cannot define both MMIO and CP acceleration!
 #endif
@@ -53,142 +55,182 @@
 static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    CARD32 gb_tile_config;
     ACCEL_PREAMBLE();
 
     info->texW[0] = info->texH[0] = info->texW[1] = info->texH[1] = 1;
 
-    if (IS_R300_VARIANT) {
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT || info->ChipFamily == CHIP_FAMILY_RS690) {
+
 	BEGIN_ACCEL(3);
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, 0x3);
-	OUT_ACCEL_REG(R300_WAIT_UNTIL, 0x30000);
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
+	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
+	OUT_ACCEL_REG(R300_WAIT_UNTIL, R300_WAIT_2D_IDLECLEAN | R300_WAIT_3D_IDLECLEAN);
 	FINISH_ACCEL();
 
+	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16);
+
+	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
+	    (info->Chipset == PCI_CHIP_RV410_5E4F)) {
+	    /* RV410 SE chips */
+	    gb_tile_config |= R300_PIPE_COUNT_RV350;
+	} else if ((info->ChipFamily == CHIP_FAMILY_RV350) ||
+		   (info->ChipFamily == CHIP_FAMILY_RV380) ||
+		   (info->ChipFamily == CHIP_FAMILY_RS400)) {
+	    /* RV3xx, RS4xx chips */
+	    gb_tile_config |= R300_PIPE_COUNT_RV350;
+	} else if ((info->ChipFamily == CHIP_FAMILY_R300) ||
+		   (info->ChipFamily == CHIP_FAMILY_R350)) {
+	    /* R3xx chips */
+	    gb_tile_config |= R300_PIPE_COUNT_R300;
+	} else if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
+		   (info->ChipFamily == CHIP_FAMILY_RS690)) {
+	    /* RV4xx, RS6xx chips */
+	    gb_tile_config |= R300_PIPE_COUNT_R420_3P;
+	} else {
+	    /* R4xx, R5xx chips */
+	    gb_tile_config |= R300_PIPE_COUNT_R420;
+	}
+
 	BEGIN_ACCEL(3);
-	OUT_ACCEL_REG(R300_GB_TILE_CONFIG, 0x10011);
-	OUT_ACCEL_REG(R300_GB_SELECT,0x0);
-	OUT_ACCEL_REG(R300_GB_ENABLE, 0x0);
+	OUT_ACCEL_REG(R300_GB_TILE_CONFIG, gb_tile_config);
+	OUT_ACCEL_REG(R300_GB_SELECT, 0);
+	OUT_ACCEL_REG(R300_GB_ENABLE, 0);
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(3);
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, 0x3);
-	OUT_ACCEL_REG(R300_WAIT_UNTIL, 0x30000);
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
+	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
+	OUT_ACCEL_REG(R300_WAIT_UNTIL, R300_WAIT_2D_IDLECLEAN | R300_WAIT_3D_IDLECLEAN);
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(5);
-	OUT_ACCEL_REG(R300_GB_AA_CONFIG, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, 0x3);
-	OUT_ACCEL_REG(R300_GB_MSPOS0, 0x78888888);
-	OUT_ACCEL_REG(R300_GB_MSPOS1, 0x08888888);
+	OUT_ACCEL_REG(R300_GB_AA_CONFIG, 0);
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
+	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
+	OUT_ACCEL_REG(R300_GB_MSPOS0, ((8 << R300_MS_X0_SHIFT) |
+				       (8 << R300_MS_Y0_SHIFT) |
+				       (8 << R300_MS_X1_SHIFT) |
+				       (8 << R300_MS_Y1_SHIFT) |
+				       (8 << R300_MS_X2_SHIFT) |
+				       (8 << R300_MS_Y2_SHIFT) |
+				       (8 << R300_MSBD0_Y_SHIFT) |
+				       (7 << R300_MSBD0_X_SHIFT)));
+	OUT_ACCEL_REG(R300_GB_MSPOS1, ((8 << R300_MS_X3_SHIFT) |
+				       (8 << R300_MS_Y3_SHIFT) |
+				       (8 << R300_MS_X4_SHIFT) |
+				       (8 << R300_MS_Y4_SHIFT) |
+				       (8 << R300_MS_X5_SHIFT) |
+				       (8 << R300_MS_Y5_SHIFT) |
+				       (8 << R300_MSBD1_SHIFT)));
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(4);
-	OUT_ACCEL_REG(R300_GA_POLY_MODE, 0x120);
-	OUT_ACCEL_REG(R300_GA_ROUND_MODE, 0x5);
-	OUT_ACCEL_REG(R300_GA_COLOR_CONTROL, 0xAAAA);
-	OUT_ACCEL_REG(R300_GA_OFFSET, 0x0);
-	FINISH_ACCEL();
-
-	BEGIN_ACCEL(26);
-	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0x0);
-	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
-	OUT_ACCEL_REG(R300_VAP_CNTL, 0x300456);
-	OUT_ACCEL_REG(R300_VAP_VTE_CNTL, 0x300);
-	OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0x0);
-	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x4a014001);
-	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, 0x6b01);
-	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xf688f688);
-	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1, 0xf688);
-	OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, 0x100400);
-	OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, 0x1);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f00203);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10001);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f02203);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10141);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
-
-	OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0x0);
-	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, 0x1);
-	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (0x2 << 3) | 0x2);
-
-	OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, 0x10000);
+	OUT_ACCEL_REG(R300_GA_POLY_MODE, R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
+	OUT_ACCEL_REG(R300_GA_ROUND_MODE, (R300_GEOMETRY_ROUND_NEAREST |
+					   R300_COLOR_ROUND_NEAREST));
+	OUT_ACCEL_REG(R300_GA_COLOR_CONTROL, (R300_RGB0_SHADING_GOURAUD |
+					      R300_ALPHA0_SHADING_GOURAUD |
+					      R300_RGB1_SHADING_GOURAUD |
+					      R300_ALPHA1_SHADING_GOURAUD |
+					      R300_RGB2_SHADING_GOURAUD |
+					      R300_ALPHA2_SHADING_GOURAUD |
+					      R300_RGB3_SHADING_GOURAUD |
+					      R300_ALPHA3_SHADING_GOURAUD));
+	OUT_ACCEL_REG(R300_GA_OFFSET, 0);
 	FINISH_ACCEL();
 
-	BEGIN_ACCEL(7);
-	OUT_ACCEL_REG(R300_SU_TEX_WRAP, 0x0);
-	OUT_ACCEL_REG(R300_SU_POLY_OFFSET_ENABLE, 0x0);
-	OUT_ACCEL_REG(R300_SU_CULL_MODE, 0x4);
+	BEGIN_ACCEL(5);
+	OUT_ACCEL_REG(R300_SU_TEX_WRAP, 0);
+	OUT_ACCEL_REG(R300_SU_POLY_OFFSET_ENABLE, 0);
+	OUT_ACCEL_REG(R300_SU_CULL_MODE, R300_FACE_NEG);
 	OUT_ACCEL_REG(R300_SU_DEPTH_SCALE, 0x4b7fffff);
-	OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0x0);
-	OUT_ACCEL_REG(R300_RS_COUNT, 0x40002);
-	OUT_ACCEL_REG(R300_RS_IP_0, 0x1610000);
+	OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0);
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(5);
-	OUT_ACCEL_REG(R300_US_W_FMT, 0x0);
-	OUT_ACCEL_REG(R300_US_OUT_FMT_1, 0x1B0F);
-	OUT_ACCEL_REG(R300_US_OUT_FMT_2, 0x1B0F);
-	OUT_ACCEL_REG(R300_US_OUT_FMT_3, 0x1B0F);
-	OUT_ACCEL_REG(R300_US_OUT_FMT_0, 0x1B01);
+	OUT_ACCEL_REG(R300_US_W_FMT, 0);
+	OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED |
+					  R300_OUT_FMT_C0_SEL_BLUE |
+					  R300_OUT_FMT_C1_SEL_GREEN |
+					  R300_OUT_FMT_C2_SEL_RED |
+					  R300_OUT_FMT_C3_SEL_ALPHA));
+	OUT_ACCEL_REG(R300_US_OUT_FMT_2, (R300_OUT_FMT_UNUSED |
+					  R300_OUT_FMT_C0_SEL_BLUE |
+					  R300_OUT_FMT_C1_SEL_GREEN |
+					  R300_OUT_FMT_C2_SEL_RED |
+					  R300_OUT_FMT_C3_SEL_ALPHA));
+	OUT_ACCEL_REG(R300_US_OUT_FMT_3, (R300_OUT_FMT_UNUSED |
+					  R300_OUT_FMT_C0_SEL_BLUE |
+					  R300_OUT_FMT_C1_SEL_GREEN |
+					  R300_OUT_FMT_C2_SEL_RED |
+					  R300_OUT_FMT_C3_SEL_ALPHA));
+	OUT_ACCEL_REG(R300_US_OUT_FMT_0, (R300_OUT_FMT_C4_10 |
+					  R300_OUT_FMT_C0_SEL_BLUE |
+					  R300_OUT_FMT_C1_SEL_GREEN |
+					  R300_OUT_FMT_C2_SEL_RED |
+					  R300_OUT_FMT_C3_SEL_ALPHA));
 	FINISH_ACCEL();
 
-	BEGIN_ACCEL(2);
-	OUT_ACCEL_REG(R300_RS_INST_COUNT, 0xC0);
-	OUT_ACCEL_REG(R300_RS_INST_0, 0x8);
-	FINISH_ACCEL();
 
 	BEGIN_ACCEL(3);
-	OUT_ACCEL_REG(R300_FG_DEPTH_SRC, 0x0);
-	OUT_ACCEL_REG(R300_FG_FOG_BLEND, 0x0);
-	OUT_ACCEL_REG(R300_FG_ALPHA_FUNC, 0x0);
+	OUT_ACCEL_REG(R300_FG_DEPTH_SRC, 0);
+	OUT_ACCEL_REG(R300_FG_FOG_BLEND, 0);
+	OUT_ACCEL_REG(R300_FG_ALPHA_FUNC, 0);
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(12);
-	OUT_ACCEL_REG(R300_RB3D_ZSTENCILCNTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, 0x3);
-	OUT_ACCEL_REG(R300_RB3D_BW_CNTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_ZCNTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_ZTOP, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_ROPCNTL, 0x0);
-
-	OUT_ACCEL_REG(R300_RB3D_AARESOLVE_CTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_COLOR_CHANNEL_MASK, 0xf);
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-	OUT_ACCEL_REG(R300_RB3D_CCTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_DITHER_CTL, 0x0);
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+	OUT_ACCEL_REG(R300_RB3D_ZSTENCILCNTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
+	OUT_ACCEL_REG(R300_RB3D_BW_CNTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_ZCNTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_ZTOP, 0);
+	OUT_ACCEL_REG(R300_RB3D_ROPCNTL, 0);
+
+	OUT_ACCEL_REG(R300_RB3D_AARESOLVE_CTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_COLOR_CHANNEL_MASK, (R300_BLUE_MASK_EN |
+						     R300_GREEN_MASK_EN |
+						     R300_RED_MASK_EN |
+						     R300_ALPHA_MASK_EN));
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
+	OUT_ACCEL_REG(R300_RB3D_CCTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_DITHER_CTL, 0);
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(7);
 	OUT_ACCEL_REG(R300_SC_EDGERULE, 0xA5294A5);
-	OUT_ACCEL_REG(R300_SC_SCISSOR0, 0x0);
-	OUT_ACCEL_REG(R300_SC_SCISSOR1, 0x3ffffff);
-	OUT_ACCEL_REG(R300_SC_CLIP_0_A, 0x880440);
-	OUT_ACCEL_REG(R300_SC_CLIP_0_B, 0xff0ff0);
+	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
+					 (0 << R300_SCISSOR_Y_SHIFT)));
+	OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) |
+					 (8191 << R300_SCISSOR_Y_SHIFT)));
+
+	if (IS_R300_VARIANT || (info->ChipFamily == CHIP_FAMILY_RS690)) {
+	    /* clip has offset 1440 */
+	    OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((1088 << R300_CLIP_X_SHIFT) |
+					     (1088 << R300_CLIP_Y_SHIFT)));
+	    OUT_ACCEL_REG(R300_SC_CLIP_0_B, (((1080 + 2048) << R300_CLIP_X_SHIFT) |
+					     ((1080 + 2048) << R300_CLIP_Y_SHIFT)));
+	} else {
+	    OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((0 << R300_CLIP_X_SHIFT) |
+					     (0 << R300_CLIP_Y_SHIFT)));
+	    OUT_ACCEL_REG(R300_SC_CLIP_0_B, ((4080 << R300_CLIP_X_SHIFT) |
+					     (4080 << R300_CLIP_Y_SHIFT)));
+	}
 	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
 	OUT_ACCEL_REG(R300_SC_SCREENDOOR, 0xffffff);
 	FINISH_ACCEL();
-    } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || 
-	       (info->ChipFamily == CHIP_FAMILY_RV280) || 
-	       (info->ChipFamily == CHIP_FAMILY_RS300) || 
+    } else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
+	       (info->ChipFamily == CHIP_FAMILY_RV280) ||
+	       (info->ChipFamily == CHIP_FAMILY_RS300) ||
 	       (info->ChipFamily == CHIP_FAMILY_R200)) {
 
 	BEGIN_ACCEL(7);
-        if (info->ChipFamily == CHIP_FAMILY_RS300) {
-            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
-        } else {
-            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0);
-        }
+	if (info->ChipFamily == CHIP_FAMILY_RS300) {
+	    OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
+	} else {
+	    OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0);
+	}
 	OUT_ACCEL_REG(R200_PP_CNTL_X, 0);
 	OUT_ACCEL_REG(R200_PP_TXMULTI_CTL_0, 0);
 	OUT_ACCEL_REG(R200_SE_VTX_STATE_CNTL, 0);
@@ -199,11 +241,11 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	FINISH_ACCEL();
     } else {
 	BEGIN_ACCEL(2);
-        if ((info->ChipFamily == CHIP_FAMILY_RADEON) ||
-            (info->ChipFamily == CHIP_FAMILY_RV200))
-            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0);
-        else
-            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
+	if ((info->ChipFamily == CHIP_FAMILY_RADEON) ||
+	    (info->ChipFamily == CHIP_FAMILY_RV200))
+	    OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0);
+	else
+	    OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
 	OUT_ACCEL_REG(RADEON_SE_COORD_FMT,
 	    RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
 	    RADEON_VTX_ST0_NONPARAMETRIC |
@@ -217,12 +259,12 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
     OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff);
     OUT_ACCEL_REG(RADEON_AUX_SC_CNTL, 0);
     OUT_ACCEL_REG(RADEON_RB3D_PLANEMASK, 0xffffffff);
-    OUT_ACCEL_REG(RADEON_SE_CNTL, RADEON_DIFFUSE_SHADE_GOURAUD |
-				  RADEON_BFACE_SOLID | 
-				  RADEON_FFACE_SOLID |
-				  RADEON_VTX_PIX_CENTER_OGL |
-				  RADEON_ROUND_MODE_ROUND |
-				  RADEON_ROUND_PREC_4TH_PIX);
+    OUT_ACCEL_REG(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+				   RADEON_BFACE_SOLID |
+				   RADEON_FFACE_SOLID |
+				   RADEON_VTX_PIX_CENTER_OGL |
+				   RADEON_ROUND_MODE_ROUND |
+				   RADEON_ROUND_PREC_4TH_PIX));
     FINISH_ACCEL();
 }
 
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index 9c5fce6..5cf8d51 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -3485,7 +3485,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen,
     RADEONDGAInit(pScreen);
 
     /* Init Xv */
-    if (!IS_AVIVO_VARIANT) {
+    if (info->ChipFamily < CHIP_FAMILY_R600) {
 	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
 		       "Initializing Xv\n");
 	RADEONInitVideo(pScreen);
@@ -4906,6 +4906,17 @@ Bool RADEONEnterVT(int scrnIndex, int flags)
     /* Makes sure the engine is idle before doing anything */
     RADEONWaitForIdleMMIO(pScrn);
 
+    if (info->IsMobility && !IS_AVIVO_VARIANT) {
+        if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_CLOCKS, FALSE)) {
+	    RADEONSetDynamicClock(pScrn, 1);
+        } else {
+	    RADEONSetDynamicClock(pScrn, 0);
+        }
+    }
+
+    if (IS_R300_VARIANT || IS_RV100_VARIANT)
+	RADEONForceSomeClocks(pScrn);
+
     pScrn->vtSema = TRUE;
     for (i = 0; i < xf86_config->num_crtc; i++) {
 	xf86CrtcPtr	crtc = xf86_config->crtc[i];
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 3b0c734..4da4841 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -105,23 +105,6 @@ RADEONLog2(int val)
 	return bits - 1;
 }
 
-static __inline__ int
-RADEONPow2(int num)
-{
-    int pot = 2;
-
-    if (num <= 2)
-	return num;
-
-    while (pot < num) {
-	pot *= 2;
-    }
-
-    return pot;
-}
-
-
-
 static __inline__ CARD32 F_TO_DW(float val)
 {
     union {
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index 20b96a5..10221c0 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -533,11 +533,11 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
 
 #ifdef RENDER
     if (info->RenderAccel) {
-	if ((info->ChipFamily >= CHIP_FAMILY_RV515) ||
+	if ((info->ChipFamily >= CHIP_FAMILY_R600) ||
 	    (info->ChipFamily == CHIP_FAMILY_RS400))
 		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
 			       "unsupported on XPRESS, R500 and newer cards.\n");
-	else if (IS_R300_VARIANT) {
+	else if (IS_R300_VARIANT || (IS_AVIVO_VARIANT && info->ChipFamily <= CHIP_FAMILY_RS690)) {
 		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
 			       "enabled for R300 type cards.\n");
 		info->exa->CheckComposite = R300CheckComposite;
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 6003587..9bbccb5 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -809,8 +809,8 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 
     txformat1 = R300TexFormats[i].card_fmt;
 
-    txformat0 = (((RADEONPow2(w) - 1) << R300_TXWIDTH_SHIFT) |
-		 ((RADEONPow2(h) - 1) << R300_TXHEIGHT_SHIFT));
+    txformat0 = (((w - 1) << R300_TXWIDTH_SHIFT) |
+		 ((h - 1) << R300_TXHEIGHT_SHIFT));
 
     if (pPict->repeat) {
 	ErrorF("repeat\n");
@@ -822,15 +822,18 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	txformat0 |= R300_TXPITCH_EN;
 
 
-    info->texW[unit] = RADEONPow2(w);
-    info->texH[unit] = RADEONPow2(h);
+    info->texW[unit] = w;
+    info->texH[unit] = h;
+
+    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+		R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST));
 
     switch (pPict->filter) {
     case PictFilterNearest:
-	txfilter = (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
+	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
 	break;
     case PictFilterBilinear:
-	txfilter = (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
+	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
 	break;
     default:
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
@@ -937,6 +940,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     CARD32 txenable, colorpitch;
     CARD32 blendcntl;
     int pixel_shift;
+    int has_tcl = (info->ChipFamily != CHIP_FAMILY_RS690 && info->ChipFamily != CHIP_FAMILY_RS400);
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -975,22 +979,223 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     RADEON_SWITCH_TO_3D();
 
-    /* setup pixel shader */
-    BEGIN_ACCEL(12);
-    OUT_ACCEL_REG(R300_US_CONFIG, 0x8);
-    OUT_ACCEL_REG(R300_US_PIXSIZE, 0x0);
-    OUT_ACCEL_REG(R300_US_CODE_OFFSET, 0x40040);
-    OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0x0);
-    OUT_ACCEL_REG(R300_US_CODE_ADDR_1, 0x0);
-    OUT_ACCEL_REG(R300_US_CODE_ADDR_2, 0x0);
-    OUT_ACCEL_REG(R300_US_CODE_ADDR_3, 0x400000);
-    OUT_ACCEL_REG(R300_US_TEX_INST_0, 0x8000);
-    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, 0x1f800000);
-    OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, 0x50a80);
-    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1800000);
-    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, 0x00040889);
+    /* setup the VAP */
+
+    if (has_tcl) {
+	BEGIN_ACCEL(28);
+	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
+	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+    } else {
+	BEGIN_ACCEL(10);
+	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
+	OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
+    }
+
+    OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+    OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+
+    if (has_tcl) {
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
+		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
+		       R300_SIGNED_0 |
+		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
+		       (10 << R300_DST_VEC_LOC_1_SHIFT) |
+		       R300_SIGNED_1));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
+		       (11 << R300_DST_VEC_LOC_2_SHIFT) |
+		       R300_LAST_VEC_2 |
+		       R300_SIGNED_2));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_1_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_2_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_2_SHIFT)));
+    } else {
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
+		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
+		       R300_SIGNED_0 |
+		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
+		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
+		       R300_SIGNED_1));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
+		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
+		       R300_LAST_VEC_2 |
+		       R300_SIGNED_2));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			<< R300_WRITE_ENA_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			<< R300_WRITE_ENA_1_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			<< R300_WRITE_ENA_2_SHIFT)));
+    }
+
+    /* setup the vertex shader */
+    if (has_tcl) {
+	OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+		      ((0 << R300_PVS_FIRST_INST_SHIFT) |
+		       (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+		       (1 << R300_PVS_LAST_INST_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+		      (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f00203);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10001);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f02203);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10141);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
+
+	OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
+
+	OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+    }
+    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
+		  ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
+		   (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+
     FINISH_ACCEL();
 
+    /* setup pixel shader */
+    if (IS_R300_VARIANT || info->ChipFamily == CHIP_FAMILY_RS690) {
+      BEGIN_ACCEL(16);
+      OUT_ACCEL_REG(R300_RS_COUNT,
+		    ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+		     R300_RS_COUNT_HIRES_EN));
+      OUT_ACCEL_REG(R300_RS_IP_0,
+		    (R300_RS_TEX_PTR(0) |
+		     R300_RS_COL_PTR(0) |
+		     R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA) |
+		     R300_RS_SEL_S(R300_RS_SEL_C0) |
+		     R300_RS_SEL_T(R300_RS_SEL_C1) |
+		     R300_RS_SEL_R(R300_RS_SEL_K0) |
+		     R300_RS_SEL_Q(R300_RS_SEL_K1)));
+      OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_TX_OFFSET_RS(6));
+      OUT_ACCEL_REG(R300_RS_INST_0, R300_RS_INST_TEX_CN_WRITE);
+      OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
+      OUT_ACCEL_REG(R300_US_PIXSIZE, 0);
+      OUT_ACCEL_REG(R300_US_CODE_OFFSET,
+		    (R300_ALU_CODE_OFFSET(0) |
+		     R300_ALU_CODE_SIZE(1) |
+		     R300_TEX_CODE_OFFSET(0) |
+		     R300_TEX_CODE_SIZE(1)));
+      OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
+      OUT_ACCEL_REG(R300_US_CODE_ADDR_1, 0);
+      OUT_ACCEL_REG(R300_US_CODE_ADDR_2, 0);
+      OUT_ACCEL_REG(R300_US_CODE_ADDR_3, 0x400000);
+      OUT_ACCEL_REG(R300_US_TEX_INST_0, 0x8000);
+      OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, 0x1f800000);
+      OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, 0x50a80);
+      OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1800000);
+      OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, 0x00040889);
+      FINISH_ACCEL();
+    } else {
+      BEGIN_ACCEL(23);
+      OUT_ACCEL_REG(R300_RS_COUNT,
+		    ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+		     R300_RS_COUNT_HIRES_EN));
+      OUT_ACCEL_REG(R500_RS_IP_0, (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+		    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+
+      OUT_ACCEL_REG(R300_RS_INST_COUNT, 0);
+      OUT_ACCEL_REG(R500_RS_INST_0, R500_RS_INST_TEX_CN_WRITE);
+      OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+      OUT_ACCEL_REG(R300_US_PIXSIZE, 0);
+      OUT_ACCEL_REG(R500_US_FC_CTRL, 0);
+      OUT_ACCEL_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+      OUT_ACCEL_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+      OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0);
+      // 7807
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_INST_TYPE_TEX | R500_INST_TEX_SEM_WAIT | 
+		    R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK);
+      
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE |
+		    R500_TEX_IGNORE_UNCOVERED);
+
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G |
+		    R500_TEX_DST_ADDR(0) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B |
+		    R500_TEX_DST_A_SWIZ_A);
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // TEX_ADDR_DXDY
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+
+      // 0x78105
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
+		    R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK);
+
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
+		    R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST | R500_RGB_SRCP_OP_1_MINUS_2RGB0); //0x10040000
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
+		    R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST | R500_ALPHA_SRCP_OP_1_MINUS_2A0); //0x10040000
+
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA,
+		    R500_ALU_RGB_SEL_A_SRC0 |
+		    R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
+		    R500_ALU_RGB_SEL_B_SRC0 |
+		    R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1);//0x00db0220
+
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_ALPHA_OP_MAD | 
+		    R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1);//0x00c0c000)
+
+      OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, R500_ALU_RGBA_OP_MAD |
+		    R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
+		    R500_ALU_RGBA_A_SWIZ_0);//0x20490000
+      FINISH_ACCEL();
+    }
+
     BEGIN_ACCEL(6);
     OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
     OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
@@ -1000,7 +1205,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
     OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl);
-    OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0x0);
+    OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0);
 
 #if 0
     /* IN operator: Multiply src by mask components or mask alpha.
@@ -1094,8 +1299,6 @@ static inline void transformPoint(PictTransform *transform, xPointFixed *point)
 }
 #endif
 
-#define xFixedToFloat(f) (((float) (f)) / 65536)
-
 static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 				     int srcX, int srcY,
 				     int maskX, int maskY,
@@ -1103,7 +1306,6 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 				     int w, int h)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
-    int srcXend, srcYend, maskXend, maskYend;
     int vtx_count;
     xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
     xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
@@ -1114,11 +1316,6 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
        srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
 
-    srcXend = srcX + w;
-    srcYend = srcY + h;
-    maskXend = maskX + w;
-    maskYend = maskY + h;
-
     srcTopLeft.x     = IntToxFixed(srcX);
     srcTopLeft.y     = IntToxFixed(srcY);
     srcTopRight.x    = IntToxFixed(srcX + w);
@@ -1152,7 +1349,7 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 
     vtx_count = VTX_COUNT;
 
-    if (IS_R300_VARIANT) {
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
 	BEGIN_ACCEL(1);
 	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
 	FINISH_ACCEL();
@@ -1172,7 +1369,7 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
 		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
     } else {
-	if (IS_R300_VARIANT)
+	if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
 	    BEGIN_RING(4 * vtx_count + 6);
 	else
 	    BEGIN_RING(4 * vtx_count + 2);
@@ -1185,7 +1382,7 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     }
 
 #else /* ACCEL_CP */
-    if (IS_R300_VARIANT)
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
 	BEGIN_ACCEL(3 + vtx_count * 4);
     else
 	BEGIN_ACCEL(1 + vtx_count * 4);
@@ -1202,29 +1399,21 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     }
 #endif
 
-    if (info->texW[0] == 1 && info->texH[0] == 1 &&
-	info->texW[1] == 1 && info->texH[1] == 1) {
-	VTX_OUT(dstX,     dstY,       srcX,     srcY,	  maskX,    maskY);
-	VTX_OUT(dstX,     dstY + h,   srcX,     srcYend,  maskX,    maskYend);
-	VTX_OUT(dstX + w, dstY + h,   srcXend,  srcYend,  maskXend, maskYend);
-	VTX_OUT(dstX + w, dstY,	      srcXend,  srcY,     maskXend, maskY);
-    } else {
-	VTX_OUT((float)dstX,                                      (float)dstY,
-	        xFixedToFloat(srcTopLeft.x) / info->texW[0],      xFixedToFloat(srcTopLeft.y) / info->texH[0],
-	        xFixedToFloat(maskTopLeft.x) / info->texW[1],     xFixedToFloat(maskTopLeft.y) / info->texH[1]);
-	VTX_OUT((float)dstX,                                      (float)(dstY + h),
-	        xFixedToFloat(srcBottomLeft.x) / info->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->texH[0],
-	        xFixedToFloat(maskBottomLeft.x) / info->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->texH[1]);
-	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
-	        xFixedToFloat(srcBottomRight.x) / info->texW[0],  xFixedToFloat(srcBottomRight.y) / info->texH[0],
-	        xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]);
-	VTX_OUT((float)(dstX + w),                                (float)dstY,
-	        xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0],
-	        xFixedToFloat(maskTopRight.x) / info->texW[1],    xFixedToFloat(maskTopRight.y) / info->texH[1]);
-    }
-
-    if (IS_R300_VARIANT) {
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+    VTX_OUT((float)dstX,                                      (float)dstY,
+	    xFixedToFloat(srcTopLeft.x) / info->texW[0],      xFixedToFloat(srcTopLeft.y) / info->texH[0],
+	    xFixedToFloat(maskTopLeft.x) / info->texW[1],     xFixedToFloat(maskTopLeft.y) / info->texH[1]);
+    VTX_OUT((float)dstX,                                      (float)(dstY + h),
+	    xFixedToFloat(srcBottomLeft.x) / info->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->texH[0],
+	    xFixedToFloat(maskBottomLeft.x) / info->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->texH[1]);
+    VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
+	    xFixedToFloat(srcBottomRight.x) / info->texW[0],  xFixedToFloat(srcBottomRight.y) / info->texH[0],
+	    xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]);
+    VTX_OUT((float)(dstX + w),                                (float)dstY,
+	    xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0],
+	    xFixedToFloat(maskTopRight.x) / info->texW[1],    xFixedToFloat(maskTopRight.y) / info->texH[1]);
+
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
 	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
     }
 
@@ -1237,7 +1426,6 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     LEAVE_DRAW(0);
 }
 #undef VTX_OUT
-#undef VTX_OUT4
 
 #ifdef ONLY_ONCE
 static void RadeonDoneComposite(PixmapPtr pDst)
@@ -1248,3 +1436,4 @@ static void RadeonDoneComposite(PixmapPtr pDst)
 #endif /* ONLY_ONCE */
 
 #undef ONLY_ONCE
+#undef FUNC_NAME
diff --git a/src/radeon_output.c b/src/radeon_output.c
index aceb3d8..62cc5d4 100644
--- a/src/radeon_output.c
+++ b/src/radeon_output.c
@@ -74,11 +74,12 @@ const RADEONMonitorType MonTypeID[10] = {
   MT_DP
 };
 
-const char *TMDSTypeName[4] = {
+const char *TMDSTypeName[5] = {
   "None",
   "Internal",
   "External",
   "LVTMA",
+  "DDIA"
 };
 
 const char *DACTypeName[4] = {
@@ -393,7 +394,7 @@ void RADEONConnectorFindMonitor(ScrnInfoPtr pScrn, xf86OutputPtr output)
     /* panel is probably busted or not connected */
     if ((radeon_output->MonType == MT_LCD) &&
 	((radeon_output->PanelXRes == 0) || (radeon_output->PanelYRes == 0)))
-	radeon_output->MonType == MT_NONE;
+	radeon_output->MonType = MT_NONE;
 
     if (output->MonInfo) {
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EDID data from the display on output: %s ----------------------\n",
@@ -1754,12 +1755,23 @@ legacy_setup_i2c_bus(int ddc_line)
     i2c.put_data_mask = RADEON_GPIO_EN_0;
     i2c.get_clk_mask = RADEON_GPIO_Y_1;
     i2c.get_data_mask = RADEON_GPIO_Y_0;
-    i2c.mask_clk_reg = ddc_line;
-    i2c.mask_data_reg = ddc_line;
-    i2c.put_clk_reg = ddc_line;
-    i2c.put_data_reg = ddc_line;
-    i2c.get_clk_reg = ddc_line;
-    i2c.get_data_reg = ddc_line;
+    if ((ddc_line == RADEON_LCD_GPIO_MASK) ||
+	(ddc_line == RADEON_MDGPIO_EN_REG)) {
+	i2c.mask_clk_reg = ddc_line;
+	i2c.mask_data_reg = ddc_line;
+	i2c.put_clk_reg = ddc_line;
+	i2c.put_data_reg = ddc_line;
+	i2c.get_clk_reg = ddc_line + 4;
+	i2c.get_data_reg = ddc_line + 4;
+    } else {
+	i2c.mask_clk_reg = ddc_line;
+	i2c.mask_data_reg = ddc_line;
+	i2c.put_clk_reg = ddc_line;
+	i2c.put_data_reg = ddc_line;
+	i2c.get_clk_reg = ddc_line;
+	i2c.get_data_reg = ddc_line;
+    }
+
     if (ddc_line)
 	i2c.valid = TRUE;
     else
diff --git a/src/radeon_probe.h b/src/radeon_probe.h
index a3cf1fc..9c1bdc5 100644
--- a/src/radeon_probe.h
+++ b/src/radeon_probe.h
@@ -103,7 +103,8 @@ typedef enum
     TMDS_NONE    = 0,
     TMDS_INT     = 1,
     TMDS_EXT     = 2,
-    TMDS_LVTMA   = 3
+    TMDS_LVTMA   = 3,
+    TMDS_DDIA    = 4
 } RADEONTmdsType;
 
 typedef enum
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 61cdb15..046c52b 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -3819,53 +3819,265 @@
 #define R600_BIOS_7_SCRATCH               0x1740
 
 #define R300_GB_TILE_CONFIG				0x4018
+#       define R300_ENABLE_TILING                       (1 << 0)
+#       define R300_PIPE_COUNT_RV350                    (0 << 1)
+#       define R300_PIPE_COUNT_R300                     (3 << 1)
+#       define R300_PIPE_COUNT_R420_3P                  (6 << 1)
+#       define R300_PIPE_COUNT_R420                     (7 << 1)
+#       define R300_TILE_SIZE_8                         (0 << 4)
+#       define R300_TILE_SIZE_16                        (1 << 4)
+#       define R300_TILE_SIZE_32                        (2 << 4)
+#       define R300_SUBPIXEL_1_12                       (0 << 16)
+#       define R300_SUBPIXEL_1_16                       (1 << 16)
 #define R300_GB_SELECT				        0x401c
 #define R300_GB_ENABLE				        0x4008
 #define R300_GB_AA_CONFIG				0x4020
 #define R300_GB_MSPOS0				        0x4010
+#       define R300_MS_X0_SHIFT                         0
+#       define R300_MS_Y0_SHIFT                         4
+#       define R300_MS_X1_SHIFT                         8
+#       define R300_MS_Y1_SHIFT                         12
+#       define R300_MS_X2_SHIFT                         16
+#       define R300_MS_Y2_SHIFT                         20
+#       define R300_MSBD0_Y_SHIFT                       24
+#       define R300_MSBD0_X_SHIFT                       28
 #define R300_GB_MSPOS1				        0x4014
+#       define R300_MS_X3_SHIFT                         0
+#       define R300_MS_Y3_SHIFT                         4
+#       define R300_MS_X4_SHIFT                         8
+#       define R300_MS_Y4_SHIFT                         12
+#       define R300_MS_X5_SHIFT                         16
+#       define R300_MS_Y5_SHIFT                         20
+#       define R300_MSBD1_SHIFT                         24
 
 #define R300_GA_POLY_MODE				0x4288
+#       define R300_FRONT_PTYPE_POINT                   (0 << 4)
+#       define R300_FRONT_PTYPE_LINE                    (1 << 4)
+#       define R300_FRONT_PTYPE_TRIANGE                 (2 << 4)
+#       define R300_BACK_PTYPE_POINT                    (0 << 7)
+#       define R300_BACK_PTYPE_LINE                     (1 << 7)
+#       define R300_BACK_PTYPE_TRIANGE                  (2 << 7)
 #define R300_GA_ROUND_MODE				0x428c
+#       define R300_GEOMETRY_ROUND_TRUNC                (0 << 0)
+#       define R300_GEOMETRY_ROUND_NEAREST              (1 << 0)
+#       define R300_COLOR_ROUND_TRUNC                   (0 << 2)
+#       define R300_COLOR_ROUND_NEAREST                 (1 << 2)
 #define R300_GA_COLOR_CONTROL			        0x4278
+#       define R300_RGB0_SHADING_SOLID                  (0 << 0)
+#       define R300_RGB0_SHADING_FLAT                   (1 << 0)
+#       define R300_RGB0_SHADING_GOURAUD                (2 << 0)
+#       define R300_ALPHA0_SHADING_SOLID                (0 << 2)
+#       define R300_ALPHA0_SHADING_FLAT                 (1 << 2)
+#       define R300_ALPHA0_SHADING_GOURAUD              (2 << 2)
+#       define R300_RGB1_SHADING_SOLID                  (0 << 4)
+#       define R300_RGB1_SHADING_FLAT                   (1 << 4)
+#       define R300_RGB1_SHADING_GOURAUD                (2 << 4)
+#       define R300_ALPHA1_SHADING_SOLID                (0 << 6)
+#       define R300_ALPHA1_SHADING_FLAT                 (1 << 6)
+#       define R300_ALPHA1_SHADING_GOURAUD              (2 << 6)
+#       define R300_RGB2_SHADING_SOLID                  (0 << 8)
+#       define R300_RGB2_SHADING_FLAT                   (1 << 8)
+#       define R300_RGB2_SHADING_GOURAUD                (2 << 8)
+#       define R300_ALPHA2_SHADING_SOLID                (0 << 10)
+#       define R300_ALPHA2_SHADING_FLAT                 (1 << 10)
+#       define R300_ALPHA2_SHADING_GOURAUD              (2 << 10)
+#       define R300_RGB3_SHADING_SOLID                  (0 << 12)
+#       define R300_RGB3_SHADING_FLAT                   (1 << 12)
+#       define R300_RGB3_SHADING_GOURAUD                (2 << 12)
+#       define R300_ALPHA3_SHADING_SOLID                (0 << 14)
+#       define R300_ALPHA3_SHADING_FLAT                 (1 << 14)
+#       define R300_ALPHA3_SHADING_GOURAUD              (2 << 14)
 #define R300_GA_OFFSET				        0x4290
 
 #define R300_VAP_CNTL_STATUS				0x2140
+#       define R300_PVS_BYPASS                          (1 << 8)
 #define R300_VAP_PVS_STATE_FLUSH_REG		        0x2284
 #define R300_VAP_CNTL				        0x2080
+#       define R300_PVS_NUM_SLOTS_SHIFT                 0
+#       define R300_PVS_NUM_CNTLRS_SHIFT                4
+#       define R300_PVS_NUM_FPUS_SHIFT                  8
+#       define R300_VF_MAX_VTX_NUM_SHIFT                18
+#       define R300_GL_CLIP_SPACE_DEF                   (0 << 22)
+#       define R300_DX_CLIP_SPACE_DEF                   (1 << 22)
 #define R300_VAP_VTE_CNTL				0x20B0
+#       define R300_VPORT_X_SCALE_ENA                   (1 << 0)
+#       define R300_VPORT_X_OFFSET_ENA                  (1 << 1)
+#       define R300_VPORT_Y_SCALE_ENA                   (1 << 2)
+#       define R300_VPORT_Y_OFFSET_ENA                  (1 << 3)
+#       define R300_VPORT_Z_SCALE_ENA                   (1 << 4)
+#       define R300_VPORT_Z_OFFSET_ENA                  (1 << 5)
+#       define R300_VTX_XY_FMT                          (1 << 8)
+#       define R300_VTX_Z_FMT                           (1 << 9)
+#       define R300_VTX_W0_FMT                          (1 << 10)
 #define R300_VAP_PSC_SGN_NORM_CNTL		        0x21DC
 #define R300_VAP_PROG_STREAM_CNTL_0		        0x2150
+#       define R300_DATA_TYPE_0_SHIFT                   0
+#       define R300_DATA_TYPE_FLOAT_1                   0
+#       define R300_DATA_TYPE_FLOAT_2                   1
+#       define R300_DATA_TYPE_FLOAT_3                   2
+#       define R300_DATA_TYPE_FLOAT_4                   3
+#       define R300_DATA_TYPE_BYTE                      4
+#       define R300_DATA_TYPE_D3DCOLOR                  5
+#       define R300_DATA_TYPE_SHORT_2                   6
+#       define R300_DATA_TYPE_SHORT_4                   7
+#       define R300_DATA_TYPE_VECTOR_3_TTT              8
+#       define R300_DATA_TYPE_VECTOR_3_EET              9
+#       define R300_SKIP_DWORDS_0_SHIFT                 4
+#       define R300_DST_VEC_LOC_0_SHIFT                 8
+#       define R300_LAST_VEC_0                          (1 << 13)
+#       define R300_SIGNED_0                            (1 << 14)
+#       define R300_NORMALIZE_0                         (1 << 15)
+#       define R300_DATA_TYPE_1_SHIFT                   16
+#       define R300_SKIP_DWORDS_1_SHIFT                 20
+#       define R300_DST_VEC_LOC_1_SHIFT                 24
+#       define R300_LAST_VEC_1                          (1 << 29)
+#       define R300_SIGNED_1                            (1 << 30)
+#       define R300_NORMALIZE_1                         (1 << 31)
 #define R300_VAP_PROG_STREAM_CNTL_1		        0x2154
+#       define R300_DATA_TYPE_2_SHIFT                   0
+#       define R300_SKIP_DWORDS_2_SHIFT                 4
+#       define R300_DST_VEC_LOC_2_SHIFT                 8
+#       define R300_LAST_VEC_2                          (1 << 13)
+#       define R300_SIGNED_2                            (1 << 14)
+#       define R300_NORMALIZE_2                         (1 << 15)
+#       define R300_DATA_TYPE_3_SHIFT                   16
+#       define R300_SKIP_DWORDS_3_SHIFT                 20
+#       define R300_DST_VEC_LOC_3_SHIFT                 24
+#       define R300_LAST_VEC_3                          (1 << 29)
+#       define R300_SIGNED_3                            (1 << 30)
+#       define R300_NORMALIZE_3                         (1 << 31)
 #define R300_VAP_PROG_STREAM_CNTL_EXT_0	                0x21e0
+#       define R300_SWIZZLE_SELECT_X_0_SHIFT            0
+#       define R300_SWIZZLE_SELECT_Y_0_SHIFT            3
+#       define R300_SWIZZLE_SELECT_Z_0_SHIFT            6
+#       define R300_SWIZZLE_SELECT_W_0_SHIFT            9
+#       define R300_SWIZZLE_SELECT_X                    0
+#       define R300_SWIZZLE_SELECT_Y                    1
+#       define R300_SWIZZLE_SELECT_Z                    2
+#       define R300_SWIZZLE_SELECT_W                    3
+#       define R300_SWIZZLE_SELECT_FP_ZERO              4
+#       define R300_SWIZZLE_SELECT_FP_ONE               5
+#       define R300_WRITE_ENA_0_SHIFT                   12
+#       define R300_WRITE_ENA_X                         1
+#       define R300_WRITE_ENA_Y                         2
+#       define R300_WRITE_ENA_Z                         4
+#       define R300_WRITE_ENA_W                         8
+#       define R300_SWIZZLE_SELECT_X_1_SHIFT            16
+#       define R300_SWIZZLE_SELECT_Y_1_SHIFT            19
+#       define R300_SWIZZLE_SELECT_Z_1_SHIFT            22
+#       define R300_SWIZZLE_SELECT_W_1_SHIFT            25
+#       define R300_WRITE_ENA_1_SHIFT                   28
 #define R300_VAP_PROG_STREAM_CNTL_EXT_1	                0x21e4
+#       define R300_SWIZZLE_SELECT_X_2_SHIFT            0
+#       define R300_SWIZZLE_SELECT_Y_2_SHIFT            3
+#       define R300_SWIZZLE_SELECT_Z_2_SHIFT            6
+#       define R300_SWIZZLE_SELECT_W_2_SHIFT            9
+#       define R300_WRITE_ENA_2_SHIFT                   12
+#       define R300_SWIZZLE_SELECT_X_3_SHIFT            16
+#       define R300_SWIZZLE_SELECT_Y_3_SHIFT            19
+#       define R300_SWIZZLE_SELECT_Z_3_SHIFT            22
+#       define R300_SWIZZLE_SELECT_W_3_SHIFT            25
+#       define R300_WRITE_ENA_3_SHIFT                   28
 #define R300_VAP_PVS_CODE_CNTL_0			0x22D0
+#       define R300_PVS_FIRST_INST_SHIFT                0
+#       define R300_PVS_XYZW_VALID_INST_SHIFT           10
+#       define R300_PVS_LAST_INST_SHIFT                 20
 #define R300_VAP_PVS_CODE_CNTL_1			0x22D8
+#       define R300_PVS_LAST_VTX_SRC_INST_SHIFT         0
 #define R300_VAP_PVS_VECTOR_INDX_REG		        0x2200
 #define R300_VAP_PVS_VECTOR_DATA_REG		        0x2204
 #define R300_VAP_PVS_FLOW_CNTL_OPC		        0x22DC
 #define R300_VAP_OUT_VTX_FMT_0			        0x2090
+#       define R300_VTX_POS_PRESENT                     (1 << 0)
+#       define R300_VTX_COLOR_0_PRESENT                 (1 << 1)
+#       define R300_VTX_COLOR_1_PRESENT                 (1 << 2)
+#       define R300_VTX_COLOR_2_PRESENT                 (1 << 3)
+#       define R300_VTX_COLOR_3_PRESENT                 (1 << 4)
+#       define R300_VTX_PT_SIZE_PRESENT                 (1 << 16)
 #define R300_VAP_OUT_VTX_FMT_1			        0x2094
+#       define R300_TEX_0_COMP_CNT_SHIFT                0
+#       define R300_TEX_1_COMP_CNT_SHIFT                3
+#       define R300_TEX_2_COMP_CNT_SHIFT                6
+#       define R300_TEX_3_COMP_CNT_SHIFT                9
+#       define R300_TEX_4_COMP_CNT_SHIFT                12
+#       define R300_TEX_5_COMP_CNT_SHIFT                15
+#       define R300_TEX_6_COMP_CNT_SHIFT                18
+#       define R300_TEX_7_COMP_CNT_SHIFT                21
 #define R300_VAP_VTX_SIZE				0x20b4
 #define R300_VAP_GB_VERT_CLIP_ADJ		        0x2220
 #define R300_VAP_GB_VERT_DISC_ADJ		        0x2224
 #define R300_VAP_GB_HORZ_CLIP_ADJ		        0x2228
 #define R300_VAP_GB_HORZ_DISC_ADJ		        0x222c
 #define R300_VAP_CLIP_CNTL				0x221c
+#       define R300_UCP_ENA_0                           (1 << 0)
+#       define R300_UCP_ENA_1                           (1 << 1)
+#       define R300_UCP_ENA_2                           (1 << 2)
+#       define R300_UCP_ENA_3                           (1 << 3)
+#       define R300_UCP_ENA_4                           (1 << 4)
+#       define R300_UCP_ENA_5                           (1 << 5)
+#       define R300_PS_UCP_MODE_SHIFT                   14
+#       define R300_CLIP_DISABLE                        (1 << 16)
+#       define R300_UCP_CULL_ONLY_ENA                   (1 << 17)
+#       define R300_BOUNDARY_EDGE_FLAG_ENA              (1 << 18)
 
 #define R300_SU_TEX_WRAP				0x42a0
 #define R300_SU_POLY_OFFSET_ENABLE		        0x42b4
 #define R300_SU_CULL_MODE				0x42b8
+#       define R300_CULL_FRONT                          (1 << 0)
+#       define R300_CULL_BACK                           (1 << 1)
+#       define R300_FACE_POS                            (0 << 2)
+#       define R300_FACE_NEG                            (1 << 2)
 #define R300_SU_DEPTH_SCALE				0x42c0
 #define R300_SU_DEPTH_OFFSET			        0x42c4
 
 #define R300_RS_COUNT				        0x4300
+#	define R300_RS_COUNT_IT_COUNT_SHIFT		0
+#	define R300_RS_COUNT_IC_COUNT_SHIFT		7
+#	define R300_RS_COUNT_HIRES_EN			(1 << 18)
+
 #define R300_RS_IP_0				        0x4310
+#	define R300_RS_TEX_PTR(x)		        ((x) << 0)  /* arg parenthesized: safe for expression arguments */
+#	define R300_RS_COL_PTR(x)		        ((x) << 6)
+#	define R300_RS_COL_FMT(x)		        ((x) << 9)  /* x is one of R300_RS_COL_FMT_* below */
+#	define R300_RS_COL_FMT_RGBA		        0
+#	define R300_RS_COL_FMT_RGB0		        2
+#	define R300_RS_COL_FMT_RGB1		        3
+#	define R300_RS_COL_FMT_000A		        4
+#	define R300_RS_COL_FMT_0000		        5
+#	define R300_RS_COL_FMT_0001		        6
+#	define R300_RS_COL_FMT_111A		        8
+#	define R300_RS_COL_FMT_1110		        9
+#	define R300_RS_COL_FMT_1111		        10
+#	define R300_RS_SEL_S(x)		                ((x) << 13)  /* x is one of R300_RS_SEL_* below */
+#	define R300_RS_SEL_T(x)		                ((x) << 16)
+#	define R300_RS_SEL_R(x)		                ((x) << 19)
+#	define R300_RS_SEL_Q(x)		                ((x) << 22)
+#	define R300_RS_SEL_C0		                0
+#	define R300_RS_SEL_C1		                1
+#	define R300_RS_SEL_C2		                2
+#	define R300_RS_SEL_C3		                3
+#	define R300_RS_SEL_K0		                4
+#	define R300_RS_SEL_K1		                5
 #define R300_RS_INST_COUNT				0x4304
+#	define R300_INST_COUNT_RS(x)		        ((x) << 0)  /* arg parenthesized: safe for expression arguments */
+#	define R300_RS_W_EN			        (1 << 4)
+#	define R300_TX_OFFSET_RS(x)		        ((x) << 5)
 #define R300_RS_INST_0				        0x4330
+#       define R300_RS_INST_TEX_CN_WRITE		(1 << 3)
 
 #define R300_TX_INVALTAGS				0x4100
 #define R300_TX_FILTER0_0				0x4400
+#       define R300_TX_CLAMP_S(x)                       ((x) << 0)  /* x is one of the R300_TX_CLAMP_* modes */
+#       define R300_TX_CLAMP_T(x)                       ((x) << 3)
+#       define R300_TX_CLAMP_R(x)                       ((x) << 6)
+#       define R300_TX_CLAMP_WRAP                       0
+#       define R300_TX_CLAMP_MIRROR                     1
+#       define R300_TX_CLAMP_CLAMP_LAST                 2
+#       define R300_TX_CLAMP_MIRROR_CLAMP_LAST          3
+#       define R300_TX_CLAMP_CLAMP_BORDER               4
+#       define R300_TX_CLAMP_MIRROR_CLAMP_BORDER        5
+#       define R300_TX_CLAMP_CLAMP_GL                   6
+#       define R300_TX_CLAMP_MIRROR_CLAMP_GL            7
 #       define R300_TX_MAG_FILTER_NEAREST               (1 << 9)
 #       define R300_TX_MIN_FILTER_NEAREST               (1 << 11)
 #       define R300_TX_MAG_FILTER_LINEAR                (2 << 9)
@@ -3901,6 +4113,8 @@
 #	define R300_TX_FORMAT_A8R8G8B8	    	    0x13     /* no swizzle */
 #	define R300_TX_FORMAT_B8G8_B8G8	    	    0x14     /* no swizzle */
 #	define R300_TX_FORMAT_G8R8_G8B8	    	    0x15     /* no swizzle */
+#	define R300_TX_FORMAT_VYUY422	    	    0x14     /* no swizzle */
+#	define R300_TX_FORMAT_YVYU422	    	    0x15     /* no swizzle */
 #	define R300_TX_FORMAT_X24_Y8	    	    0x1e
 #	define R300_TX_FORMAT_X32	    	    0x1e
 	/* Floating point formats */
@@ -3945,6 +4159,10 @@
 		| (R300_TX_FORMAT_##FMT)				\
 		)
 
+#       define R300_TX_FORMAT_YUV_TO_RGB_CLAMP         (1 << 22)
+#       define R300_TX_FORMAT_YUV_TO_RGB_NO_CLAMP      (2 << 22)
+#       define R300_TX_FORMAT_SWAP_YUV                 (1 << 24)
+
 #define R300_TX_FORMAT2_0				0x4500
 #define R300_TX_OFFSET_0				0x4540
 #       define R300_ENDIAN_SWAP_16_BIT                  (1 << 0)
@@ -3961,9 +4179,53 @@
 #define R300_US_OUT_FMT_2				0x46ac
 #define R300_US_OUT_FMT_3				0x46b0
 #define R300_US_OUT_FMT_0				0x46a4
+#       define R300_OUT_FMT_C4_8                        (0 << 0)
+#       define R300_OUT_FMT_C4_10                       (1 << 0)
+#       define R300_OUT_FMT_C4_10_GAMMA                 (2 << 0)
+#       define R300_OUT_FMT_C_16                        (3 << 0)
+#       define R300_OUT_FMT_C2_16                       (4 << 0)
+#       define R300_OUT_FMT_C4_16                       (5 << 0)
+#       define R300_OUT_FMT_C_16_MPEG                   (6 << 0)
+#       define R300_OUT_FMT_C2_16_MPEG                  (7 << 0)
+#       define R300_OUT_FMT_C2_4                        (8 << 0)
+#       define R300_OUT_FMT_C_3_3_2                     (9 << 0)
+#       define R300_OUT_FMT_C_6_5_6                     (10 << 0)
+#       define R300_OUT_FMT_C_11_11_10                  (11 << 0)
+#       define R300_OUT_FMT_C_10_11_11                  (12 << 0)
+#       define R300_OUT_FMT_C_2_10_10_10                (13 << 0)
+#       define R300_OUT_FMT_UNUSED                      (15 << 0)
+#       define R300_OUT_FMT_C_16_FP                     (16 << 0)
+#       define R300_OUT_FMT_C2_16_FP                    (17 << 0)
+#       define R300_OUT_FMT_C4_16_FP                    (18 << 0)
+#       define R300_OUT_FMT_C_32_FP                     (19 << 0)
+#       define R300_OUT_FMT_C2_32_FP                    (20 << 0)
+#       define R300_OUT_FMT_C4_32_FP                    (21 << 0)
+#       define R300_OUT_FMT_C0_SEL_ALPHA                (0 << 8)
+#       define R300_OUT_FMT_C0_SEL_RED                  (1 << 8)
+#       define R300_OUT_FMT_C0_SEL_GREEN                (2 << 8)
+#       define R300_OUT_FMT_C0_SEL_BLUE                 (3 << 8)
+#       define R300_OUT_FMT_C1_SEL_ALPHA                (0 << 10)
+#       define R300_OUT_FMT_C1_SEL_RED                  (1 << 10)
+#       define R300_OUT_FMT_C1_SEL_GREEN                (2 << 10)
+#       define R300_OUT_FMT_C1_SEL_BLUE                 (3 << 10)
+#       define R300_OUT_FMT_C2_SEL_ALPHA                (0 << 12)
+#       define R300_OUT_FMT_C2_SEL_RED                  (1 << 12)
+#       define R300_OUT_FMT_C2_SEL_GREEN                (2 << 12)
+#       define R300_OUT_FMT_C2_SEL_BLUE                 (3 << 12)
+#       define R300_OUT_FMT_C3_SEL_ALPHA                (0 << 14)
+#       define R300_OUT_FMT_C3_SEL_RED                  (1 << 14)
+#       define R300_OUT_FMT_C3_SEL_GREEN                (2 << 14)
+#       define R300_OUT_FMT_C3_SEL_BLUE                 (3 << 14)
 #define R300_US_CONFIG				        0x4600
+#       define R300_NLEVEL_SHIFT                        0
+#       define R300_FIRST_TEX                           (1 << 3)
+#       define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO     (1 << 1)
 #define R300_US_PIXSIZE				        0x4604
 #define R300_US_CODE_OFFSET				0x4608
+#       define R300_ALU_CODE_OFFSET(x)                  ((x) << 0)
+#       define R300_ALU_CODE_SIZE(x)                    ((x) << 6)
+#       define R300_TEX_CODE_OFFSET(x)                  ((x) << 13)
+#       define R300_TEX_CODE_SIZE(x)                    ((x) << 18)
 #define R300_US_CODE_ADDR_0				0x4610
 #define R300_US_CODE_ADDR_1				0x4614
 #define R300_US_CODE_ADDR_2				0x4618
@@ -3979,8 +4241,14 @@
 #define R300_FG_ALPHA_FUNC				0x4bd4
 
 #define R300_RB3D_DSTCACHE_CTLSTAT		        0x4e4c
+#       define R300_DC_FLUSH_3D                         (2 << 0)
+#       define R300_DC_FREE_3D                          (2 << 2)
 #define R300_RB3D_ZCACHE_CTLSTAT			0x4f18
+#       define R300_ZC_FLUSH                            (1 << 0)
+#       define R300_ZC_FREE                             (1 << 1)
 #define R300_WAIT_UNTIL				        0x1720
+#       define R300_WAIT_2D_IDLECLEAN                   (1 << 16)
+#       define R300_WAIT_3D_IDLECLEAN                   (1 << 17)
 #define R300_RB3D_ZSTENCILCNTL			        0x4f04
 #define R300_RB3D_ZCACHE_CTLSTAT		        0x4f18
 #define R300_RB3D_BW_CNTL				0x4f1c
@@ -4009,6 +4277,10 @@
 
 #define R300_RB3D_AARESOLVE_CTL			        0x4e88
 #define R300_RB3D_COLOR_CHANNEL_MASK	                0x4e0c
+#       define R300_BLUE_MASK_EN                        (1 << 0)
+#       define R300_GREEN_MASK_EN                       (1 << 1)
+#       define R300_RED_MASK_EN                         (1 << 2)
+#       define R300_ALPHA_MASK_EN                       (1 << 3)
 #define R300_RB3D_COLOR_CLEAR_VALUE                     0x4e14
 #define R300_RB3D_DSTCACHE_CTLSTAT		        0x4e4c
 #define R300_RB3D_CCTL				        0x4e00
@@ -4017,9 +4289,524 @@
 #define R300_SC_EDGERULE				0x43a8
 #define R300_SC_SCISSOR0				0x43e0
 #define R300_SC_SCISSOR1				0x43e4
+#       define R300_SCISSOR_X_SHIFT                     0
+#       define R300_SCISSOR_Y_SHIFT                     13
 #define R300_SC_CLIP_0_A				0x43b0
 #define R300_SC_CLIP_0_B				0x43b4
+#       define R300_CLIP_X_SHIFT                        0
+#       define R300_CLIP_Y_SHIFT                        13
 #define R300_SC_CLIP_RULE				0x43d0
 #define R300_SC_SCREENDOOR				0x43e8
 
+/* R500 US has to be loaded through an index/data pair */
+#define R500_GA_US_VECTOR_INDEX				0x4250
+#   define R500_US_VECTOR_INDEX(x)			((x) << 0)
+#   define R500_US_VECTOR_TYPE_INST			(0 << 16)
+#   define R500_US_VECTOR_TYPE_CONST			(1 << 16)
+#   define R500_US_VECTOR_CLAMP				(1 << 17)
+#define R500_GA_US_VECTOR_DATA				0x4254
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader.  You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0			0xa800
+#   define R500_ALPHA_OP_MAD				0
+#   define R500_ALPHA_OP_DP				1
+#   define R500_ALPHA_OP_MIN				2
+#   define R500_ALPHA_OP_MAX				3
+/* #define R500_ALPHA_OP_RESERVED			4 */
+#   define R500_ALPHA_OP_CND				5
+#   define R500_ALPHA_OP_CMP				6
+#   define R500_ALPHA_OP_FRC				7
+#   define R500_ALPHA_OP_EX2				8
+#   define R500_ALPHA_OP_LN2				9
+#   define R500_ALPHA_OP_RCP				10
+#   define R500_ALPHA_OP_RSQ				11
+#   define R500_ALPHA_OP_SIN				12
+#   define R500_ALPHA_OP_COS				13
+#   define R500_ALPHA_OP_MDH				14
+#   define R500_ALPHA_OP_MDV				15
+#   define R500_ALPHA_ADDRD(x)				((x) << 4)
+#   define R500_ALPHA_ADDRD_REL				(1 << 11)
+#   define R500_ALPHA_SEL_A_SRC0			(0 << 12)
+#   define R500_ALPHA_SEL_A_SRC1			(1 << 12)
+#   define R500_ALPHA_SEL_A_SRC2			(2 << 12)
+#   define R500_ALPHA_SEL_A_SRCP			(3 << 12)
+#   define R500_ALPHA_SWIZ_A_R				(0 << 14)
+#   define R500_ALPHA_SWIZ_A_G				(1 << 14)
+#   define R500_ALPHA_SWIZ_A_B				(2 << 14)
+#   define R500_ALPHA_SWIZ_A_A				(3 << 14)
+#   define R500_ALPHA_SWIZ_A_0				(4 << 14)
+#   define R500_ALPHA_SWIZ_A_HALF			(5 << 14)
+#   define R500_ALPHA_SWIZ_A_1				(6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED			(7 << 14) */
+#   define R500_ALPHA_MOD_A_NOP				(0 << 17)
+#   define R500_ALPHA_MOD_A_NEG				(1 << 17)
+#   define R500_ALPHA_MOD_A_ABS				(2 << 17)
+#   define R500_ALPHA_MOD_A_NAB				(3 << 17)
+#   define R500_ALPHA_SEL_B_SRC0			(0 << 19)
+#   define R500_ALPHA_SEL_B_SRC1			(1 << 19)
+#   define R500_ALPHA_SEL_B_SRC2			(2 << 19)
+#   define R500_ALPHA_SEL_B_SRCP			(3 << 19)
+#   define R500_ALPHA_SWIZ_B_R				(0 << 21)
+#   define R500_ALPHA_SWIZ_B_G				(1 << 21)
+#   define R500_ALPHA_SWIZ_B_B				(2 << 21)
+#   define R500_ALPHA_SWIZ_B_A				(3 << 21)
+#   define R500_ALPHA_SWIZ_B_0				(4 << 21)
+#   define R500_ALPHA_SWIZ_B_HALF			(5 << 21)
+#   define R500_ALPHA_SWIZ_B_1				(6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED			(7 << 21) */
+#   define R500_ALPHA_MOD_B_NOP				(0 << 24)
+#   define R500_ALPHA_MOD_B_NEG				(1 << 24)
+#   define R500_ALPHA_MOD_B_ABS				(2 << 24)
+#   define R500_ALPHA_MOD_B_NAB				(3 << 24)
+#   define R500_ALPHA_OMOD_IDENTITY			(0 << 26)
+#   define R500_ALPHA_OMOD_MUL_2			(1 << 26)
+#   define R500_ALPHA_OMOD_MUL_4			(2 << 26)
+#   define R500_ALPHA_OMOD_MUL_8			(3 << 26)
+#   define R500_ALPHA_OMOD_DIV_2			(4 << 26)
+#   define R500_ALPHA_OMOD_DIV_4			(5 << 26)
+#   define R500_ALPHA_OMOD_DIV_8			(6 << 26)
+#   define R500_ALPHA_OMOD_DISABLE			(7 << 26)
+#   define R500_ALPHA_TARGET(x)				((x) << 29)
+#   define R500_ALPHA_W_OMASK				(1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R500_US_ALU_ALPHA_ADDR_0			0x9800
+#   define R500_ALPHA_ADDR0(x)				((x) << 0)
+#   define R500_ALPHA_ADDR0_CONST			(1 << 8)
+#   define R500_ALPHA_ADDR0_REL				(1 << 9)
+#   define R500_ALPHA_ADDR1(x)				((x) << 10)
+#   define R500_ALPHA_ADDR1_CONST			(1 << 18)
+#   define R500_ALPHA_ADDR1_REL				(1 << 19)
+#   define R500_ALPHA_ADDR2(x)				((x) << 20)
+#   define R500_ALPHA_ADDR2_CONST			(1 << 28)
+#   define R500_ALPHA_ADDR2_REL				(1 << 29)
+#   define R500_ALPHA_SRCP_OP_1_MINUS_2A0		(0 << 30)
+#   define R500_ALPHA_SRCP_OP_A1_MINUS_A0		(1 << 30)
+#   define R500_ALPHA_SRCP_OP_A1_PLUS_A0		(2U << 30) /* unsigned: reaches bit 31 */
+#   define R500_ALPHA_SRCP_OP_1_PLUS_A0			(3U << 30) /* unsigned: reaches bit 31 */
+#define R500_US_ALU_RGBA_INST_0				0xb000
+#   define R500_ALU_RGBA_OP_MAD				(0 << 0)
+#   define R500_ALU_RGBA_OP_DP3				(1 << 0)
+#   define R500_ALU_RGBA_OP_DP4				(2 << 0)
+#   define R500_ALU_RGBA_OP_D2A				(3 << 0)
+#   define R500_ALU_RGBA_OP_MIN				(4 << 0)
+#   define R500_ALU_RGBA_OP_MAX				(5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED			(6 << 0) */
+#   define R500_ALU_RGBA_OP_CND				(7 << 0)
+#   define R500_ALU_RGBA_OP_CMP				(8 << 0)
+#   define R500_ALU_RGBA_OP_FRC				(9 << 0)
+#   define R500_ALU_RGBA_OP_SOP				(10 << 0)
+#   define R500_ALU_RGBA_OP_MDH				(11 << 0)
+#   define R500_ALU_RGBA_OP_MDV				(12 << 0)
+#   define R500_ALU_RGBA_ADDRD(x)			((x) << 4)
+#   define R500_ALU_RGBA_ADDRD_REL			(1 << 11)
+#   define R500_ALU_RGBA_SEL_C_SRC0			(0 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRC1			(1 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRC2			(2 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRCP			(3 << 12)
+#   define R500_ALU_RGBA_R_SWIZ_R			(0 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_G			(1 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_B			(2 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_A			(3 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_0			(4 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_HALF			(5 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_1			(6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED			(7 << 14) */
+#   define R500_ALU_RGBA_G_SWIZ_R			(0 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_G			(1 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_B			(2 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_A			(3 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_0			(4 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_HALF			(5 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_1			(6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED			(7 << 17) */
+#   define R500_ALU_RGBA_B_SWIZ_R			(0 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_G			(1 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_B			(2 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_A			(3 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_0			(4 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_HALF			(5 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_1			(6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED			(7 << 20) */
+#   define R500_ALU_RGBA_MOD_C_NOP			(0 << 23)
+#   define R500_ALU_RGBA_MOD_C_NEG			(1 << 23)
+#   define R500_ALU_RGBA_MOD_C_ABS			(2 << 23)
+#   define R500_ALU_RGBA_MOD_C_NAB			(3 << 23)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC0		(0 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC1		(1 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC2		(2 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRCP		(3 << 25)
+#   define R500_ALU_RGBA_A_SWIZ_R			(0 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_G			(1 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_B			(2 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_A			(3 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_0			(4 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_HALF			(5 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_1			(6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED			(7 << 27) */
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NOP		(0 << 30)
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NEG		(1 << 30)
+#   define R500_ALU_RGBA_ALPHA_MOD_C_ABS		(2U << 30) /* unsigned: reaches bit 31 */
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NAB		(3U << 30) /* unsigned: reaches bit 31 */
+#define R500_US_ALU_RGB_INST_0				0xa000
+#   define R500_ALU_RGB_SEL_A_SRC0			(0 << 0)
+#   define R500_ALU_RGB_SEL_A_SRC1			(1 << 0)
+#   define R500_ALU_RGB_SEL_A_SRC2			(2 << 0)
+#   define R500_ALU_RGB_SEL_A_SRCP			(3 << 0)
+#   define R500_ALU_RGB_R_SWIZ_A_R			(0 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_G			(1 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_B			(2 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_A			(3 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_0			(4 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_HALF			(5 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_1			(6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED			(7 << 2) */
+#   define R500_ALU_RGB_G_SWIZ_A_R			(0 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_G			(1 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_B			(2 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_A			(3 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_0			(4 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_HALF			(5 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_1			(6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED			(7 << 5) */
+#   define R500_ALU_RGB_B_SWIZ_A_R			(0 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_G			(1 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_B			(2 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_A			(3 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_0			(4 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_HALF			(5 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_1			(6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED			(7 << 8) */
+#   define R500_ALU_RGB_MOD_A_NOP			(0 << 11)
+#   define R500_ALU_RGB_MOD_A_NEG			(1 << 11)
+#   define R500_ALU_RGB_MOD_A_ABS			(2 << 11)
+#   define R500_ALU_RGB_MOD_A_NAB			(3 << 11)
+#   define R500_ALU_RGB_SEL_B_SRC0			(0 << 13)
+#   define R500_ALU_RGB_SEL_B_SRC1			(1 << 13)
+#   define R500_ALU_RGB_SEL_B_SRC2			(2 << 13)
+#   define R500_ALU_RGB_SEL_B_SRCP			(3 << 13)
+#   define R500_ALU_RGB_R_SWIZ_B_R			(0 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_G			(1 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_B			(2 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_A			(3 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_0			(4 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_HALF			(5 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_1			(6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED			(7 << 15) */
+#   define R500_ALU_RGB_G_SWIZ_B_R			(0 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_G			(1 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_B			(2 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_A			(3 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_0			(4 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_HALF			(5 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_1			(6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED			(7 << 18) */
+#   define R500_ALU_RGB_B_SWIZ_B_R			(0 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_G			(1 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_B			(2 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_A			(3 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_0			(4 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_HALF			(5 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_1			(6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED			(7 << 21) */
+#   define R500_ALU_RGB_MOD_B_NOP			(0 << 24)
+#   define R500_ALU_RGB_MOD_B_NEG			(1 << 24)
+#   define R500_ALU_RGB_MOD_B_ABS			(2 << 24)
+#   define R500_ALU_RGB_MOD_B_NAB			(3 << 24)
+#   define R500_ALU_RGB_OMOD_IDENTITY			(0 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_2			(1 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_4			(2 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_8			(3 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_2			(4 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_4			(5 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_8			(6 << 26)
+#   define R500_ALU_RGB_OMOD_DISABLE			(7 << 26)
+#   define R500_ALU_RGB_TARGET(x)			((x) << 29)
+#   define R500_ALU_RGB_WMASK				(1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R500_US_ALU_RGB_ADDR_0				0x9000
+#   define R500_RGB_ADDR0(x)				((x) << 0)
+#   define R500_RGB_ADDR0_CONST				(1 << 8)
+#   define R500_RGB_ADDR0_REL				(1 << 9)
+#   define R500_RGB_ADDR1(x)				((x) << 10)
+#   define R500_RGB_ADDR1_CONST				(1 << 18)
+#   define R500_RGB_ADDR1_REL				(1 << 19)
+#   define R500_RGB_ADDR2(x)				((x) << 20)
+#   define R500_RGB_ADDR2_CONST				(1 << 28)
+#   define R500_RGB_ADDR2_REL				(1 << 29)
+#   define R500_RGB_SRCP_OP_1_MINUS_2RGB0		(0 << 30)
+#   define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0		(1 << 30)
+#   define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0		(2U << 30) /* unsigned: reaches bit 31 */
+#   define R500_RGB_SRCP_OP_1_PLUS_RGB0			(3U << 30) /* unsigned: reaches bit 31 */
+#define R500_US_CMN_INST_0				0xb800
+#   define R500_INST_TYPE_ALU				(0 << 0)
+#   define R500_INST_TYPE_OUT				(1 << 0)
+#   define R500_INST_TYPE_FC				(2 << 0)
+#   define R500_INST_TYPE_TEX				(3 << 0)
+#   define R500_INST_TEX_SEM_WAIT			(1 << 2)
+#   define R500_INST_RGB_PRED_SEL_NONE			(0 << 3)
+#   define R500_INST_RGB_PRED_SEL_RGBA			(1 << 3)
+#   define R500_INST_RGB_PRED_SEL_RRRR			(2 << 3)
+#   define R500_INST_RGB_PRED_SEL_GGGG			(3 << 3)
+#   define R500_INST_RGB_PRED_SEL_BBBB			(4 << 3)
+#   define R500_INST_RGB_PRED_SEL_AAAA			(5 << 3)
+#   define R500_INST_RGB_PRED_INV			(1 << 6)
+#   define R500_INST_WRITE_INACTIVE			(1 << 7)
+#   define R500_INST_LAST				(1 << 8)
+#   define R500_INST_NOP				(1 << 9)
+#   define R500_INST_ALU_WAIT				(1 << 10)
+#   define R500_INST_RGB_WMASK_R			(1 << 11)
+#   define R500_INST_RGB_WMASK_G			(1 << 12)
+#   define R500_INST_RGB_WMASK_B			(1 << 13)
+#   define R500_INST_ALPHA_WMASK			(1 << 14)
+#   define R500_INST_RGB_OMASK_R			(1 << 15)
+#   define R500_INST_RGB_OMASK_G			(1 << 16)
+#   define R500_INST_RGB_OMASK_B			(1 << 17)
+#   define R500_INST_ALPHA_OMASK			(1 << 18)
+#   define R500_INST_RGB_CLAMP				(1 << 19)
+#   define R500_INST_ALPHA_CLAMP			(1 << 20)
+#   define R500_INST_ALU_RESULT_SEL			(1 << 21)
+#   define R500_INST_ALPHA_PRED_INV			(1 << 22)
+#   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23)
+#   define R500_INST_ALU_RESULT_OP_LT			(1 << 23)
+#   define R500_INST_ALU_RESULT_OP_GE			(2 << 23)
+#   define R500_INST_ALU_RESULT_OP_NE			(3 << 23)
+#   define R500_INST_ALPHA_PRED_SEL_NONE		(0 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_RGBA		(1 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_RRRR		(2 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_GGGG		(3 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_BBBB		(4 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_AAAA		(5 << 25)
+/* XXX next four are kind of guessed */
+#   define R500_INST_STAT_WE_R				(1 << 28)
+#   define R500_INST_STAT_WE_G				(1 << 29)
+#   define R500_INST_STAT_WE_B				(1 << 30)
+#   define R500_INST_STAT_WE_A				(1U << 31) /* unsigned: 1 << 31 overflows int */
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR				0x4630
+#   define R500_US_CODE_START_ADDR(x)			((x) << 0)
+#   define R500_US_CODE_END_ADDR(x)			((x) << 16)
+#define R500_US_CODE_OFFSET				0x4638
+#   define R500_US_CODE_OFFSET_ADDR(x)			((x) << 0)
+#define R500_US_CODE_RANGE				0x4634
+#   define R500_US_CODE_RANGE_ADDR(x)			((x) << 0)
+#   define R500_US_CODE_RANGE_SIZE(x)			((x) << 16)
+#define R500_US_CONFIG					0x4600
+#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO		(1 << 1)
+#define R500_US_FC_ADDR_0				0xa000
+#   define R500_FC_BOOL_ADDR(x)				((x) << 0)
+#   define R500_FC_INT_ADDR(x)				((x) << 8)
+#   define R500_FC_JUMP_ADDR(x)				((x) << 16)
+#   define R500_FC_JUMP_GLOBAL				(1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R500_US_FC_BOOL_CONST				0x4620
+#   define R500_FC_KBOOL(x)				(x)
+#define R500_US_FC_CTRL					0x4624
+#   define R500_FC_TEST_EN				(1 << 30)
+#   define R500_FC_FULL_FC_EN				(1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R500_US_FC_INST_0				0x9800
+#   define R500_FC_OP_JUMP				(0 << 0)
+#   define R500_FC_OP_LOOP				(1 << 0)
+#   define R500_FC_OP_ENDLOOP				(2 << 0)
+#   define R500_FC_OP_REP				(3 << 0)
+#   define R500_FC_OP_ENDREP				(4 << 0)
+#   define R500_FC_OP_BREAKLOOP				(5 << 0)
+#   define R500_FC_OP_BREAKREP				(6 << 0)
+#   define R500_FC_OP_CONTINUE				(7 << 0)
+#   define R500_FC_B_ELSE				(1 << 4)
+#   define R500_FC_JUMP_ANY				(1 << 5)
+#   define R500_FC_A_OP_NONE				(0 << 6)
+#   define R500_FC_A_OP_POP				(1 << 6)
+#   define R500_FC_A_OP_PUSH				(2 << 6)
+#   define R500_FC_JUMP_FUNC(x)				((x) << 8)
+#   define R500_FC_B_POP_CNT(x)				((x) << 16)
+#   define R500_FC_B_OP0_NONE				(0 << 24)
+#   define R500_FC_B_OP0_DECR				(1 << 24)
+#   define R500_FC_B_OP0_INCR				(2 << 24)
+#   define R500_FC_B_OP1_DECR				(0 << 26) /* NOTE(review): OP0 encodes NONE=0, DECR=1; confirm OP1 order against the register spec */
+#   define R500_FC_B_OP1_NONE				(1 << 26)
+#   define R500_FC_B_OP1_INCR				(2 << 26)
+#   define R500_FC_IGNORE_UNCOVERED			(1 << 28)
+#define R500_US_FC_INT_CONST_0				0x4c00
+#   define R500_FC_INT_CONST_KR(x)			((x) << 0)
+#   define R500_FC_INT_CONST_KG(x)			((x) << 8)
+#   define R500_FC_INT_CONST_KB(x)			((x) << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0				0x4640
+#   define R500_FORMAT_TXWIDTH(x)			((x) << 0)
+#   define R500_FORMAT_TXHEIGHT(x)			((x) << 11)
+#   define R500_FORMAT_TXDEPTH(x)			((x) << 22)
+/* _0 through _3 */
+#define R500_US_OUT_FMT_0				0x46a4
+#   define R500_OUT_FMT_C4_8				(0 << 0)
+#   define R500_OUT_FMT_C4_10				(1 << 0)
+#   define R500_OUT_FMT_C4_10_GAMMA			(2 << 0)
+#   define R500_OUT_FMT_C_16				(3 << 0)
+#   define R500_OUT_FMT_C2_16				(4 << 0)
+#   define R500_OUT_FMT_C4_16				(5 << 0)
+#   define R500_OUT_FMT_C_16_MPEG			(6 << 0)
+#   define R500_OUT_FMT_C2_16_MPEG			(7 << 0)
+#   define R500_OUT_FMT_C2_4				(8 << 0)
+#   define R500_OUT_FMT_C_3_3_2				(9 << 0)
+#   define R500_OUT_FMT_C_6_5_6				(10 << 0)
+#   define R500_OUT_FMT_C_11_11_10			(11 << 0)
+#   define R500_OUT_FMT_C_10_11_11			(12 << 0)
+#   define R500_OUT_FMT_C_2_10_10_10			(13 << 0)
+/* #define R500_OUT_FMT_RESERVED			(14 << 0) */
+#   define R500_OUT_FMT_UNUSED				(15 << 0)
+#   define R500_OUT_FMT_C_16_FP				(16 << 0)
+#   define R500_OUT_FMT_C2_16_FP			(17 << 0)
+#   define R500_OUT_FMT_C4_16_FP			(18 << 0)
+#   define R500_OUT_FMT_C_32_FP				(19 << 0)
+#   define R500_OUT_FMT_C2_32_FP			(20 << 0)
+#   define R500_OUT_FMT_C4_32_FP			(21 << 0)
+#   define R500_C0_SEL_A				(0 << 8)
+#   define R500_C0_SEL_R				(1 << 8)
+#   define R500_C0_SEL_G				(2 << 8)
+#   define R500_C0_SEL_B				(3 << 8)
+#   define R500_C1_SEL_A				(0 << 10)
+#   define R500_C1_SEL_R				(1 << 10)
+#   define R500_C1_SEL_G				(2 << 10)
+#   define R500_C1_SEL_B				(3 << 10)
+#   define R500_C2_SEL_A				(0 << 12)
+#   define R500_C2_SEL_R				(1 << 12)
+#   define R500_C2_SEL_G				(2 << 12)
+#   define R500_C2_SEL_B				(3 << 12)
+#   define R500_C3_SEL_A				(0 << 14)
+#   define R500_C3_SEL_R				(1 << 14)
+#   define R500_C3_SEL_G				(2 << 14)
+#   define R500_C3_SEL_B				(3 << 14)
+#   define R500_OUT_SIGN(x)				((x) << 16)
+#   define R500_ROUND_ADJ				(1 << 20)
+#define R500_US_PIXSIZE					0x4604
+#   define R500_PIX_SIZE(x)				(x)
+#define R500_US_TEX_ADDR_0				0x9800
+#   define R500_TEX_SRC_ADDR(x)				((x) << 0)
+#   define R500_TEX_SRC_ADDR_REL			(1 << 7)
+#   define R500_TEX_SRC_S_SWIZ_R			(0 << 8)
+#   define R500_TEX_SRC_S_SWIZ_G			(1 << 8)
+#   define R500_TEX_SRC_S_SWIZ_B			(2 << 8)
+#   define R500_TEX_SRC_S_SWIZ_A			(3 << 8)
+#   define R500_TEX_SRC_T_SWIZ_R			(0 << 10)
+#   define R500_TEX_SRC_T_SWIZ_G			(1 << 10)
+#   define R500_TEX_SRC_T_SWIZ_B			(2 << 10)
+#   define R500_TEX_SRC_T_SWIZ_A			(3 << 10)
+#   define R500_TEX_SRC_R_SWIZ_R			(0 << 12)
+#   define R500_TEX_SRC_R_SWIZ_G			(1 << 12)
+#   define R500_TEX_SRC_R_SWIZ_B			(2 << 12)
+#   define R500_TEX_SRC_R_SWIZ_A			(3 << 12)
+#   define R500_TEX_SRC_Q_SWIZ_R			(0 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_G			(1 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_B			(2 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_A			(3 << 14)
+#   define R500_TEX_DST_ADDR(x)				((x) << 16)
+#   define R500_TEX_DST_ADDR_REL			(1 << 23)
+#   define R500_TEX_DST_R_SWIZ_R			(0 << 24)
+#   define R500_TEX_DST_R_SWIZ_G			(1 << 24)
+#   define R500_TEX_DST_R_SWIZ_B			(2 << 24)
+#   define R500_TEX_DST_R_SWIZ_A			(3 << 24)
+#   define R500_TEX_DST_G_SWIZ_R			(0 << 26)
+#   define R500_TEX_DST_G_SWIZ_G			(1 << 26)
+#   define R500_TEX_DST_G_SWIZ_B			(2 << 26)
+#   define R500_TEX_DST_G_SWIZ_A			(3 << 26)
+#   define R500_TEX_DST_B_SWIZ_R			(0 << 28)
+#   define R500_TEX_DST_B_SWIZ_G			(1 << 28)
+#   define R500_TEX_DST_B_SWIZ_B			(2 << 28)
+#   define R500_TEX_DST_B_SWIZ_A			(3 << 28)
+#   define R500_TEX_DST_A_SWIZ_R			(0 << 30)
+#   define R500_TEX_DST_A_SWIZ_G			(1 << 30)
+#   define R500_TEX_DST_A_SWIZ_B			(2U << 30) /* unsigned: reaches bit 31 */
+#   define R500_TEX_DST_A_SWIZ_A			(3U << 30) /* unsigned: reaches bit 31 */
+#define R500_US_TEX_ADDR_DXDY_0				0xa000
+#   define R500_DX_ADDR(x)				((x) << 0)
+#   define R500_DX_ADDR_REL				(1 << 7)
+#   define R500_DX_S_SWIZ_R				(0 << 8)
+#   define R500_DX_S_SWIZ_G				(1 << 8)
+#   define R500_DX_S_SWIZ_B				(2 << 8)
+#   define R500_DX_S_SWIZ_A				(3 << 8)
+#   define R500_DX_T_SWIZ_R				(0 << 10)
+#   define R500_DX_T_SWIZ_G				(1 << 10)
+#   define R500_DX_T_SWIZ_B				(2 << 10)
+#   define R500_DX_T_SWIZ_A				(3 << 10)
+#   define R500_DX_R_SWIZ_R				(0 << 12)
+#   define R500_DX_R_SWIZ_G				(1 << 12)
+#   define R500_DX_R_SWIZ_B				(2 << 12)
+#   define R500_DX_R_SWIZ_A				(3 << 12)
+#   define R500_DX_Q_SWIZ_R				(0 << 14)
+#   define R500_DX_Q_SWIZ_G				(1 << 14)
+#   define R500_DX_Q_SWIZ_B				(2 << 14)
+#   define R500_DX_Q_SWIZ_A				(3 << 14)
+#   define R500_DY_ADDR(x)				((x) << 16)
+#   define R500_DY_ADDR_REL				(1 << 23) /* was bit 17, inside DY_ADDR; matches DX_ADDR_REL at base+7 */
+#   define R500_DY_S_SWIZ_R				(0 << 24)
+#   define R500_DY_S_SWIZ_G				(1 << 24)
+#   define R500_DY_S_SWIZ_B				(2 << 24)
+#   define R500_DY_S_SWIZ_A				(3 << 24)
+#   define R500_DY_T_SWIZ_R				(0 << 26)
+#   define R500_DY_T_SWIZ_G				(1 << 26)
+#   define R500_DY_T_SWIZ_B				(2 << 26)
+#   define R500_DY_T_SWIZ_A				(3 << 26)
+#   define R500_DY_R_SWIZ_R				(0 << 28)
+#   define R500_DY_R_SWIZ_G				(1 << 28)
+#   define R500_DY_R_SWIZ_B				(2 << 28)
+#   define R500_DY_R_SWIZ_A				(3 << 28)
+#   define R500_DY_Q_SWIZ_R				(0 << 30)
+#   define R500_DY_Q_SWIZ_G				(1 << 30)
+#   define R500_DY_Q_SWIZ_B				(2U << 30) /* unsigned: reaches bit 31 */
+#   define R500_DY_Q_SWIZ_A				(3U << 30) /* unsigned: reaches bit 31 */
+#define R500_US_TEX_INST_0				0x9000
+#   define R500_TEX_ID(x)				((x) << 16)
+#   define R500_TEX_INST_NOP				(0 << 22)
+#   define R500_TEX_INST_LD				(1 << 22)
+#   define R500_TEX_INST_TEXKILL			(2 << 22)
+#   define R500_TEX_INST_PROJ				(3 << 22)
+#   define R500_TEX_INST_LODBIAS			(4 << 22)
+#   define R500_TEX_INST_LOD				(5 << 22)
+#   define R500_TEX_INST_DXDY				(6 << 22)
+#   define R500_TEX_SEM_ACQUIRE				(1 << 25)
+#   define R500_TEX_IGNORE_UNCOVERED			(1 << 26)
+#   define R500_TEX_UNSCALED				(1 << 27)
+#define R500_US_W_FMT					0x46b4
+#   define R500_W_FMT_W0				(0 << 0)
+#   define R500_W_FMT_W24				(1 << 0)
+#   define R500_W_FMT_W24FP				(2 << 0)
+#   define R500_W_SRC_US				(0 << 2)
+#   define R500_W_SRC_RAS				(1 << 2)
+
+#define R500_GA_US_VECTOR_INDEX 0x4250
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+#define R500_RS_INST_0					0x4320
+#define R500_RS_INST_TEX_ID_SHIFT			0
+#define R500_RS_INST_TEX_CN_WRITE			(1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT			5
+#define R500_RS_INST_COL_ID_SHIFT			12
+#define R500_RS_INST_COL_CN_NO_WRITE			(0 << 16)
+#define R500_RS_INST_COL_CN_WRITE			(1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER		(2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE		(3 << 16)
+#define R500_RS_INST_COL_COL_ADDR_SHIFT			18
+#define R500_RS_INST_TEX_ADJ				(1 << 25)
+#define R500_RS_INST_W_CN				(1 << 26)
+
+#define R500_US_FC_CTRL					0x4624
+#define R500_US_CODE_ADDR				0x4630
+#define R500_US_CODE_RANGE 				0x4634
+#define R500_US_CODE_OFFSET 				0x4638
+
+#define R500_RS_IP_0					0x4074
+#define R500_RS_IP_PTR_K0				62
+#define R500_RS_IP_PTR_K1 				63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 			0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 			6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 			12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 			18
+#define R500_RS_IP_COL_PTR_SHIFT 			24
+#define R500_RS_IP_COL_FMT_SHIFT 			27
+#define R500_RS_IP_COL_FMT_RGBA				(0<<27)
+#define R500_RS_IP_OFFSET_EN 				(1U << 31) /* unsigned: 1 << 31 overflows int */
+
+
 #endif
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
new file mode 100644
index 0000000..329a834
--- /dev/null
+++ b/src/radeon_textured_video.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2008 Alex Deucher
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *
+ * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "radeon.h"
+#include "radeon_reg.h"
+#include "radeon_macros.h"
+#include "radeon_probe.h"
+#include "radeon_video.h"
+
+#include <X11/extensions/Xv.h>
+#include "fourcc.h"
+
+#define IMAGE_MAX_WIDTH		2048
+#define IMAGE_MAX_HEIGHT	2048
+
+/*
+ * Tell whether pPix should be programmed as a tiled surface.
+ */
+static Bool
+RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    /* With tiling switched off no pixmap is ever reported as tiled. */
+    if (!info->tilingEnabled)
+	return FALSE;
+
+#ifdef USE_EXA
+    /* Under EXA only the pixmap located at offset 0 counts as tiled. */
+    if (info->useEXA && exaGetPixmapOffset(pPix) != 0)
+	return FALSE;
+#endif
+
+    return TRUE;
+}
+
+/* Return the raw bits of a float as a CARD32, ready to be written to a register. */
+static __inline__ CARD32 F_TO_DW(float val)
+{
+    /* Go through a union instead of casting a pointer to the float. */
+    union { float f; CARD32 l; } bits;
+
+    bits.f = val;
+    return bits.l;
+}
+
+#define ACCEL_MMIO
+#define VIDEO_PREAMBLE()	unsigned char *RADEONMMIO = info->MMIO
+#define BEGIN_VIDEO(n)		RADEONWaitForFifo(pScrn, (n))
+#define OUT_VIDEO_REG(reg, val)	OUTREG(reg, val)
+#define OUT_VIDEO_REG_F(reg, val) OUTREG(reg, F_TO_DW(val))
+#define FINISH_VIDEO()
+/* First instantiation of the shared template: the ...MMIO variant (OUTREG writes). */
+#include "radeon_textured_videofuncs.c"
+/* Drop the MMIO definitions so the template can be included again below. */
+#undef ACCEL_MMIO
+#undef VIDEO_PREAMBLE
+#undef BEGIN_VIDEO
+#undef OUT_VIDEO_REG
+#undef FINISH_VIDEO
+
+#ifdef XF86DRI
+/* CP variant: BEGIN_VIDEO(n) reserves 2*n ring dwords for n register writes. */
+#define ACCEL_CP
+#define VIDEO_PREAMBLE()						\
+    RING_LOCALS;							\
+    RADEONCP_REFRESH(pScrn, info)
+#define BEGIN_VIDEO(n)		BEGIN_RING(2*(n))
+#define OUT_VIDEO_REG(reg, val)	OUT_RING_REG(reg, val)
+#define FINISH_VIDEO()		ADVANCE_RING()
+#define OUT_VIDEO_RING_F(x) OUT_RING(F_TO_DW(x))
+/* Second instantiation of the shared template: the ...CP variant. */
+#include "radeon_textured_videofuncs.c"
+/* NOTE(review): OUT_VIDEO_REG_F from the MMIO pass is never #undef'd. */
+#endif /* XF86DRI */
+
+/*
+ * PutImage hook of the textured adaptor: clips the request, uploads the frame
+ * to offscreen video memory and draws it.  Returns Success or BadAlloc.
+ */
+static int
+RADEONPutImageTextured(ScrnInfoPtr pScrn,
+		       short src_x, short src_y,
+		       short drw_x, short drw_y,
+		       short src_w, short src_h,
+		       short drw_w, short drw_h,
+		       int id,
+		       unsigned char *buf,
+		       short width,
+		       short height,
+		       Bool sync,
+		       RegionPtr clipBoxes,
+		       pointer data,
+		       DrawablePtr pDraw)
+{
+    ScreenPtr pScreen = pScrn->pScreen;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
+    INT32 x1, x2, y1, y2;
+    int srcPitch, srcPitch2, dstPitch;
+    int s2offset, s3offset, tmp;
+    int top, left, npixels, nlines, size;
+    BoxRec dstBox;
+    int dst_width = width, dst_height = height;
+
+    /* make the compiler happy */
+    s2offset = s3offset = srcPitch2 = 0;
+
+    /* Clip */
+    x1 = src_x;
+    x2 = src_x + src_w;
+    y1 = src_y;
+    y2 = src_y + src_h;
+
+    dstBox.x1 = drw_x;
+    dstBox.x2 = drw_x + drw_w;
+    dstBox.y1 = drw_y;
+    dstBox.y2 = drw_y + drw_h;
+
+    if (!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2, clipBoxes, width, height))
+	return Success;
+
+    src_w = (x2 - x1) >> 16;
+    src_h = (y2 - y1) >> 16;
+    drw_w = dstBox.x2 - dstBox.x1;
+    drw_h = dstBox.y2 - dstBox.y1;
+
+    if ((x1 >= x2) || (y1 >= y2))
+	return Success;
+
+    switch(id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+	dstPitch = ((dst_width << 1) + 15) & ~15;
+	srcPitch = (width + 3) & ~3;
+	srcPitch2 = ((width >> 1) + 3) & ~3;
+	break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+	dstPitch = ((dst_width << 1) + 15) & ~15;
+	srcPitch = (width << 1);
+	srcPitch2 = 0;
+	break;
+    }
+
+#ifdef XF86DRI
+   if (info->directRenderingEnabled && info->DMAForXv)
+       /* The upload blit only supports multiples of 64 bytes */
+       dstPitch = (dstPitch + 63) & ~63;
+   else
+#endif
+       dstPitch = (dstPitch + 15) & ~15;
+    /* Size from the final aligned pitch: the copy below strides by dstPitch. */
+    size = dstPitch * dst_height;
+
+    if (pPriv->video_memory != NULL && size != pPriv->size) {
+	RADEONFreeMemory(pScrn, pPriv->video_memory);
+	pPriv->video_memory = NULL;
+    }
+
+    if (pPriv->video_memory == NULL) {
+	pPriv->video_offset = RADEONAllocateMemory(pScrn,
+						       &pPriv->video_memory,
+						       size * 2);
+	if (pPriv->video_offset == 0)
+	    return BadAlloc;
+    }
+
+    if (pDraw->type == DRAWABLE_WINDOW)
+	pPriv->pPixmap = (*pScreen->GetWindowPixmap)((WindowPtr)pDraw);
+    else
+	pPriv->pPixmap = (PixmapPtr)pDraw;
+
+#ifdef USE_EXA
+    if (info->useEXA) {
+	/* Force the pixmap into framebuffer so we can draw to it. */
+	exaMoveInPixmap(pPriv->pPixmap);
+    }
+#endif
+
+    if (!info->useEXA &&
+	(((char *)pPriv->pPixmap->devPrivate.ptr < (char *)info->FB) ||
+	 ((char *)pPriv->pPixmap->devPrivate.ptr >= (char *)info->FB +
+	  info->FbMapSize))) {
+	/* If the pixmap wasn't in framebuffer, then we have no way in XAA to
+	 * force it there. So, we simply refuse to draw and fail.
+	 */
+	return BadAlloc;
+    }
+
+    /* copy data */
+    top = y1 >> 16;
+    left = (x1 >> 16) & ~1;
+    npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left;
+
+    pPriv->src_offset = pPriv->video_offset + info->fbLocation;
+    pPriv->src_addr = (CARD8 *)(info->FB + pPriv->video_offset + (top * dstPitch));
+    pPriv->src_pitch = dstPitch;
+    pPriv->size = size;
+    pPriv->pDraw = pDraw;
+
+
+    switch(id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+	top &= ~1;
+	nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
+	s2offset = srcPitch * height;
+	s3offset = (srcPitch2 * (height >> 1)) + s2offset;
+	top &= ~1;
+	pPriv->src_addr += left << 1;
+	tmp = ((top >> 1) * srcPitch2) + (left >> 1);
+	s2offset += tmp;
+	s3offset += tmp;
+	if (id == FOURCC_I420) {
+	    tmp = s2offset;
+	    s2offset = s3offset;
+	    s3offset = tmp;
+	}
+	RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
+			     buf + s2offset, buf + s3offset, pPriv->src_addr,
+			     srcPitch, srcPitch2, dstPitch, nlines, npixels);
+	break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+	nlines = ((y2 + 0xffff) >> 16) - top;
+	/* NOTE(review): unlike the planar path, buf is not advanced by top/left here - confirm. */
+	RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2);
+	break;
+    }
+
+    /* update cliplist */
+    if (!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes)) {
+	REGION_COPY(pScrn->pScreen, &pPriv->clip, clipBoxes);
+    }
+
+    pPriv->id = id;
+    pPriv->src_w = src_w;
+    pPriv->src_h = src_h;
+    pPriv->drw_x = drw_x;
+    pPriv->drw_y = drw_y;
+    pPriv->dst_w = drw_w;
+    pPriv->dst_h = drw_h;
+    pPriv->w = width;
+    pPriv->h = height;
+
+#ifdef XF86DRI
+    if (info->directRenderingEnabled)
+	RADEONDisplayTexturedVideoCP(pScrn, pPriv);
+    else
+#endif
+	RADEONDisplayTexturedVideoMMIO(pScrn, pPriv);
+
+    return Success;
+}
+
+/* client libraries expect an encoding */
+static XF86VideoEncodingRec DummyEncoding[1] =
+{
+    {
+	0,
+	"XV_IMAGE",
+	IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT,
+	{1, 1}
+    }
+};
+
+#define NUM_FORMATS 3
+/* Depth/visual-class pairs handed to the adaptor as pFormats. */
+static XF86VideoFormatRec Formats[NUM_FORMATS] =
+{
+    {15, TrueColor}, {16, TrueColor}, {24, TrueColor}
+};
+
+#define NUM_ATTRIBUTES 0
+/* No port attributes are listed for this adaptor, so the table is empty. */
+static XF86AttributeRec Attributes[NUM_ATTRIBUTES] =
+{
+};
+
+#define NUM_IMAGES 4
+/* FourCC image formats handed to the adaptor as pImages. */
+static XF86ImageRec Images[NUM_IMAGES] =
+{
+    XVIMAGE_YUY2,
+    XVIMAGE_YV12,
+    XVIMAGE_I420,
+    XVIMAGE_UYVY
+};
+
+/* Build the "Radeon Textured Video" Xv adaptor; returns NULL if allocation fails. */
+XF86VideoAdaptorPtr
+RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
+{
+    const int num_texture_ports = 16;
+    XF86VideoAdaptorPtr adapt;
+    RADEONPortPrivPtr port_privs;
+    int port;
+
+    /* One block: adaptor record, DevUnion table, then the port private records. */
+    adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec) + num_texture_ports *
+		    (sizeof(RADEONPortPrivRec) + sizeof(DevUnion)));
+    if (!adapt)
+	return NULL;
+
+    adapt->pPortPrivates = (DevUnion *)(&adapt[1]);
+    port_privs = (RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]);
+    /* Identification and the static capability tables. */
+    adapt->name = "Radeon Textured Video";
+    adapt->type = XvWindowMask | XvInputMask | XvImageMask;
+    adapt->flags = 0;
+    adapt->nPorts = num_texture_ports;
+    adapt->nEncodings = 1;
+    adapt->pEncodings = DummyEncoding;
+    adapt->nFormats = NUM_FORMATS;
+    adapt->pFormats = Formats;
+    adapt->nAttributes = NUM_ATTRIBUTES;
+    adapt->pAttributes = Attributes;
+    adapt->nImages = NUM_IMAGES;
+    adapt->pImages = Images;
+
+    /* Only PutImage is provided; the video/still/reput hooks stay NULL. */
+    adapt->PutVideo = NULL;
+    adapt->PutStill = NULL;
+    adapt->GetVideo = NULL;
+    adapt->GetStill = NULL;
+    adapt->ReputImage = NULL;
+    adapt->PutImage = RADEONPutImageTextured;
+    adapt->StopVideo = RADEONStopVideo;
+    adapt->QueryBestSize = RADEONQueryBestSize;
+    adapt->QueryImageAttributes = RADEONQueryImageAttributes;
+    adapt->SetPortAttribute = RADEONSetPortAttribute;
+    adapt->GetPortAttribute = RADEONGetPortAttribute;
+
+    for (port = 0; port < num_texture_ports; port++) {
+	RADEONPortPrivPtr pPriv = port_privs + port;
+	pPriv->textured = TRUE;
+	pPriv->videoStatus = 0;
+	pPriv->currentBuffer = 0;
+	pPriv->doubleBuffer = 0;
+	/* XXX: the clip region is never uninitialised; ideally unneeded for textured */
+	REGION_NULL(pScreen, &pPriv->clip);
+	adapt->pPortPrivates[port].ptr = (pointer)pPriv;
+    }
+
+    return adapt;
+}
+
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
new file mode 100644
index 0000000..e0f3bba
--- /dev/null
+++ b/src/radeon_textured_videofuncs.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright 2008 Alex Deucher
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *
+ * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
+ *
+ */
+
+#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
+#error Cannot define both MMIO and CP acceleration!
+#endif
+/* Token pasting: ## normally, the empty-comment form when only UNIXCPP is defined. */
+#if !defined(UNIXCPP) || defined(ANSICPP)
+#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
+#else
+#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
+#endif
+/* FUNC_NAME(x) appends MMIO or CP so each inclusion defines distinct functions. */
+#ifdef ACCEL_MMIO
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
+#else
+#ifdef ACCEL_CP
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
+#else
+#error No accel type defined!
+#endif
+#endif
+/* Dwords per vertex: the four values emitted by VTX_OUT (dst x, y and src x, y). */
+#define VTX_DWORD_COUNT 4
+
+#ifdef ACCEL_CP
+/* CP: the four vertex values are emitted with OUT_VIDEO_RING_F. */
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
+do {								\
+    OUT_VIDEO_RING_F(_dstX);						\
+    OUT_VIDEO_RING_F(_dstY);						\
+    OUT_VIDEO_RING_F(_srcX);						\
+    OUT_VIDEO_RING_F(_srcY);						\
+} while (0)
+
+#else /* ACCEL_CP */
+/* MMIO: the four vertex values are written in turn to RADEON_SE_PORT_DATA0. */
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
+do {								\
+    OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
+    OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
+    OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
+    OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
+} while (0)
+
+#endif /* !ACCEL_CP */
+
+/*
+ * Set up YUV->RGB texturing state and draw the frame, one quad per clip box.
+ */
+static void
+FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    PixmapPtr pPixmap = pPriv->pPixmap;
+    CARD32 txformat;
+    CARD32 txfilter, txformat0, txformat1, txoffset, txpitch;
+    CARD32 dst_offset, dst_pitch, dst_format;
+    CARD32 txenable, colorpitch;
+    CARD32 blendcntl;
+    int dstxoff, dstyoff, pixel_shift;
+    VIDEO_PREAMBLE();
+
+    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+    int nBox = REGION_NUM_RECTS(&pPriv->clip);
+    /* 16bpp -> 1, 32bpp -> 2: shift that turns the byte pitch into a pixel pitch. */
+    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+    if (info->useEXA) {
+	dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation;
+	dst_pitch = exaGetPixmapPitch(pPixmap);
+    } else
+#endif
+	{
+	    dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+		info->fbLocation + pScrn->fbOffset;
+	    dst_pitch = pPixmap->devKind;
+	}
+
+#ifdef COMPOSITE
+    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+    dstxoff = 0;
+    dstyoff = 0;
+#endif
+
+#if 0
+    ErrorF("dst_offset: 0x%x\n", dst_offset);
+    ErrorF("dst_pitch: 0x%x\n", dst_pitch);
+    ErrorF("dstxoff: 0x%x\n", dstxoff);
+    ErrorF("dstyoff: 0x%x\n", dstyoff);
+    ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
+    ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
+#endif
+
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
+
+    /* we can probably improve this */
+    BEGIN_VIDEO(2);
+    OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+    /* We must wait for 3d to idle, in case source was just written as a dest. */
+    OUT_VIDEO_REG(RADEON_WAIT_UNTIL,
+		RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
+    FINISH_VIDEO();
+
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
+	int has_tcl = (info->ChipFamily != CHIP_FAMILY_RS690 && info->ChipFamily != CHIP_FAMILY_RS400);
+	/* Only 16 and 32 bpp targets are handled; any other depth draws nothing. */
+	switch (pPixmap->drawable.bitsPerPixel) {
+	case 16:
+	    if (pPixmap->drawable.depth == 15)
+		dst_format = R300_COLORFORMAT_ARGB1555;
+	    else
+		dst_format = R300_COLORFORMAT_RGB565;
+	    break;
+	case 32:
+	    dst_format = R300_COLORFORMAT_ARGB8888;
+	    break;
+	default:
+	    return;
+	}
+
+	colorpitch = dst_pitch >> pixel_shift;
+	colorpitch |= dst_format;
+
+	if (RADEONTilingEnabled(pScrn, pPixmap))
+	    colorpitch |= R300_COLORTILE;
+
+	if (pPriv->id == FOURCC_UYVY)
+	    txformat1 = R300_TX_FORMAT_YVYU422;
+	else
+	    txformat1 = R300_TX_FORMAT_VYUY422;
+
+	txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+	txformat0 = (((pPriv->w - 1) << R300_TXWIDTH_SHIFT) |
+		     ((pPriv->h - 1) << R300_TXHEIGHT_SHIFT));
+
+	txformat0 |= R300_TXPITCH_EN;
+
+	info->texW[0] = pPriv->w;
+	info->texH[0] = pPriv->h;
+
+	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+		    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+		    R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
+
+	/* pitch is in pixels */
+	txpitch = pPriv->src_pitch / 2;
+	txpitch -= 1;
+
+	txoffset = pPriv->src_offset;
+
+	BEGIN_VIDEO(6);
+	OUT_VIDEO_REG(R300_TX_FILTER0_0, txfilter);
+	OUT_VIDEO_REG(R300_TX_FILTER1_0, 0);
+	OUT_VIDEO_REG(R300_TX_FORMAT0_0, txformat0);
+	OUT_VIDEO_REG(R300_TX_FORMAT1_0, txformat1);
+	OUT_VIDEO_REG(R300_TX_FORMAT2_0, txpitch);
+	OUT_VIDEO_REG(R300_TX_OFFSET_0, txoffset);
+	FINISH_VIDEO();
+
+	txenable = R300_TEX_0_ENABLE;
+
+	/* setup the VAP */
+	if (has_tcl) {
+	    BEGIN_VIDEO(26);
+	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0);
+	    OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	    OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	} else {
+	    BEGIN_VIDEO(8);
+	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
+	    OUT_VIDEO_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	}
+
+	OUT_VIDEO_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+	OUT_VIDEO_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+
+	if (has_tcl) {
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
+			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
+			   (0 << R300_DST_VEC_LOC_0_SHIFT) |
+			   R300_SIGNED_0 |
+			   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_1_SHIFT) |
+			   (10 << R300_DST_VEC_LOC_1_SHIFT) |
+			   R300_LAST_VEC_1 |
+			   R300_SIGNED_1));
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+			  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			    << R300_WRITE_ENA_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			    << R300_WRITE_ENA_1_SHIFT)));
+	} else {
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
+			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
+			   (0 << R300_DST_VEC_LOC_0_SHIFT) |
+			   R300_SIGNED_0 |
+			   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_1_SHIFT) |
+			   (6 << R300_DST_VEC_LOC_1_SHIFT) |
+			   R300_LAST_VEC_1 |
+			   R300_SIGNED_1));
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+			  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			    << R300_WRITE_ENA_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			    << R300_WRITE_ENA_1_SHIFT)));
+	}
+
+	/* setup vertex shader */
+	if (has_tcl) {
+	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
+			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
+			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (1 << R300_PVS_LAST_INST_SHIFT)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1,
+			  (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f00203);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10001);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248001);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00f02203);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x00d10141);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,0x01248141);
+	    OUT_VIDEO_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
+
+	    OUT_VIDEO_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+	}
+
+	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+	FINISH_VIDEO();
+
+	/* setup pixel shader */
+	if (IS_R300_VARIANT || info->ChipFamily == CHIP_FAMILY_RS690) {
+	    BEGIN_VIDEO(16);
+	    OUT_VIDEO_REG(R300_RS_COUNT,
+			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+			   R300_RS_COUNT_HIRES_EN));
+	    OUT_VIDEO_REG(R300_RS_IP_0,
+			  (R300_RS_TEX_PTR(0) |
+			   R300_RS_COL_PTR(0) |
+			   R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA) |
+			   R300_RS_SEL_S(R300_RS_SEL_C0) |
+			   R300_RS_SEL_T(R300_RS_SEL_C1) |
+			   R300_RS_SEL_R(R300_RS_SEL_K0) |
+			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
+	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_TX_OFFSET_RS(6));
+	    OUT_VIDEO_REG(R300_RS_INST_0, R300_RS_INST_TEX_CN_WRITE);
+	    OUT_VIDEO_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
+	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0);
+	    OUT_VIDEO_REG(R300_US_CODE_OFFSET,
+			  (R300_ALU_CODE_OFFSET(0) |
+			   R300_ALU_CODE_SIZE(1) |
+			   R300_TEX_CODE_OFFSET(0) |
+			   R300_TEX_CODE_SIZE(1)));
+	    OUT_VIDEO_REG(R300_US_CODE_ADDR_0, 0);
+	    OUT_VIDEO_REG(R300_US_CODE_ADDR_1, 0);
+	    OUT_VIDEO_REG(R300_US_CODE_ADDR_2, 0);
+	    OUT_VIDEO_REG(R300_US_CODE_ADDR_3, 0x400000);
+	    OUT_VIDEO_REG(R300_US_TEX_INST_0, 0x8000);
+	    OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR_0, 0x1f800000);
+	    OUT_VIDEO_REG(R300_US_ALU_RGB_INST_0, 0x50a80);
+	    OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1800000);
+	    OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST_0, 0x00040889);
+	    FINISH_VIDEO();
+	} else {
+	    BEGIN_VIDEO(23); /* 10 state regs + vector index + 12 shader dwords below */
+	    OUT_VIDEO_REG(R300_RS_COUNT,
+			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+			   R300_RS_COUNT_HIRES_EN));
+	    OUT_VIDEO_REG(R500_RS_IP_0, (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+			  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+
+	    OUT_VIDEO_REG(R300_RS_INST_COUNT, 0);
+	    OUT_VIDEO_REG(R500_RS_INST_0, R500_RS_INST_TEX_CN_WRITE);
+	    OUT_VIDEO_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0);
+	    OUT_VIDEO_REG(R500_US_FC_CTRL, 0);
+	    OUT_VIDEO_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+	    OUT_VIDEO_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+	    OUT_VIDEO_REG(R500_US_CODE_OFFSET, 0);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_INDEX, 0);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00007807);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x06400000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0xe4000400);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00078105);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x10040000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x10040000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00db0220);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00c0c000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x20490000);
+	    FINISH_VIDEO();
+	}
+
+	BEGIN_VIDEO(6);
+	OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
+	OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
+
+	OUT_VIDEO_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+	OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch);
+
+	blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
+	OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl);
+	OUT_VIDEO_REG(R300_RB3D_ABLENDCNTL, 0);
+	FINISH_VIDEO();
+
+	BEGIN_VIDEO(1);
+	OUT_VIDEO_REG(R300_VAP_VTX_SIZE, VTX_DWORD_COUNT);
+	FINISH_VIDEO();
+
+    } else {
+
+	/* Same for R100/R200 */
+	switch (pPixmap->drawable.bitsPerPixel) {
+	case 16:
+	    if (pPixmap->drawable.depth == 15)
+		dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+	    else
+		dst_format = RADEON_COLOR_FORMAT_RGB565;
+	    break;
+	case 32:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+	    break;
+	default:
+	    return;
+	}
+
+	if (pPriv->id == FOURCC_UYVY)
+	    txformat = RADEON_TXFORMAT_YVYU422;
+	else
+	    txformat = RADEON_TXFORMAT_VYUY422;
+
+	txformat |= RADEON_TXFORMAT_NON_POWER2;
+
+	colorpitch = dst_pitch >> pixel_shift;
+
+	if (RADEONTilingEnabled(pScrn, pPixmap))
+	    colorpitch |= RADEON_COLOR_TILE_ENABLE;
+
+	BEGIN_VIDEO(5);
+
+	OUT_VIDEO_REG(RADEON_PP_CNTL,
+		    RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+	OUT_VIDEO_REG(RADEON_RB3D_CNTL,
+		    dst_format | RADEON_ALPHA_BLEND_ENABLE);
+	OUT_VIDEO_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+
+	OUT_VIDEO_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+	OUT_VIDEO_REG(RADEON_RB3D_BLENDCNTL,
+		    RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+	FINISH_VIDEO();
+
+	if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
+	    (info->ChipFamily == CHIP_FAMILY_RV280) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS300) ||
+	    (info->ChipFamily == CHIP_FAMILY_R200)) {
+
+	    info->texW[0] = pPriv->w;
+	    info->texH[0] = pPriv->h;
+
+	    BEGIN_VIDEO(12);
+
+	    OUT_VIDEO_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+	    OUT_VIDEO_REG(R200_SE_VTX_FMT_1,
+			(2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+	    OUT_VIDEO_REG(R200_PP_TXFILTER_0,
+			R200_MAG_FILTER_LINEAR |
+			R200_MIN_FILTER_LINEAR |
+			R200_YUV_TO_RGB);
+	    OUT_VIDEO_REG(R200_PP_TXFORMAT_0, txformat);
+	    OUT_VIDEO_REG(R200_PP_TXFORMAT_X_0, 0);
+	    OUT_VIDEO_REG(R200_PP_TXSIZE_0,
+			(pPriv->w - 1) |
+			((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+	    OUT_VIDEO_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+
+	    OUT_VIDEO_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+	    OUT_VIDEO_REG(R200_PP_TXCBLEND_0,
+			R200_TXC_ARG_A_ZERO |
+			R200_TXC_ARG_B_ZERO |
+			R200_TXC_ARG_C_R0_COLOR |
+			R200_TXC_OP_MADD);
+	    OUT_VIDEO_REG(R200_PP_TXCBLEND2_0,
+			R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+	    OUT_VIDEO_REG(R200_PP_TXABLEND_0,
+			R200_TXA_ARG_A_ZERO |
+			R200_TXA_ARG_B_ZERO |
+			R200_TXA_ARG_C_R0_ALPHA |
+			R200_TXA_OP_MADD);
+	    OUT_VIDEO_REG(R200_PP_TXABLEND2_0,
+			R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+	    FINISH_VIDEO();
+	} else {
+
+	    info->texW[0] = 1;
+	    info->texH[0] = 1;
+
+	    BEGIN_VIDEO(8);
+
+	    OUT_VIDEO_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
+			RADEON_SE_VTX_FMT_ST0);
+
+	    OUT_VIDEO_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
+			RADEON_MIN_FILTER_LINEAR |
+			RADEON_YUV_TO_RGB);
+	    OUT_VIDEO_REG(RADEON_PP_TXFORMAT_0, txformat);
+	    OUT_VIDEO_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+	    OUT_VIDEO_REG(RADEON_PP_TXCBLEND_0,
+			RADEON_COLOR_ARG_A_ZERO |
+			RADEON_COLOR_ARG_B_ZERO |
+			RADEON_COLOR_ARG_C_T0_COLOR |
+			RADEON_BLEND_CTL_ADD |
+			RADEON_CLAMP_TX);
+	    OUT_VIDEO_REG(RADEON_PP_TXABLEND_0,
+			RADEON_ALPHA_ARG_A_ZERO |
+			RADEON_ALPHA_ARG_B_ZERO |
+			RADEON_ALPHA_ARG_C_T0_ALPHA |
+			RADEON_BLEND_CTL_ADD |
+			RADEON_CLAMP_TX);
+
+	    OUT_VIDEO_REG(RADEON_PP_TEX_SIZE_0,
+			(pPriv->w - 1) |
+			((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+	    OUT_VIDEO_REG(RADEON_PP_TEX_PITCH_0,
+			pPriv->src_pitch - 32);
+	    FINISH_VIDEO();
+	}
+    }
+    /* One quad per clip rectangle; its extent is scaled back into source space. */
+    while (nBox--) {
+	int srcX, srcY, srcw, srch;
+	int dstX, dstY, dstw, dsth;
+	xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
+	dstX = pBox->x1 + dstxoff;
+	dstY = pBox->y1 + dstyoff;
+	dstw = pBox->x2 - pBox->x1;
+	dsth = pBox->y2 - pBox->y1;
+
+	srcX = ((pBox->x1 - pPriv->drw_x) *
+		pPriv->src_w) / pPriv->dst_w;
+	srcY = ((pBox->y1 - pPriv->drw_y) *
+		pPriv->src_h) / pPriv->dst_h;
+
+	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+	srcTopLeft.x     = IntToxFixed(srcX);
+	srcTopLeft.y     = IntToxFixed(srcY);
+	srcTopRight.x    = IntToxFixed(srcX + srcw);
+	srcTopRight.y    = IntToxFixed(srcY);
+	srcBottomLeft.x  = IntToxFixed(srcX);
+	srcBottomLeft.y  = IntToxFixed(srcY + srch);
+	srcBottomRight.x = IntToxFixed(srcX + srcw);
+	srcBottomRight.y = IntToxFixed(srcY + srch);
+
+#if 0
+	ErrorF("dst: %d, %d, %d, %d\n", dstX, dstY, dstw, dsth);
+	ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch);
+#endif
+
+#ifdef ACCEL_CP
+	if (info->ChipFamily < CHIP_FAMILY_R200) {
+	    BEGIN_RING(4 * VTX_DWORD_COUNT + 3);
+	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
+				4 * VTX_DWORD_COUNT + 1));
+	    OUT_RING(RADEON_CP_VC_FRMT_XY |
+		     RADEON_CP_VC_FRMT_ST0);
+	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+	} else {
+	    if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
+		BEGIN_RING(4 * VTX_DWORD_COUNT + 6);
+	    else
+		BEGIN_RING(4 * VTX_DWORD_COUNT + 2);
+	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+				4 * VTX_DWORD_COUNT));
+	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+	}
+#else /* ACCEL_CP */
+	if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
+	    BEGIN_VIDEO(3 + VTX_DWORD_COUNT * 4);
+	else
+	    BEGIN_VIDEO(1 + VTX_DWORD_COUNT * 4);
+
+	if (info->ChipFamily < CHIP_FAMILY_R200) {
+	    OUT_VIDEO_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
+					      RADEON_VF_PRIM_WALK_DATA |
+					      RADEON_VF_RADEON_MODE |
+					      4 << RADEON_VF_NUM_VERTICES_SHIFT));
+	} else {
+	    OUT_VIDEO_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
+					      RADEON_VF_PRIM_WALK_DATA |
+					      4 << RADEON_VF_NUM_VERTICES_SHIFT));
+	}
+#endif
+
+	VTX_OUT((float)dstX,                                      (float)dstY,
+		xFixedToFloat(srcTopLeft.x) / info->texW[0],      xFixedToFloat(srcTopLeft.y) / info->texH[0]);
+	VTX_OUT((float)dstX,                                      (float)(dstY + dsth),
+		xFixedToFloat(srcBottomLeft.x) / info->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->texH[0]);
+	VTX_OUT((float)(dstX + dstw),                                (float)(dstY + dsth),
+		xFixedToFloat(srcBottomRight.x) / info->texW[0],  xFixedToFloat(srcBottomRight.y) / info->texH[0]);
+	VTX_OUT((float)(dstX + dstw),                                (float)dstY,
+		xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0]);
+
+	if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
+	    OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
+	    OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+	}
+
+#ifdef ACCEL_CP
+	ADVANCE_RING();
+#else
+	FINISH_VIDEO();
+#endif /* !ACCEL_CP */
+
+	pBox++;
+    }
+
+    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
+
+#undef VTX_OUT
+#undef FUNC_NAME
diff --git a/src/radeon_video.c b/src/radeon_video.c
index 0b3f6ca..7502e1e 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -74,19 +74,10 @@
 static void RADEONInitOffscreenImages(ScreenPtr);
 
 static XF86VideoAdaptorPtr RADEONSetupImageVideo(ScreenPtr);
-static int  RADEONSetPortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
-static int  RADEONGetPortAttribute(ScrnInfoPtr, Atom ,INT32 *, pointer);
-static void RADEONStopVideo(ScrnInfoPtr, pointer, Bool);
-static void RADEONQueryBestSize(ScrnInfoPtr, Bool, short, short, short, short,
-			unsigned int *, unsigned int *, pointer);
 static int  RADEONPutImage(ScrnInfoPtr, short, short, short, short, short,
 			short, short, short, int, unsigned char*, short,
 			short, Bool, RegionPtr, pointer,
 			DrawablePtr);
-static int  RADEONQueryImageAttributes(ScrnInfoPtr, int, unsigned short *,
-			unsigned short *,  int *, int *);
-static void RADEONFreeMemory(ScrnInfoPtr pScrn, void *mem_struct);
-
 static void RADEONVideoTimerCallback(ScrnInfoPtr pScrn, Time now);
 static int RADEONPutVideo(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x, short drw_y,
                         short src_w, short src_h, short drw_w, short drw_h, 
@@ -270,29 +261,37 @@ ATIVideoSave(ScreenPtr pScreen, ExaOffscreenArea *area)
 void RADEONInitVideo(ScreenPtr pScreen)
 {
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr    info = RADEONPTR(pScrn);
     XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
-    XF86VideoAdaptorPtr newAdaptor = NULL;
+    XF86VideoAdaptorPtr overlayAdaptor = NULL, texturedAdaptor = NULL;
     int num_adaptors;
 
-    newAdaptor = RADEONSetupImageVideo(pScreen);
-    RADEONInitOffscreenImages(pScreen);
+
     num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
+    newAdaptors = xalloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr)); /* +2: one slot each for the overlay and textured adaptors appended below */
+    if (newAdaptors == NULL) /* allocation failed: give up before setting up any adaptor */
+	return;
 
-    if(newAdaptor) {
-	if(!num_adaptors) {
-	    num_adaptors = 1;
-	    adaptors = &newAdaptor;
-	} else {
-	    newAdaptors =  /* need to free this someplace */
-		xalloc((num_adaptors + 1) * sizeof(XF86VideoAdaptorPtr*));
-	    if(newAdaptors) {
-		memcpy(newAdaptors, adaptors, num_adaptors *
-					sizeof(XF86VideoAdaptorPtr));
-		newAdaptors[num_adaptors] = newAdaptor;
-		adaptors = newAdaptors;
-		num_adaptors++;
-	    }
-	}
+    if (num_adaptors > 0) /* skip the copy when no generic adaptors were listed; the old code also avoided copying in that case */
+	memcpy(newAdaptors, adaptors, num_adaptors * sizeof(XF86VideoAdaptorPtr));
+    adaptors = newAdaptors; /* from here on, append to our own larger copy of the list */
+    if (!IS_AVIVO_VARIANT) { /* the overlay adaptor is only set up on non-AVIVO chips */
+	overlayAdaptor = RADEONSetupImageVideo(pScreen);
+	if (overlayAdaptor != NULL) {
+	    adaptors[num_adaptors++] = overlayAdaptor;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Set up overlay video\n");
+	} else
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to set up overlay video\n");
+	RADEONInitOffscreenImages(pScreen); /* called whether or not the overlay adaptor was created */
+    }
+
+    if (info->ChipFamily != CHIP_FAMILY_RS400) { /* the textured adaptor is not set up on RS400 */
+	texturedAdaptor = RADEONSetupImageTexturedVideo(pScreen);
+	if (texturedAdaptor != NULL) {
+	    adaptors[num_adaptors++] = texturedAdaptor;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Set up textured video\n");
+	} else
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to set up textured video\n");
     }
 
     if(num_adaptors)
@@ -300,6 +299,7 @@ void RADEONInitVideo(ScreenPtr pScreen)
 
     if(newAdaptors)
 	xfree(newAdaptors);
+
 }
 
 /* client libraries expect an encoding */
@@ -1611,6 +1611,8 @@ RADEONSetupImageVideo(ScreenPtr pScreen)
     pPriv = (RADEONPortPrivPtr)(adapt->pPortPrivates[0].ptr);
     REGION_NULL(pScreen, &(pPriv->clip));
 
+    pPriv->textured = FALSE;
+
     if(pPriv->theatre != NULL) 
     {
 	/* video decoder is present, extend capabilities */
@@ -1626,13 +1628,16 @@ RADEONSetupImageVideo(ScreenPtr pScreen)
     return adapt;
 }
 
-static void
+void
 RADEONStopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
 {
   RADEONInfoPtr info = RADEONPTR(pScrn);
   unsigned char *RADEONMMIO = info->MMIO;
   RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
 
+    if (pPriv->textured) /* NOTE(review): textured ports return before the clip reset and the cleanup branch below -- confirm nothing has to be released for them here */
+	return;
+
   REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
 
   if(cleanup) {
@@ -1663,7 +1668,7 @@ RADEONStopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
   }
 }
 
-static int
+int
 RADEONSetPortAttribute(ScrnInfoPtr  pScrn,
 		       Atom	    attribute,
 		       INT32	    value,
@@ -1675,6 +1680,9 @@ RADEONSetPortAttribute(ScrnInfoPtr  pScrn,
     Bool		setAlpha = FALSE;
     unsigned char *RADEONMMIO = info->MMIO;
 
+    if (pPriv->textured) /* textured ports reject every attribute write before any sync or register access */
+	return BadMatch;
+
     RADEON_SYNC(info, pScrn);
 
 #define RTFSaturation(a)   (1.0 + ((a)*1.0)/1000.0)
@@ -1937,7 +1945,7 @@ RADEONSetPortAttribute(ScrnInfoPtr  pScrn,
     return Success;
 }
 
-static int
+int
 RADEONGetPortAttribute(ScrnInfoPtr  pScrn,
 		       Atom	    attribute,
 		       INT32	    *value,
@@ -1946,6 +1954,9 @@ RADEONGetPortAttribute(ScrnInfoPtr  pScrn,
     RADEONInfoPtr	info = RADEONPTR(pScrn);
     RADEONPortPrivPtr	pPriv = (RADEONPortPrivPtr)data;
 
+    if (pPriv->textured) /* textured ports reject every attribute read; *value is left untouched */
+	return BadMatch;
+
     if (info->accelOn) RADEON_SYNC(info, pScrn);
 
     if(attribute == xvAutopaintColorkey)
@@ -2031,7 +2042,7 @@ RADEONGetPortAttribute(ScrnInfoPtr  pScrn,
     return Success;
 }
 
-static void
+void
 RADEONQueryBestSize(
   ScrnInfoPtr pScrn,
   Bool motion,
@@ -2040,10 +2051,14 @@ RADEONQueryBestSize(
   unsigned int *p_w, unsigned int *p_h,
   pointer data
 ){
-   if(vid_w > (drw_w << 4))
-	drw_w = vid_w >> 4;
-   if(vid_h > (drw_h << 4))
-	drw_h = vid_h >> 4;
+    RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
+
+    if (!pPriv->textured) { /* the 16:1 minimum-size clamp applies to non-textured ports only */
+	if (vid_w > (drw_w << 4)) /* requested width is below 1/16 of the video width: raise it to vid_w / 16 */
+	    drw_w = vid_w >> 4;
+	if (vid_h > (drw_h << 4)) /* same clamp for the height */
+	    drw_h = vid_h >> 4;
+    }
 
   *p_w = drw_w;
   *p_h = drw_h;
@@ -2132,7 +2147,7 @@ static struct {
         {1.00, {{ 0,    32,  0,  0}, {-2,   29,  5,  0}, {-3,   27,  9, -1}, {-4,   24, 14, -2}, {-3,   19, 19, -3}, }}
     };
 
-static void
+void
 RADEONCopyData(
   ScrnInfoPtr pScrn,
   unsigned char *src,
@@ -2300,7 +2315,7 @@ static void RADEON_420_422(
 }
 #endif
 
-static void
+void
 RADEONCopyMungedData(
    ScrnInfoPtr pScrn,
    unsigned char *src1,
@@ -2405,7 +2420,7 @@ RADEONCopyMungedData(
  * is measured in bytes, and the offset from the beginning of card space is
  * returned.
  */
-static CARD32
+CARD32
 RADEONAllocateMemory(
    ScrnInfoPtr pScrn,
    void **mem_struct,
@@ -2482,7 +2497,7 @@ RADEONAllocateMemory(
     return offset;
 }
 
-static void
+void
 RADEONFreeMemory(
    ScrnInfoPtr pScrn,
    void *mem_struct
@@ -3122,7 +3137,7 @@ RADEONPutImage(
 }
 
 
-static int
+int
 RADEONQueryImageAttributes(
     ScrnInfoPtr pScrn,
     int id,
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 072f40e..f897e07 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -81,8 +81,8 @@ typedef struct {
    Bool          autopaint_colorkey;
    xf86CrtcPtr   desired_crtc;
 
-#ifdef USE_EXA
    int              size;
+#ifdef USE_EXA
    ExaOffscreenArea *off_screen;
 #endif
 
@@ -90,6 +90,20 @@ typedef struct {
    int           video_offset;
 
    Atom          device_id, location_id, instance_id;
+
+    /* textured video: per-port state added for the textured Xv adaptor */
+    Bool textured; /* FALSE on overlay ports; the shared port callbacks test this flag */
+    DrawablePtr pDraw; /* drawable passed to DamageDamageRegion() together with the port's clip region */
+    PixmapPtr pPixmap;
+
+    CARD32 src_offset; /* NOTE(review): src_* presumably describe the uploaded source image -- confirm against the textured video code */
+    CARD32 src_pitch;
+    CARD8 *src_addr;
+
+    int id; /* NOTE(review): presumably the image format id -- confirm */
+    int src_w, src_h, dst_w, dst_h;
+    int w, h;
+    int drw_x, drw_y;
 } RADEONPortPrivRec, *RADEONPortPrivPtr;
 
 
@@ -99,5 +113,33 @@ void RADEONResetI2C(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
 void RADEONVIP_init(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
 void RADEONVIP_reset(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
 
+CARD32
+RADEONAllocateMemory(ScrnInfoPtr pScrn, void **mem_struct, int size);
+void
+RADEONFreeMemory(ScrnInfoPtr pScrn, void *mem_struct);
+
+int  RADEONSetPortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
+int  RADEONGetPortAttribute(ScrnInfoPtr, Atom ,INT32 *, pointer);
+void RADEONStopVideo(ScrnInfoPtr, pointer, Bool);
+void RADEONQueryBestSize(ScrnInfoPtr, Bool, short, short, short, short,
+			 unsigned int *, unsigned int *, pointer);
+int  RADEONQueryImageAttributes(ScrnInfoPtr, int, unsigned short *,
+			unsigned short *,  int *, int *);
+
+XF86VideoAdaptorPtr
+RADEONSetupImageTexturedVideo(ScreenPtr pScreen);
+
+void
+RADEONCopyData(ScrnInfoPtr pScrn,
+	       unsigned char *src, unsigned char *dst,
+	       unsigned int srcPitch, unsigned int dstPitch,
+	       unsigned int h, unsigned int w, unsigned int bpp);
+
+void
+RADEONCopyMungedData(ScrnInfoPtr pScrn,
+		     unsigned char *src1, unsigned char *src2,
+		     unsigned char *src3, unsigned char *dst1,
+		     unsigned int srcPitch, unsigned int srcPitch2,
+		     unsigned int dstPitch, unsigned int h, unsigned int w);
 
 #endif