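xf86-video-ati: update from 6.12.1 to development snapshot 6.12.1.99.

Summary of changes in this patch:
  - configure.ac: bump the package version to 6.12.1.99.
  - PCI IDs: add RV790 (0x9460, 0x9462), RS780 (0x9615, 0x9616) and
    RS880 (0x9710-0x9714) entries to ati_pciids_gen.h and ati_pciids.csv.
  - atombios_crtc.c: on big-endian hosts, program R600_D1GRPH_SWAP_CONTROL
    for R600 and newer chips according to the framebuffer depth.
  - atombios_output.c: the encoder/transmitter setup helpers now take an
    action code instead of a DisplayModePtr and read the pixel clock and
    DIG block from radeon_output; atombios_dig_dpms() is removed and DPMS
    goes through atombios_output_dig_transmitter_setup().
  - legacy_output.c: record the pixel clock in radeon_output and adapt the
    atombios_external_tmds_setup() call to the new signature.
  - r600_exa.c: select texture channel swizzles in R600TextureSetup() and
    use the composite pixel shaders loaded by R600LoadShaders() instead of
    generating a shader per composite operation.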
diff --git a/configure.ac b/configure.ac
index 3848f4c..660ea1f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,7 +22,7 @@
 
 AC_PREREQ(2.57)
 AC_INIT([xf86-video-ati],
-        6.12.1,
+        6.12.1.99,
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         xf86-video-ati)
 
diff --git a/src/ati_pciids_gen.h b/src/ati_pciids_gen.h
index d532f16..3304e84 100644
--- a/src/ati_pciids_gen.h
+++ b/src/ati_pciids_gen.h
@@ -345,6 +345,8 @@
 #define PCI_CHIP_RV770_9456 0x9456
 #define PCI_CHIP_RV770_945A 0x945A
 #define PCI_CHIP_RV770_945B 0x945B
+#define PCI_CHIP_RV790_9460 0x9460
+#define PCI_CHIP_RV790_9462 0x9462
 #define PCI_CHIP_RV770_946A 0x946A
 #define PCI_CHIP_RV770_946B 0x946B
 #define PCI_CHIP_RV770_947A 0x947A
@@ -429,3 +431,10 @@
 #define PCI_CHIP_RS780_9612 0x9612
 #define PCI_CHIP_RS780_9613 0x9613
 #define PCI_CHIP_RS780_9614 0x9614
+#define PCI_CHIP_RS780_9615 0x9615
+#define PCI_CHIP_RS780_9616 0x9616
+#define PCI_CHIP_RS880_9710 0x9710
+#define PCI_CHIP_RS880_9711 0x9711
+#define PCI_CHIP_RS880_9712 0x9712
+#define PCI_CHIP_RS880_9713 0x9713
+#define PCI_CHIP_RS880_9714 0x9714
diff --git a/src/atombios_crtc.c b/src/atombios_crtc.c
index 50db578..31c032b 100644
--- a/src/atombios_crtc.c
+++ b/src/atombios_crtc.c
@@ -517,6 +517,9 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc,
 
     if (IS_AVIVO_VARIANT) {
 	uint32_t fb_format;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	uint32_t fb_swap = R600_D1GRPH_SWAP_ENDIAN_NONE;
+#endif
 
 	switch (crtc->scrn->bitsPerPixel) {
 	case 15:
@@ -524,10 +527,16 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc,
 	    break;
 	case 16:
 	    fb_format = AVIVO_D1GRPH_CONTROL_DEPTH_16BPP | AVIVO_D1GRPH_CONTROL_16BPP_RGB565;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	    fb_swap = R600_D1GRPH_SWAP_ENDIAN_16BIT;
+#endif
 	    break;
 	case 24:
 	case 32:
 	    fb_format = AVIVO_D1GRPH_CONTROL_DEPTH_32BPP | AVIVO_D1GRPH_CONTROL_32BPP_ARGB8888;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	    fb_swap = R600_D1GRPH_SWAP_ENDIAN_32BIT;
+#endif
 	    break;
 	default:
 	    FatalError("Unsupported screen depth: %d\n", xf86GetDepth());
@@ -555,6 +564,11 @@ atombios_crtc_mode_set(xf86CrtcPtr crtc,
 	OUTREG(AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, fb_location);
 	OUTREG(AVIVO_D1GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format);
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (info->ChipFamily >= CHIP_FAMILY_R600)
+	    OUTREG(R600_D1GRPH_SWAP_CONTROL + radeon_crtc->crtc_offset, fb_swap);
+#endif
+
 	OUTREG(AVIVO_D1GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0);
 	OUTREG(AVIVO_D1GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0);
 	OUTREG(AVIVO_D1GRPH_X_START + radeon_crtc->crtc_offset, 0);
diff --git a/src/atombios_output.c b/src/atombios_output.c
index 35d1767..60d6c10 100644
--- a/src/atombios_output.c
+++ b/src/atombios_output.c
@@ -61,7 +61,7 @@ const char *device_name[12] = {
 };
 
 static int
-atombios_output_dac_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_dac_setup(xf86OutputPtr output, int action)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
@@ -71,6 +71,7 @@ atombios_output_dac_setup(xf86OutputPtr output, DisplayModePtr mode)
     AtomBiosArgRec data;
     unsigned char *space;
     int index = 0, num = 0;
+    int clock = radeon_output->pixel_clock;
 
     if (radeon_encoder == NULL)
 	return ATOM_NOT_IMPLEMENTED;
@@ -90,7 +91,7 @@ atombios_output_dac_setup(xf86OutputPtr output, DisplayModePtr mode)
 	break;
     }
 
-    disp_data.ucAction = ATOM_ENABLE;
+    disp_data.ucAction =action;
 
     if (radeon_output->active_device & (ATOM_DEVICE_CRT_SUPPORT))
 	disp_data.ucDacStandard = ATOM_DAC1_PS2;
@@ -113,7 +114,7 @@ atombios_output_dac_setup(xf86OutputPtr output, DisplayModePtr mode)
 	    break;
 	}
     }
-    disp_data.usPixelClock = cpu_to_le16(mode->Clock / 10);
+    disp_data.usPixelClock = cpu_to_le16(clock / 10);
 
     data.exec.index = index;
     data.exec.dataSpace = (void *)&space;
@@ -130,7 +131,7 @@ atombios_output_dac_setup(xf86OutputPtr output, DisplayModePtr mode)
 }
 
 static int
-atombios_output_tv_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_tv_setup(xf86OutputPtr output, int action)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
     radeon_tvout_ptr tvout = &radeon_output->tvout;
@@ -138,10 +139,11 @@ atombios_output_tv_setup(xf86OutputPtr output, DisplayModePtr mode)
     TV_ENCODER_CONTROL_PS_ALLOCATION disp_data;
     AtomBiosArgRec data;
     unsigned char *space;
+    int clock = radeon_output->pixel_clock;
 
     memset(&disp_data,0, sizeof(disp_data));
 
-    disp_data.sTVEncoder.ucAction = ATOM_ENABLE;
+    disp_data.sTVEncoder.ucAction = action;
 
     if (radeon_output->active_device & (ATOM_DEVICE_CV_SUPPORT))
 	disp_data.sTVEncoder.ucTvStandard = ATOM_TV_CV;
@@ -177,7 +179,7 @@ atombios_output_tv_setup(xf86OutputPtr output, DisplayModePtr mode)
 	}
     }
 
-    disp_data.sTVEncoder.usPixelClock = cpu_to_le16(mode->Clock / 10);
+    disp_data.sTVEncoder.usPixelClock = cpu_to_le16(clock / 10);
     data.exec.index = GetIndexIntoMasterTable(COMMAND, TVEncoderControl);
     data.exec.dataSpace = (void *)&space;
     data.exec.pspace = &disp_data;
@@ -193,19 +195,21 @@ atombios_output_tv_setup(xf86OutputPtr output, DisplayModePtr mode)
 }
 
 int
-atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_external_tmds_setup(xf86OutputPtr output, int action)
 {
+    RADEONOutputPrivatePtr radeon_output = output->driver_private;
     ScrnInfoPtr pScrn = output->scrn;
     RADEONInfoPtr info       = RADEONPTR(pScrn);
     ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION disp_data;
     AtomBiosArgRec data;
     unsigned char *space;
+    int clock = radeon_output->pixel_clock;
 
     memset(&disp_data,0, sizeof(disp_data));
 
-    disp_data.sXTmdsEncoder.ucEnable = ATOM_ENABLE;
+    disp_data.sXTmdsEncoder.ucEnable = action;
 
-    if (mode->Clock > 165000)
+    if (clock > 165000)
 	disp_data.sXTmdsEncoder.ucMisc = PANEL_ENCODER_MISC_DUAL;
 
     if (pScrn->rgbBits == 8)
@@ -225,19 +229,21 @@ atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode)
 }
 
 static int
-atombios_output_ddia_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_ddia_setup(xf86OutputPtr output, int action)
 {
+    RADEONOutputPrivatePtr radeon_output = output->driver_private;
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
     DVO_ENCODER_CONTROL_PS_ALLOCATION disp_data;
     AtomBiosArgRec data;
     unsigned char *space;
+    int clock = radeon_output->pixel_clock;
 
     memset(&disp_data,0, sizeof(disp_data));
 
-    disp_data.sDVOEncoder.ucAction = ATOM_ENABLE;
-    disp_data.sDVOEncoder.usPixelClock = cpu_to_le16(mode->Clock / 10);
+    disp_data.sDVOEncoder.ucAction = action;
+    disp_data.sDVOEncoder.usPixelClock = cpu_to_le16(clock / 10);
 
-    if (mode->Clock > 165000)
+    if (clock > 165000)
 	disp_data.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute = PANEL_ENCODER_MISC_DUAL;
 
     data.exec.index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
@@ -254,7 +260,7 @@ atombios_output_ddia_setup(xf86OutputPtr output, DisplayModePtr mode)
 }
 
 static int
-atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_digital_setup(xf86OutputPtr output, int action)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
     ScrnInfoPtr pScrn = output->scrn;
@@ -267,6 +273,7 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
     int index = 0;
     int major, minor;
     int lvds_misc = 0;
+    int clock = radeon_output->pixel_clock;
 
     if (radeon_encoder == NULL)
 	return ATOM_NOT_IMPLEMENTED;
@@ -308,11 +315,11 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
 	switch (minor) {
 	case 1:
 	    disp_data.ucMisc = 0;
-	    disp_data.ucAction = PANEL_ENCODER_ACTION_ENABLE;
+	    disp_data.ucAction = action;
 	    if ((radeon_output->ConnectorType == CONNECTOR_HDMI_TYPE_A) ||
 		(radeon_output->ConnectorType == CONNECTOR_HDMI_TYPE_B))
 		disp_data.ucMisc |= PANEL_ENCODER_MISC_HDMI_TYPE;
-	    disp_data.usPixelClock = cpu_to_le16(mode->Clock / 10);
+	    disp_data.usPixelClock = cpu_to_le16(clock / 10);
 	    if (radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT)) {
 		if (lvds_misc & (1 << 0))
 		    disp_data.ucMisc |= PANEL_ENCODER_MISC_DUAL;
@@ -321,7 +328,7 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
 	    } else {
 		if (radeon_output->linkb)
 		    disp_data.ucMisc |= PANEL_ENCODER_MISC_TMDS_LINKB;
-		if (mode->Clock > 165000)
+		if (clock > 165000)
 		    disp_data.ucMisc |= PANEL_ENCODER_MISC_DUAL;
 		if (pScrn->rgbBits == 8)
 		    disp_data.ucMisc |= (1 << 1);
@@ -331,7 +338,7 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
 	case 2:
 	case 3:
 	    disp_data2.ucMisc = 0;
-	    disp_data2.ucAction = PANEL_ENCODER_ACTION_ENABLE;
+	    disp_data2.ucAction = action;
 	    if (minor == 3) {
 		if (radeon_output->coherent_mode) {
 		    disp_data2.ucMisc |= PANEL_ENCODER_MISC_COHERENT;
@@ -341,7 +348,7 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
 	    if ((radeon_output->ConnectorType == CONNECTOR_HDMI_TYPE_A) ||
 		(radeon_output->ConnectorType == CONNECTOR_HDMI_TYPE_B))
 		disp_data2.ucMisc |= PANEL_ENCODER_MISC_HDMI_TYPE;
-	    disp_data2.usPixelClock = cpu_to_le16(mode->Clock / 10);
+	    disp_data2.usPixelClock = cpu_to_le16(clock / 10);
 	    disp_data2.ucTruncate = 0;
 	    disp_data2.ucSpatial = 0;
 	    disp_data2.ucTemporal = 0;
@@ -364,7 +371,7 @@ atombios_output_digital_setup(xf86OutputPtr output, DisplayModePtr mode)
 	    } else {
 		if (radeon_output->linkb)
 		    disp_data2.ucMisc |= PANEL_ENCODER_MISC_TMDS_LINKB;
-		if (mode->Clock > 165000)
+		if (clock > 165000)
 		    disp_data2.ucMisc |= PANEL_ENCODER_MISC_DUAL;
 	    }
 	    data.exec.pspace = &disp_data2;
@@ -488,16 +495,17 @@ dp_link_clock_for_mode_clock(int mode_clock)
 }
 
 static int
-atombios_output_dig_encoder_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_dig_encoder_setup(xf86OutputPtr output, int action)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
-    RADEONCrtcPrivatePtr radeon_crtc = output->crtc->driver_private;
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
     radeon_encoder_ptr radeon_encoder = radeon_get_encoder(output);
     DIG_ENCODER_CONTROL_PS_ALLOCATION disp_data;
     AtomBiosArgRec data;
     unsigned char *space;
     int index = 0, major, minor, num = 0;
+    int clock = radeon_output->pixel_clock;
+    int dig_block = radeon_output->dig_block;
 
     if (radeon_encoder == NULL)
 	return ATOM_NOT_IMPLEMENTED;
@@ -505,11 +513,11 @@ atombios_output_dig_encoder_setup(xf86OutputPtr output, DisplayModePtr mode)
     memset(&disp_data,0, sizeof(disp_data));
 
     if (IS_DCE32_VARIANT) {
-	if (radeon_crtc->crtc_id)
+	if (dig_block)
 	    index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl);
 	else
 	    index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl);
-	num = radeon_crtc->crtc_id + 1;
+	num = dig_block + 1;
     } else {
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
@@ -529,8 +537,8 @@ atombios_output_dig_encoder_setup(xf86OutputPtr output, DisplayModePtr mode)
 
     atombios_get_command_table_version(info->atomBIOS, index, &major, &minor);
 
-    disp_data.ucAction = ATOM_ENABLE;
-    disp_data.usPixelClock = cpu_to_le16(mode->Clock / 10);
+    disp_data.ucAction = action;
+    disp_data.usPixelClock = cpu_to_le16(clock / 10);
 
     if (IS_DCE32_VARIANT) {
 	switch (radeon_encoder->encoder_id) {
@@ -569,11 +577,11 @@ atombios_output_dig_encoder_setup(xf86OutputPtr output, DisplayModePtr mode)
 	else
 	    disp_data.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
 
-	if (dp_link_clock_for_mode_clock(mode->Clock) == 27000)
+	if (dp_link_clock_for_mode_clock(clock) == 27000)
 	    disp_data.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
 
-	disp_data.ucLaneNum = dp_lanes_for_mode_clock(mode->Clock);
-    } else if (mode->Clock > 165000) {
+	disp_data.ucLaneNum = dp_lanes_for_mode_clock(clock);
+    } else if (clock > 165000) {
 	disp_data.ucConfig |= ATOM_ENCODER_CONFIG_LINKA_B;
 	disp_data.ucLaneNum = 8;
     } else {
@@ -605,10 +613,9 @@ union dig_transmitter_control {
 };
 
 static int
-atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_dig_transmitter_setup(xf86OutputPtr output, int action)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
-    RADEONCrtcPrivatePtr radeon_crtc = output->crtc->driver_private;
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
     radeon_encoder_ptr radeon_encoder = radeon_get_encoder(output);
     union dig_transmitter_control disp_data;
@@ -616,6 +623,8 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
     unsigned char *space;
     int index = 0, num = 0;
     int major, minor;
+    int clock = radeon_output->pixel_clock;
+    int dig_block = radeon_output->dig_block;
 
     if (radeon_encoder == NULL)
         return ATOM_NOT_IMPLEMENTED;
@@ -641,20 +650,20 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 
     atombios_get_command_table_version(info->atomBIOS, index, &major, &minor);
 
-    disp_data.v1.ucAction = ATOM_TRANSMITTER_ACTION_ENABLE;
+    disp_data.v1.ucAction = action;
 
     if (IS_DCE32_VARIANT) {
 	if (radeon_output->MonType == MT_DP) {
 	    disp_data.v2.usPixelClock =
-		cpu_to_le16(dp_link_clock_for_mode_clock(mode->Clock));
+		cpu_to_le16(dp_link_clock_for_mode_clock(clock));
 	    disp_data.v2.acConfig.fDPConnector = 1;
-	} else if (mode->Clock > 165000) {
-	    disp_data.v2.usPixelClock = cpu_to_le16((mode->Clock * 10 * 2) / 100);
+	} else if (clock > 165000) {
+	    disp_data.v2.usPixelClock = cpu_to_le16((clock * 10 * 2) / 100);
 	    disp_data.v2.acConfig.fDualLinkConnector = 1;
 	} else {
-	    disp_data.v2.usPixelClock = cpu_to_le16((mode->Clock * 10 * 4) / 100);
+	    disp_data.v2.usPixelClock = cpu_to_le16((clock * 10 * 4) / 100);
 	}
-	if (radeon_crtc->crtc_id)
+	if (dig_block)
 	    disp_data.v2.acConfig.ucEncoderSel = 1;
 
 	switch (radeon_encoder->encoder_id) {
@@ -684,9 +693,9 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 
 	if (radeon_output->MonType == MT_DP)
 	    disp_data.v1.usPixelClock =
-		cpu_to_le16(dp_link_clock_for_mode_clock(mode->Clock));
+		cpu_to_le16(dp_link_clock_for_mode_clock(clock));
 	else
-	    disp_data.v1.usPixelClock = cpu_to_le16((mode->Clock) / 10);
+	    disp_data.v1.usPixelClock = cpu_to_le16((clock) / 10);
 
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
@@ -704,7 +713,7 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
 	    disp_data.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
 	    if (info->IsIGP) {
-		if (mode->Clock > 165000) {
+		if (clock > 165000) {
 		    disp_data.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
 					      ATOM_TRANSMITTER_CONFIG_LINKA_B);
 		    /* guess */
@@ -724,7 +733,7 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 			disp_data.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15;
 		}
 	    } else {
-		if (mode->Clock > 165000)
+		if (clock > 165000)
 		    disp_data.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
 					      ATOM_TRANSMITTER_CONFIG_LINKA_B |
 					      ATOM_TRANSMITTER_CONFIG_LANE_0_7);
@@ -741,7 +750,7 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 	    disp_data.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
-	    if (mode->Clock > 165000)
+	    if (clock > 165000)
 		disp_data.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
 					  ATOM_TRANSMITTER_CONFIG_LINKA_B |
 					  ATOM_TRANSMITTER_CONFIG_LANE_0_7);
@@ -767,7 +776,6 @@ atombios_output_dig_transmitter_setup(xf86OutputPtr output, DisplayModePtr mode)
 	    }
 	}
     }
-    radeon_output->transmitter_config = disp_data.v1.ucConfig;
 
     data.exec.index = index;
     data.exec.dataSpace = (void *)&space;
@@ -1125,7 +1133,7 @@ atombios_output_overscan_setup(xf86OutputPtr output, DisplayModePtr mode, Displa
 }
 
 static int
-atombios_output_scaler_setup(xf86OutputPtr output, DisplayModePtr mode)
+atombios_output_scaler_setup(xf86OutputPtr output)
 {
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
@@ -1213,63 +1221,6 @@ atombios_output_scaler_setup(xf86OutputPtr output, DisplayModePtr mode)
 
 }
 
-static int
-atombios_dig_dpms(xf86OutputPtr output, int mode)
-{
-    RADEONOutputPrivatePtr radeon_output = output->driver_private;
-    RADEONInfoPtr info       = RADEONPTR(output->scrn);
-    radeon_encoder_ptr radeon_encoder = radeon_get_encoder(output);
-    DIG_TRANSMITTER_CONTROL_PS_ALLOCATION disp_data;
-    AtomBiosArgRec data;
-    unsigned char *space;
-
-    if (radeon_encoder == NULL)
-	return ATOM_NOT_IMPLEMENTED;
-
-    memset(&disp_data, 0, sizeof(disp_data));
-
-    switch (mode) {
-    case DPMSModeOn:
-	disp_data.ucAction = ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT;
-	break;
-    case DPMSModeStandby:
-    case DPMSModeSuspend:
-    case DPMSModeOff:
-	disp_data.ucAction = ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT;
-	break;
-    }
-
-    disp_data.ucConfig = radeon_output->transmitter_config;
-
-    if (IS_DCE32_VARIANT)
-	data.exec.index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
-    else {
-	switch (radeon_encoder->encoder_id) {
-	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
-	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
-	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
-	    data.exec.index = GetIndexIntoMasterTable(COMMAND, DIG1TransmitterControl);
-	    break;
-	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
-	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
-	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-	    data.exec.index = GetIndexIntoMasterTable(COMMAND, DIG2TransmitterControl);
-	    break;
-	}
-    }
-    data.exec.dataSpace = (void *)&space;
-    data.exec.pspace = &disp_data;
-
-    if (RHDAtomBiosFunc(info->atomBIOS->scrnIndex, info->atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) {
-	ErrorF("Output DIG dpms success\n");
-	return ATOM_SUCCESS;
-    }
-
-    ErrorF("Output DIG dpms failed\n");
-    return ATOM_NOT_IMPLEMENTED;
-
-}
-
 void
 atombios_output_dpms(xf86OutputPtr output, int mode)
 {
@@ -1334,7 +1285,7 @@ atombios_output_dpms(xf86OutputPtr output, int mode)
     case DPMSModeOn:
 	radeon_encoder->devices |= radeon_output->active_device;
 	if (is_dig)
-	    (void)atombios_dig_dpms(output, mode);
+	    atombios_output_dig_transmitter_setup(output, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT);
 	else {
 	    disp_data.ucAction = ATOM_ENABLE;
 	    data.exec.index = index;
@@ -1355,7 +1306,7 @@ atombios_output_dpms(xf86OutputPtr output, int mode)
 	radeon_encoder->devices &= ~(radeon_output->active_device);
 	if (!radeon_encoder->devices) {
 	    if (is_dig)
-		(void)atombios_dig_dpms(output, mode);
+		atombios_output_dig_transmitter_setup(output, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT);
 	    else {
 		disp_data.ucAction = ATOM_DISABLE;
 		data.exec.index = index;
@@ -1406,8 +1357,12 @@ atombios_set_output_crtc_source(xf86OutputPtr output)
 	default:
 	    if (IS_AVIVO_VARIANT)
 		crtc_src_param.ucCRTC = radeon_crtc->crtc_id;
-	    else
-		crtc_src_param.ucCRTC = radeon_crtc->crtc_id << 2;
+	    else {
+		if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DAC1)
+		    crtc_src_param.ucCRTC = radeon_crtc->crtc_id;
+		else
+		    crtc_src_param.ucCRTC = radeon_crtc->crtc_id << 2;
+	    }
 	    switch (radeon_encoder->encoder_id) {
 	    case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
 	    case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
@@ -1559,13 +1514,16 @@ atombios_output_mode_set(xf86OutputPtr output,
 			 DisplayModePtr adjusted_mode)
 {
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
+    RADEONCrtcPrivatePtr radeon_crtc = output->crtc->driver_private;
     radeon_encoder_ptr radeon_encoder = radeon_get_encoder(output);
     RADEONInfoPtr info       = RADEONPTR(output->scrn);
     if (radeon_encoder == NULL)
         return;
 
+    radeon_output->pixel_clock = adjusted_mode->Clock;
+    radeon_output->dig_block = radeon_crtc->crtc_id;
     atombios_output_overscan_setup(output, mode, adjusted_mode);
-    atombios_output_scaler_setup(output, adjusted_mode);
+    atombios_output_scaler_setup(output);
     atombios_set_output_crtc_source(output);
 
     if (IS_AVIVO_VARIANT) {
@@ -1580,29 +1538,31 @@ atombios_output_mode_set(xf86OutputPtr output,
     case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
     case ENCODER_OBJECT_ID_INTERNAL_LVDS:
     case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
-	atombios_output_digital_setup(output, adjusted_mode);
+	atombios_output_digital_setup(output, PANEL_ENCODER_ACTION_ENABLE);
 	break;
     case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
     case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
     case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
     case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-	atombios_output_dig_encoder_setup(output, adjusted_mode);
-	atombios_output_dig_transmitter_setup(output, adjusted_mode);
+	atombios_output_dig_encoder_setup(output, ATOM_ENABLE);
+	atombios_output_dig_transmitter_setup(output, ATOM_TRANSMITTER_ACTION_INIT);
+	atombios_output_dig_transmitter_setup(output, ATOM_TRANSMITTER_ACTION_SETUP);
+	atombios_output_dig_transmitter_setup(output, ATOM_TRANSMITTER_ACTION_ENABLE);
 	break;
     case ENCODER_OBJECT_ID_INTERNAL_DDI:
-	atombios_output_ddia_setup(output, adjusted_mode);
+	atombios_output_ddia_setup(output, ATOM_ENABLE);
 	break;
     case ENCODER_OBJECT_ID_INTERNAL_DVO1:
     case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
-	atombios_external_tmds_setup(output, adjusted_mode);
+	atombios_external_tmds_setup(output, ATOM_ENABLE);
 	break;
     case ENCODER_OBJECT_ID_INTERNAL_DAC1:
     case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
     case ENCODER_OBJECT_ID_INTERNAL_DAC2:
     case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
-	atombios_output_dac_setup(output, adjusted_mode);
+	atombios_output_dac_setup(output, ATOM_ENABLE);
 	if (radeon_output->active_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT))
-	    atombios_output_tv_setup(output, adjusted_mode);
+	    atombios_output_tv_setup(output, ATOM_ENABLE);
 	break;
     }
     atombios_apply_output_quirks(output, adjusted_mode);
diff --git a/src/legacy_output.c b/src/legacy_output.c
index 6223531..423a3e2 100644
--- a/src/legacy_output.c
+++ b/src/legacy_output.c
@@ -1589,6 +1589,7 @@ legacy_output_mode_set(xf86OutputPtr output, DisplayModePtr mode,
     if (radeon_encoder == NULL)
 	return;
 
+    radeon_output->pixel_clock = adjusted_mode->Clock;
     if (radeon_crtc->crtc_id == 0) {
 	ErrorF("set RMX\n");
 	is_primary = TRUE;
@@ -1614,7 +1615,7 @@ legacy_output_mode_set(xf86OutputPtr output, DisplayModePtr mode,
 	    unsigned char *RADEONMMIO = info->MMIO;
 	    uint32_t fp2_gen_cntl;
 
-	    atombios_external_tmds_setup(output, mode);
+	    atombios_external_tmds_setup(output, ATOM_ENABLE);
 	    fp2_gen_cntl = INREG(RADEON_FP2_GEN_CNTL) & ~R200_FP2_SOURCE_SEL_MASK;
 	    if (radeon_crtc->crtc_id == 1)
 		fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC2;
diff --git a/src/pcidb/ati_pciids.csv b/src/pcidb/ati_pciids.csv
index 4d4e625..b361d9d 100644
--- a/src/pcidb/ati_pciids.csv
+++ b/src/pcidb/ati_pciids.csv
@@ -346,6 +346,8 @@
 "0x9456","RV770_9456","RV770",,,,,,"ATI FirePro V8700 (FireGL)"
 "0x945A","RV770_945A","RV770",1,,,,,"ATI Mobility RADEON HD 4870"
 "0x945B","RV770_945B","RV770",1,,,,,"ATI Mobility RADEON M98"
+"0x9460","RV790_9460","RV770",,,,,,"ATI Radeon 4800 Series"
+"0x9462","RV790_9462","RV770",,,,,,"ATI Radeon 4800 Series"
 "0x946A","RV770_946A","RV770",1,,,,,"ATI FirePro M7750"
 "0x946B","RV770_946B","RV770",1,,,,,"ATI M98"
 "0x947A","RV770_947A","RV770",1,,,,,"ATI M98"
@@ -430,3 +432,10 @@
 "0x9612","RS780_9612","RS780",,1,,,1,"ATI Radeon HD 3200 Graphics"
 "0x9613","RS780_9613","RS780",,1,,,1,"ATI Radeon 3100 Graphics"
 "0x9614","RS780_9614","RS780",,1,,,1,"ATI Radeon HD 3300 Graphics"
+"0x9615","RS780_9615","RS780",,1,,,1,"ATI Radeon HD 3200 Graphics"
+"0x9616","RS780_9616","RS780",,1,,,1,"ATI Radeon 3000 Graphics"
+"0x9710","RS880_9710","RS880",,1,,,1,"ATI Radeon HD Graphics"
+"0x9711","RS880_9711","RS880",,1,,,1,"ATI Radeon Graphics"
+"0x9712","RS880_9712","RS880",1,1,,,1,"ATI Mobility Radeon HD Graphics"
+"0x9713","RS880_9713","RS880",1,1,,,1,"ATI Mobility Radeon Graphics"
+"0x9714","RS880_9714","RS880",,1,,,1,"ATI Radeon Graphics"
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 40f02e1..2dc33a8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1098,6 +1098,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     unsigned int i;
     tex_resource_t  tex_res;
     tex_sampler_t   tex_samp;
+    int pix_r, pix_g, pix_b, pix_a;
 
     CLEAR (tex_res);
     CLEAR (tex_samp);
@@ -1142,46 +1143,102 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     switch (pPict->format) {
     case PICT_a1r5g5b5:
     case PICT_a8r8g8b8:
-	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
+	pix_r = SQ_SEL_Z; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_X; /* B */
+	pix_a = SQ_SEL_W; /* A */
 	break;
     case PICT_a8b8g8r8:
-	tex_res.dst_sel_x           = SQ_SEL_X; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_Z; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
+	pix_r = SQ_SEL_X; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_Z; /* B */
+	pix_a = SQ_SEL_W; /* A */
 	break;
     case PICT_x8b8g8r8:
-	tex_res.dst_sel_x           = SQ_SEL_X; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_Z; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
+	pix_r = SQ_SEL_X; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_Z; /* B */
+	pix_a = SQ_SEL_1; /* A */
 	break;
     case PICT_x1r5g5b5:
     case PICT_x8r8g8b8:
-	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
-	break;
     case PICT_r5g6b5:
-	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
+	pix_r = SQ_SEL_Z; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_X; /* B */
+	pix_a = SQ_SEL_1; /* A */
 	break;
     case PICT_a8:
-	tex_res.dst_sel_x           = SQ_SEL_0; /* R */
-	tex_res.dst_sel_y           = SQ_SEL_0; /* G */
-	tex_res.dst_sel_z           = SQ_SEL_0; /* B */
-	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
+	pix_r = SQ_SEL_0; /* R */
+	pix_g = SQ_SEL_0; /* G */
+	pix_b = SQ_SEL_0; /* B */
+	pix_a = SQ_SEL_X; /* A */
 	break;
     default:
 	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
     }
 
+    if (unit == 0) {
+	if (!accel_state->has_mask) {
+	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
+		pix_r = SQ_SEL_0;
+		pix_g = SQ_SEL_0;
+		pix_b = SQ_SEL_0;
+	    }
+
+	    if (PICT_FORMAT_A(pPict->format) == 0)
+		pix_a = SQ_SEL_1;
+	} else {
+	    if (accel_state->component_alpha) {
+		if (accel_state->src_alpha) {
+		    if (PICT_FORMAT_A(pPict->format) == 0) {
+			pix_r = SQ_SEL_1;
+			pix_g = SQ_SEL_1;
+			pix_b = SQ_SEL_1;
+			pix_a = SQ_SEL_1;
+		    } else {
+			pix_r = pix_a;
+			pix_g = pix_a;
+			pix_b = pix_a;
+		    }
+		} else {
+		    if (PICT_FORMAT_A(pPict->format) == 0)
+			pix_a = SQ_SEL_1;
+		}
+	    } else {
+		if (PICT_FORMAT_RGB(pPict->format) == 0) {
+		    pix_r = SQ_SEL_0;
+		    pix_g = SQ_SEL_0;
+		    pix_b = SQ_SEL_0;
+		}
+
+		if (PICT_FORMAT_A(pPict->format) == 0)
+		    pix_a = SQ_SEL_1;
+	    }
+	}
+    } else {
+	if (accel_state->component_alpha) {
+	    if (PICT_FORMAT_A(pPict->format) == 0)
+		pix_a = SQ_SEL_1;
+	} else {
+	    if (PICT_FORMAT_A(pPict->format) == 0) {
+		pix_r = SQ_SEL_1;
+		pix_g = SQ_SEL_1;
+		pix_b = SQ_SEL_1;
+		pix_a = SQ_SEL_1;
+	    } else {
+		pix_r = pix_a;
+		pix_g = pix_a;
+		pix_b = pix_a;
+	    }
+	}
+    }
+
+    tex_res.dst_sel_x           = pix_r; /* R */
+    tex_res.dst_sel_y           = pix_g; /* G */
+    tex_res.dst_sel_z           = pix_b; /* B */
+    tex_res.dst_sel_w           = pix_a; /* A */
+
     tex_res.base_level          = 0;
     tex_res.last_level          = 0;
     tex_res.perf_modulation     = 0;
@@ -1324,14 +1381,26 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     uint32_t blendcntl, dst_format;
     cb_config_t cb_conf;
     shader_config_t vs_conf, ps_conf;
-    uint32_t ps[24];
 
     /* return FALSE; */
 
-    if (pMask)
+    if (pMask) {
 	accel_state->has_mask = TRUE;
-    else
+	if (pMaskPicture->componentAlpha) {
+	    accel_state->component_alpha = TRUE;
+	    if (R600BlendOp[op].src_alpha)
+		accel_state->src_alpha = TRUE;
+	    else
+		accel_state->src_alpha = FALSE;
+	} else {
+	    accel_state->component_alpha = FALSE;
+	    accel_state->src_alpha = FALSE;
+	}
+    } else {
 	accel_state->has_mask = FALSE;
+	accel_state->component_alpha = FALSE;
+	accel_state->src_alpha = FALSE;
+    }
 
     accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
     accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
@@ -1346,116 +1415,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     if (!R600GetDestFormat(pDstPicture, &dst_format))
 	return FALSE;
 
-    if (pMask) {
-	int src_a, src_r, src_g, src_b;
-	int mask_a, mask_r, mask_g, mask_b;
-
-	/* setup pixel shader */
-	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
-	    src_r = SQ_SEL_0;
-	    src_g = SQ_SEL_0;
-	    src_b = SQ_SEL_0;
-	} else {
-	    src_r = SQ_SEL_X;
-	    src_g = SQ_SEL_Y;
-	    src_b = SQ_SEL_Z;
-	}
-
-	if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-	    src_a = SQ_SEL_1;
-	} else {
-	    src_a = SQ_SEL_W;
-	}
-
-	if (pMaskPicture->componentAlpha) {
-	    if (R600BlendOp[op].src_alpha) {
-		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-		    src_r = SQ_SEL_1;
-		    src_g = SQ_SEL_1;
-		    src_b = SQ_SEL_1;
-		    src_a = SQ_SEL_1;
-		} else {
-		    src_r = SQ_SEL_W;
-		    src_g = SQ_SEL_W;
-		    src_b = SQ_SEL_W;
-		    src_a = SQ_SEL_W;
-		}
-
-		mask_r = SQ_SEL_X;
-		mask_g = SQ_SEL_Y;
-		mask_b = SQ_SEL_Z;
-
-		if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		    mask_a = SQ_SEL_1;
-		} else {
-		    mask_a = SQ_SEL_W;
-		}
-	    } else {
-		src_r = SQ_SEL_X;
-		src_g = SQ_SEL_Y;
-		src_b = SQ_SEL_Z;
-
-		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-		    src_a = SQ_SEL_1;
-		} else {
-		    src_a = SQ_SEL_W;
-		}
-
-		mask_r = SQ_SEL_X;
-		mask_g = SQ_SEL_Y;
-		mask_b = SQ_SEL_Z;
-
-		if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		    mask_a = SQ_SEL_1;
-		} else {
-		    mask_a = SQ_SEL_W;
-		}
-	    }
-	} else {
-	    if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		mask_r = SQ_SEL_1;
-		mask_g = SQ_SEL_1;
-		mask_b = SQ_SEL_1;
-	    } else {
-		mask_r = SQ_SEL_W;
-		mask_g = SQ_SEL_W;
-		mask_b = SQ_SEL_W;
-	    }
-	    if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		mask_a = SQ_SEL_1;
-	    } else {
-		mask_a = SQ_SEL_W;
-	    }
-	}
-
-	R600_comp_mask_ps(info->ChipFamily, ps,
-			  src_a, src_r, src_g, src_b,
-			  mask_a, mask_r, mask_g, mask_b);
-
-    } else {
-	int src_a, src_r, src_g, src_b;
-	/* setup pixel shader */
-	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
-	    src_r = SQ_SEL_0;
-	    src_g = SQ_SEL_0;
-	    src_b = SQ_SEL_0;
-	} else {
-	    src_r = SQ_SEL_X;
-	    src_g = SQ_SEL_Y;
-	    src_b = SQ_SEL_Z;
-	}
-
-	if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-	    src_a = SQ_SEL_1;
-	} else {
-	    src_a = SQ_SEL_W;
-	}
-
-	R600_comp_ps(info->ChipFamily, ps,
-		     src_a, src_r, src_g, src_b);
-
-    }
-
     CLEAR (cb_conf);
     CLEAR (vs_conf);
     CLEAR (ps_conf);
@@ -1484,19 +1443,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     } else
 	accel_state->is_transform[1] = FALSE;
 
-    /* VS bool constant */
-    if (pMask)
-	set_bool_const(pScrn, accel_state->ib, 1, 1);
-    else
-	set_bool_const(pScrn, accel_state->ib, 1, 0);
+    if (pMask) {
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
+	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+	    accel_state->comp_mask_ps_offset;
+    } else {
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
+	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+	    accel_state->comp_ps_offset;
+    }
 
     accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
 	accel_state->comp_vs_offset;
 
-    memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps));
-    accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart +
-	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2) - 256;
-
     accel_state->vs_size = 512;
     accel_state->ps_size = 512;
 
@@ -2013,11 +1972,11 @@ R600LoadShaders(ScrnInfoPtr pScrn)
 
     /*  comp ps --------------------------------------- */
     accel_state->comp_ps_offset = 2560;
-    /*  not yet */
+    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
 
     /*  comp mask ps --------------------------------------- */
     accel_state->comp_mask_ps_offset = 3072;
-    /*  not yet */
+    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
 
     /*  xv vs --------------------------------------- */
     accel_state->xv_vs_offset = 3584;
@@ -2105,10 +2064,10 @@ R600DrawInit(ScreenPtr pScreen)
 #if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
     xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
 
-    info->accel_state->exa->maxPitchBytes = 16320;
+    info->accel_state->exa->maxPitchBytes = 32768;
     info->accel_state->exa->maxX = 8192;
 #else
-    info->accel_state->exa->maxX = 16320 / 4;
+    info->accel_state->exa->maxX = 8192;
 #endif
     info->accel_state->exa->maxY = 8192;
 
diff --git a/src/r600_reg.h b/src/r600_reg.h
index 9036e2a..937926b 100644
--- a/src/r600_reg.h
+++ b/src/r600_reg.h
@@ -51,8 +51,8 @@ enum {
     SET_LOOP_CONST_offset          = 0x0003e200,
     SET_LOOP_CONST_end             = 0x0003e380,
     SET_BOOL_CONST_offset          = 0x0003e380,
-    SET_BOOL_CONST_end             = 0x00040000,
-} ;
+    SET_BOOL_CONST_end             = 0x0003e38c,
+};
 
 /* packet3 IT_SURFACE_BASE_UPDATE bits */
 enum {
diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h
index 2e7dfa9..b4cc639 100644
--- a/src/r600_reg_r6xx.h
+++ b/src/r600_reg_r6xx.h
@@ -488,7 +488,16 @@ enum {
     SQ_LOOP_CONST_ps                                      = 0,
     SQ_LOOP_CONST_vs                                      = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
     SQ_LOOP_CONST_gs                                      = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
-} ;
+    SQ_BOOL_CONST                                         = SQ_BOOL_CONST_0,	   /* 32 bits per PS, VS, GS */
+    SQ_BOOL_CONST_ps_num                                  = 1,
+    SQ_BOOL_CONST_vs_num                                  = 1,
+    SQ_BOOL_CONST_gs_num                                  = 1,
+    SQ_BOOL_CONST_all_num                                 = 3,
+    SQ_BOOL_CONST_offset                                  = 4,
+    SQ_BOOL_CONST_ps                                      = 0,
+    SQ_BOOL_CONST_vs                                      = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+    SQ_BOOL_CONST_gs                                      = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
+};
 
 
 #endif
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 21c4c68..addba36 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1245,10 +1245,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 }
 
 /* comp mask ps --------------------------------------- */
-int R600_comp_mask_ps(RADEONChipFamily ChipSet,
-		      uint32_t* shader,
-		      int src_a, int src_r, int src_g, int src_b,
-		      int mask_a, int mask_r, int mask_g, int mask_b)
+int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 {
     int i = 0;
 
@@ -1421,10 +1418,10 @@ int R600_comp_mask_ps(RADEONChipFamily ChipSet,
 			     R7xx_ALT_CONST(0));
     shader[i++] = TEX_DWORD1(DST_GPR(0),
 			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(src_r),
-			     DST_SEL_Y(src_g),
-			     DST_SEL_Z(src_b),
-			     DST_SEL_W(src_a),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
 			     LOD_BIAS(0),
 			     COORD_TYPE_X(TEX_NORMALIZED),
 			     COORD_TYPE_Y(TEX_NORMALIZED),
@@ -1449,10 +1446,10 @@ int R600_comp_mask_ps(RADEONChipFamily ChipSet,
 			     R7xx_ALT_CONST(0));
     shader[i++] = TEX_DWORD1(DST_GPR(1),
 			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(mask_r),
-			     DST_SEL_Y(mask_g),
-			     DST_SEL_Z(mask_b),
-			     DST_SEL_W(mask_a),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
 			     LOD_BIAS(0),
 			     COORD_TYPE_X(TEX_NORMALIZED),
 			     COORD_TYPE_Y(TEX_NORMALIZED),
@@ -1781,10 +1778,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 }
 
 /* comp ps --------------------------------------- */
-int R600_comp_ps(RADEONChipFamily ChipSet,
-		 uint32_t* shader,
-		 int src_a, int src_r, int src_g, int src_b
-)
+int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 {
     int i = 0;
 
@@ -1831,10 +1825,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
 			     R7xx_ALT_CONST(0));
     shader[i++] = TEX_DWORD1(DST_GPR(0),
 			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(src_r),
-			     DST_SEL_Y(src_g),
-			     DST_SEL_Z(src_b),
-			     DST_SEL_W(src_a),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
 			     LOD_BIAS(0),
 			     COORD_TYPE_X(TEX_NORMALIZED),
 			     COORD_TYPE_Y(TEX_NORMALIZED),
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 67b64ff..6c12614 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -352,15 +352,8 @@ extern int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
 extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
 extern int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
 
-extern int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* vs);
-extern int R600_comp_mask_ps(RADEONChipFamily ChipSet,
-			     uint32_t* ps,
-			     int src_a, int src_r, int src_g, int src_b,
-			     int mask_a, int mask_r, int mask_g, int mask_b);
-
 extern int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
-extern int R600_comp_ps(RADEONChipFamily ChipSet,
-			uint32_t* ps,
-			int src_a, int src_r, int src_g, int src_b);
+extern int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+extern int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
 
 #endif
diff --git a/src/r600_state.h b/src/r600_state.h
index c903ded..181e167 100644
--- a/src/r600_state.h
+++ b/src/r600_state.h
@@ -255,7 +255,7 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf);
 void
 set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
 void
-set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
+set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
 void
 set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res);
 void
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 735231b..3dfe151 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -172,12 +172,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     switch(pPriv->id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	set_bool_const(pScrn, accel_state->ib, 0, 1);
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
     default:
-	set_bool_const(pScrn, accel_state->ib, 0, 0);
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
 	break;
     }
 
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index f93ca01..bce597b 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -153,8 +153,10 @@ reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib)
 {
     int i;
 
-    for (i = 0; i < SQ_BOOL_CONST_0_num; i++)
-	EREG(ib, SQ_BOOL_CONST_0 + (i << 2), 0);
+
+    PACK0(ib, SQ_BOOL_CONST, SQ_BOOL_CONST_all_num);
+    for (i = 0; i < SQ_BOOL_CONST_all_num; i++)
+	E32(ib, 0);
 
     PACK0(ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num);
 
@@ -430,10 +432,12 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co
 }
 
 void
-set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
+set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
 {
-    /* bool order is: ps, vs, gs, ps, vs, gs, ... */
-    EREG(ib, SQ_BOOL_CONST_0 + (offset << 2), val);
+    /* bool register order is: ps, vs, gs; one register each
+     * 1 bit per bool; 32 bools each for ps, vs, gs.
+     */
+    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
 }
 
 void
diff --git a/src/radeon.h b/src/radeon.h
index 7bb720a..d488429 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -325,6 +325,7 @@ typedef enum {
     CHIP_FAMILY_RV620,
     CHIP_FAMILY_RV635,
     CHIP_FAMILY_RS780,
+    CHIP_FAMILY_RS880,
     CHIP_FAMILY_RV770,
     CHIP_FAMILY_RV730,
     CHIP_FAMILY_RV710,
@@ -655,6 +656,10 @@ struct radeon_accel_state {
     Bool              same_surface;
     int               rop;
     uint32_t          planemask;
+
+    // composite
+    Bool              component_alpha;
+    Bool              src_alpha;
 #endif
 
 #ifdef USE_XAA
@@ -1051,8 +1056,10 @@ extern RADEONEntPtr RADEONEntPriv(ScrnInfoPtr pScrn);
 extern int RADEONMinBits(int val);
 extern unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr);
 extern unsigned RADEONINPLL(ScrnInfoPtr pScrn, int addr);
+extern unsigned RADEONINPCIE(ScrnInfoPtr pScrn, int addr);
 extern void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data);
 extern void RADEONOUTPLL(ScrnInfoPtr pScrn, int addr, uint32_t data);
+extern void RADEONOUTPCIE(ScrnInfoPtr pScrn, int addr, uint32_t data);
 extern void RADEONPllErrataAfterData(RADEONInfoPtr info);
 extern void RADEONPllErrataAfterIndex(RADEONInfoPtr info);
 extern void RADEONWaitForVerticalSync(ScrnInfoPtr pScrn);
diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c
index 0b17cbd..47f5103 100644
--- a/src/radeon_atombios.c
+++ b/src/radeon_atombios.c
@@ -1784,7 +1784,7 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn)
 		continue;
 	    }
 
-	    if ((info->ChipFamily == CHIP_FAMILY_RS780) &&
+	    if (info->IsIGP &&
 		(con_obj_id == CONNECTOR_OBJECT_ID_PCIE_CONNECTOR)) {
 		uint32_t slot_config, ct;
 
diff --git a/src/radeon_atombios.h b/src/radeon_atombios.h
index efebc62..b9a5398 100644
--- a/src/radeon_atombios.h
+++ b/src/radeon_atombios.h
@@ -126,7 +126,7 @@ extern Bool
 RADEONGetATOMTVInfo(xf86OutputPtr output);
 
 extern int
-atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode);
+atombios_external_tmds_setup(xf86OutputPtr output, int action);
 
 extern void
 atombios_get_command_table_version(atomBiosHandlePtr atomBIOS, int index, int *major, int *minor);
diff --git a/src/radeon_bios.c b/src/radeon_bios.c
index 6fc0cf4..9b5cb88 100644
--- a/src/radeon_bios.c
+++ b/src/radeon_bios.c
@@ -740,6 +740,9 @@ static Bool RADEONGetLegacyConnectorInfoFromBIOS (ScrnInfoPtr pScrn)
 		    if (tmp1) {
 			DDCType	= tmp1;
 			switch (DDCType) {
+			case DDC_NONE_DETECTED:
+			    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "No DDC for LCD\n");
+			    break;
 			case DDC_MONID:
 			    info->BiosConnector[4].ddc_i2c = legacy_setup_i2c_bus(RADEON_GPIO_MONID);
 			    break;
@@ -973,8 +976,8 @@ Bool RADEONGetClockInfoFromBIOS (ScrnInfoPtr pScrn)
 
 	    pll->xclk = RADEON_BIOS16(pll_info_block + 0x08);
 
-	    info->sclk = RADEON_BIOS16(pll_info_block + 8) / 100.0;
-	    info->mclk = RADEON_BIOS16(pll_info_block + 10) / 100.0;
+	    info->sclk = RADEON_BIOS16(pll_info_block + 10) / 100.0;
+	    info->mclk = RADEON_BIOS16(pll_info_block + 8) / 100.0;
 	}
 
 	if (info->sclk == 0) info->sclk = 200;
diff --git a/src/radeon_chipinfo_gen.h b/src/radeon_chipinfo_gen.h
index eb2df17..6321246 100644
--- a/src/radeon_chipinfo_gen.h
+++ b/src/radeon_chipinfo_gen.h
@@ -265,6 +265,8 @@ RADEONCardInfo RADEONCards[] = {
  { 0x9456, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 },
  { 0x945A, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 },
  { 0x945B, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 },
+ { 0x9460, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 },
+ { 0x9462, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 },
  { 0x946A, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 },
  { 0x946B, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 },
  { 0x947A, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 },
@@ -349,4 +351,11 @@ RADEONCardInfo RADEONCards[] = {
  { 0x9612, CHIP_FAMILY_RS780, 0, 1, 0, 0, 1 },
  { 0x9613, CHIP_FAMILY_RS780, 0, 1, 0, 0, 1 },
  { 0x9614, CHIP_FAMILY_RS780, 0, 1, 0, 0, 1 },
+ { 0x9615, CHIP_FAMILY_RS780, 0, 1, 0, 0, 1 },
+ { 0x9616, CHIP_FAMILY_RS780, 0, 1, 0, 0, 1 },
+ { 0x9710, CHIP_FAMILY_RS880, 0, 1, 0, 0, 1 },
+ { 0x9711, CHIP_FAMILY_RS880, 0, 1, 0, 0, 1 },
+ { 0x9712, CHIP_FAMILY_RS880, 1, 1, 0, 0, 1 },
+ { 0x9713, CHIP_FAMILY_RS880, 1, 1, 0, 0, 1 },
+ { 0x9714, CHIP_FAMILY_RS880, 0, 1, 0, 0, 1 },
 };
diff --git a/src/radeon_chipset_gen.h b/src/radeon_chipset_gen.h
index 3c86ae6..631eda8 100644
--- a/src/radeon_chipset_gen.h
+++ b/src/radeon_chipset_gen.h
@@ -265,6 +265,8 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_RV770_9456, "ATI FirePro V8700 (FireGL)" },
   { PCI_CHIP_RV770_945A, "ATI Mobility RADEON HD 4870" },
   { PCI_CHIP_RV770_945B, "ATI Mobility RADEON M98" },
+  { PCI_CHIP_RV790_9460, "ATI Radeon 4800 Series" },
+  { PCI_CHIP_RV790_9462, "ATI Radeon 4800 Series" },
   { PCI_CHIP_RV770_946A, "ATI FirePro M7750" },
   { PCI_CHIP_RV770_946B, "ATI M98" },
   { PCI_CHIP_RV770_947A, "ATI M98" },
@@ -349,5 +351,12 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_RS780_9612, "ATI Radeon HD 3200 Graphics" },
   { PCI_CHIP_RS780_9613, "ATI Radeon 3100 Graphics" },
   { PCI_CHIP_RS780_9614, "ATI Radeon HD 3300 Graphics" },
+  { PCI_CHIP_RS780_9615, "ATI Radeon HD 3200 Graphics" },
+  { PCI_CHIP_RS780_9616, "ATI Radeon 3000 Graphics" },
+  { PCI_CHIP_RS880_9710, "ATI Radeon HD Graphics" },
+  { PCI_CHIP_RS880_9711, "ATI Radeon Graphics" },
+  { PCI_CHIP_RS880_9712, "ATI Mobility Radeon HD Graphics" },
+  { PCI_CHIP_RS880_9713, "ATI Mobility Radeon Graphics" },
+  { PCI_CHIP_RS880_9714, "ATI Radeon Graphics" },
   { -1,                 NULL }
 };
diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c
index cd0d55e..4b508ce 100644
--- a/src/radeon_crtc.c
+++ b/src/radeon_crtc.c
@@ -115,6 +115,9 @@ radeon_crtc_mode_prepare(xf86CrtcPtr crtc)
 {
     RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
 
+    if (radeon_crtc->initialized)
+	radeon_crtc_dpms(crtc, DPMSModeOff);
+
     if (radeon_crtc->enabled)
 	crtc->funcs->hide_cursor(crtc);
 }
@@ -283,6 +286,8 @@ radeon_crtc_mode_commit(xf86CrtcPtr crtc)
 {
     if (crtc->scrn->pScreen != NULL)
 	xf86_reload_cursors(crtc->scrn->pScreen);
+
+    radeon_crtc_dpms(crtc, DPMSModeOn);
 }
 
 void
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index 5a15c70..8673f5e 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -668,7 +668,30 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data)
     }
 }
 
-static Bool avivo_get_mc_idle(ScrnInfoPtr pScrn)
+/* Read PCIE register */
+unsigned RADEONINPCIE(ScrnInfoPtr pScrn, int addr)
+{
+    RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;
+    CARD32         data;
+
+    OUTREG(RADEON_PCIE_INDEX, addr & 0xff);
+    data = INREG(RADEON_PCIE_DATA);
+
+    return data;
+}
+
+/* Write PCIE register */
+void RADEONOUTPCIE(ScrnInfoPtr pScrn, int addr, uint32_t data)
+{
+    RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;
+
+    OUTREG(RADEON_PCIE_INDEX, ((addr) & 0xff));
+    OUTREG(RADEON_PCIE_DATA, data);
+}
+
+static Bool radeon_get_mc_idle(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
@@ -694,11 +717,21 @@ static Bool avivo_get_mc_idle(ScrnInfoPtr pScrn)
 	    return TRUE;
 	else
 	    return FALSE;
-    } else {
+    } else if (info->ChipFamily >= CHIP_FAMILY_R520) {
 	if (INMC(pScrn, R520_MC_STATUS) & R520_MC_STATUS_IDLE)
 	    return TRUE;
 	else
 	    return FALSE;
+    } else if (IS_R300_VARIANT) {
+	if (INREG(RADEON_MC_STATUS) & R300_MC_IDLE)
+	    return TRUE;
+	else
+	    return FALSE;
+    } else {
+	if (INREG(RADEON_MC_STATUS) & RADEON_MC_IDLE)
+	    return TRUE;
+	else
+	    return FALSE;
     }
 }
 
@@ -1333,7 +1366,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn)
     if ((info->ChipFamily != CHIP_FAMILY_RS600) &&
 	(info->ChipFamily != CHIP_FAMILY_RS690) &&
 	(info->ChipFamily != CHIP_FAMILY_RS740) &&
-	(info->ChipFamily != CHIP_FAMILY_RS780)) {
+	(info->ChipFamily != CHIP_FAMILY_RS780) &&
+	(info->ChipFamily != CHIP_FAMILY_RS880)) {
 	if (info->IsIGP)
 	    info->mc_fb_location = INREG(RADEON_NB_TOM);
 	else
@@ -2204,6 +2238,8 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn)
 	}
     }
 
+    if (info->ChipFamily == CHIP_FAMILY_RS880)
+	return FALSE;
 
     if (!xf86ReturnOptValBool(info->Options, OPTION_DRI, TRUE)) {
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
@@ -3818,7 +3854,7 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn,
 
 	    usleep(10000);
 	    timeout = 0;
-	    while (!(avivo_get_mc_idle(pScrn))) {
+	    while (!(radeon_get_mc_idle(pScrn))) {
 		if (++timeout > 1000000) {
 		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
 			       "Timeout trying to update memory controller settings !\n");
@@ -3858,7 +3894,7 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn,
 	if (mc_fb_loc != restore->mc_fb_location ||
 	    mc_agp_loc != restore->mc_agp_location) {
 	    uint32_t crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl=0, ov0_scale_cntl;
-	    uint32_t old_mc_status, status_idle;
+	    uint32_t old_mc_status;
 
 	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
 			   "  Map Changed ! Applying ...\n");
@@ -3897,15 +3933,8 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn,
 
 	    /* Make sure the chip settles down (paranoid !) */ 
 	    usleep(100000);
-
-	    /* Wait for MC idle */
-	    if (IS_R300_VARIANT)
-		status_idle = R300_MC_IDLE;
-	    else
-		status_idle = RADEON_MC_IDLE;
-
 	    timeout = 0;
-	    while (!(INREG(RADEON_MC_STATUS) & status_idle)) {
+	    while (!(radeon_get_mc_idle(pScrn))) {
 		if (++timeout > 1000000) {
 		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
 			       "Timeout trying to update memory controller settings !\n");
diff --git a/src/radeon_macros.h b/src/radeon_macros.h
index 387e5f5..b7056b0 100644
--- a/src/radeon_macros.h
+++ b/src/radeon_macros.h
@@ -152,7 +152,9 @@ do {									\
 } while (0)
 
 #define INMC(pScrn, addr) RADEONINMC(pScrn, addr)
-
 #define OUTMC(pScrn, addr, val) RADEONOUTMC(pScrn, addr, val)
 
+#define INPCIE(pScrn, addr) RADEONINPCIE(pScrn, addr)
+#define OUTPCIE(pScrn, addr, val) RADEONOUTPCIE(pScrn, addr, val)
+
 #endif
diff --git a/src/radeon_output.c b/src/radeon_output.c
index 3931db4..712ac5f 100644
--- a/src/radeon_output.c
+++ b/src/radeon_output.c
@@ -110,7 +110,6 @@ extern void atombios_output_mode_set(xf86OutputPtr output,
 				     DisplayModePtr adjusted_mode);
 extern void atombios_output_dpms(xf86OutputPtr output, int mode);
 extern RADEONMonitorType atombios_dac_detect(xf86OutputPtr output);
-extern int atombios_external_tmds_setup(xf86OutputPtr output, DisplayModePtr mode);
 extern AtomBiosResult
 atombios_lock_crtc(atomBiosHandlePtr atomBIOS, int crtc, int lock);
 static void
@@ -521,32 +520,8 @@ radeon_mode_fixup(xf86OutputPtr output, DisplayModePtr mode,
 static void
 radeon_mode_prepare(xf86OutputPtr output)
 {
-    RADEONInfoPtr info = RADEONPTR(output->scrn);
-    xf86CrtcConfigPtr	config = XF86_CRTC_CONFIG_PTR (output->scrn);
-    int o;
-
-    for (o = 0; o < config->num_output; o++) {
-	xf86OutputPtr loop_output = config->output[o];
-	if (loop_output == output)
-	    continue;
-	else if (loop_output->crtc) {
-	    xf86CrtcPtr other_crtc = loop_output->crtc;
-	    RADEONCrtcPrivatePtr other_radeon_crtc = other_crtc->driver_private;
-	    if (other_crtc->enabled) {
-		if (other_radeon_crtc->initialized) {
-		    radeon_crtc_dpms(other_crtc, DPMSModeOff);
-		    if (IS_AVIVO_VARIANT || info->r4xx_atom)
-			atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 1);
-		    radeon_dpms(loop_output, DPMSModeOff);
-		}
-	    }
-	}
-    }
-
     radeon_bios_output_lock(output, TRUE);
     radeon_dpms(output, DPMSModeOff);
-    radeon_crtc_dpms(output->crtc, DPMSModeOff);
-
 }
 
 static void
@@ -566,30 +541,7 @@ radeon_mode_set(xf86OutputPtr output, DisplayModePtr mode,
 static void
 radeon_mode_commit(xf86OutputPtr output)
 {
-    RADEONInfoPtr info = RADEONPTR(output->scrn);
-    xf86CrtcConfigPtr	config = XF86_CRTC_CONFIG_PTR (output->scrn);
-    int o;
-
-    for (o = 0; o < config->num_output; o++) {
-	xf86OutputPtr loop_output = config->output[o];
-	if (loop_output == output)
-	    continue;
-	else if (loop_output->crtc) {
-	    xf86CrtcPtr other_crtc = loop_output->crtc;
-	    RADEONCrtcPrivatePtr other_radeon_crtc = other_crtc->driver_private;
-	    if (other_crtc->enabled) {
-		if (other_radeon_crtc->initialized) {
-		    radeon_crtc_dpms(other_crtc, DPMSModeOn);
-		    if (IS_AVIVO_VARIANT || info->r4xx_atom)
-			atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 0);
-		    radeon_dpms(loop_output, DPMSModeOn);
-		}
-	    }
-	}
-    }
-
     radeon_dpms(output, DPMSModeOn);
-    radeon_crtc_dpms(output->crtc, DPMSModeOn);
     radeon_bios_output_lock(output, FALSE);
 }
 
@@ -1263,10 +1215,21 @@ radeon_create_resources(xf86OutputPtr output)
 		       "RRConfigureOutputProperty error, %d\n", err);
 	}
 	/* Set the current value of the property */
-	if (radeon_output->devices & (ATOM_DEVICE_LCD_SUPPORT))
-	    s = "full";
-	else
+	switch (radeon_output->rmx_type) {
+	case RMX_OFF:
+	default:
 	    s = "off";
+	    break;
+	case RMX_FULL:
+	    s = "full";
+	    break;
+	case RMX_CENTER:
+	    s = "center";
+	    break;
+	case RMX_ASPECT:
+	    s = "aspect";
+	    break;
+	}
 	err = RRChangeOutputProperty(output->randr_output, rmx_atom,
 				     XA_STRING, 8, PropModeReplace, strlen(s), (pointer)s,
 				     FALSE, FALSE);
@@ -1884,6 +1847,10 @@ void RADEONInitConnector(xf86OutputPtr output)
     else
 	radeon_output->rmx_type = RMX_OFF;
 
+    /* dce 3.2 chips have problems with low dot clocks, so use the scaler */
+    if (IS_DCE32_VARIANT && (radeon_output->devices & (ATOM_DEVICE_DFP_SUPPORT)))
+	radeon_output->rmx_type = RMX_FULL;
+
     if (!IS_AVIVO_VARIANT) {
 	if (radeon_output->devices & (ATOM_DEVICE_CRT2_SUPPORT)) {
 	    if (xf86ReturnOptValBool(info->Options, OPTION_TVDAC_LOAD_DETECT, FALSE))
@@ -2067,12 +2034,12 @@ static Bool RADEONSetupAppleConnectors(ScrnInfoPtr pScrn)
 	info->BiosConnector[0].load_detection = FALSE;
 	info->BiosConnector[0].ConnectorType = CONNECTOR_DVI_I;
 	info->BiosConnector[0].valid = TRUE;
-	info->BiosConnector[0].devices = ATOM_DEVICE_CRT1_SUPPORT | ATOM_DEVICE_DFP2_SUPPORT;
+	info->BiosConnector[0].devices = ATOM_DEVICE_CRT2_SUPPORT | ATOM_DEVICE_DFP2_SUPPORT;
 	if (!radeon_add_encoder(pScrn,
 				radeon_get_encoder_id_from_supported_device(pScrn,
-									    ATOM_DEVICE_CRT1_SUPPORT,
-									    1),
-				ATOM_DEVICE_CRT1_SUPPORT))
+									    ATOM_DEVICE_CRT2_SUPPORT,
+									    2),
+				ATOM_DEVICE_CRT2_SUPPORT))
 	    return FALSE;
 	if (!radeon_add_encoder(pScrn,
 				radeon_get_encoder_id_from_supported_device(pScrn,
@@ -2098,12 +2065,12 @@ static Bool RADEONSetupAppleConnectors(ScrnInfoPtr pScrn)
 	info->BiosConnector[0].load_detection = FALSE;
 	info->BiosConnector[0].ConnectorType = CONNECTOR_DVI_I;
 	info->BiosConnector[0].valid = TRUE;
-	info->BiosConnector[0].devices = ATOM_DEVICE_CRT1_SUPPORT | ATOM_DEVICE_DFP1_SUPPORT;
+	info->BiosConnector[0].devices = ATOM_DEVICE_CRT2_SUPPORT | ATOM_DEVICE_DFP1_SUPPORT;
 	if (!radeon_add_encoder(pScrn,
 				radeon_get_encoder_id_from_supported_device(pScrn,
-									    ATOM_DEVICE_CRT1_SUPPORT,
-									    1),
-				ATOM_DEVICE_CRT1_SUPPORT))
+									    ATOM_DEVICE_CRT2_SUPPORT,
+									    2),
+				ATOM_DEVICE_CRT2_SUPPORT))
 	    return FALSE;
 	if (!radeon_add_encoder(pScrn,
 				radeon_get_encoder_id_from_supported_device(pScrn,
@@ -2502,11 +2469,16 @@ static RADEONMacModel RADEONDetectMacModel(ScrnInfoPtr pScrn)
 static int
 radeon_output_clones (ScrnInfoPtr pScrn, xf86OutputPtr output)
 {
+    RADEONInfoPtr info = RADEONPTR(pScrn);
     RADEONOutputPrivatePtr radeon_output = output->driver_private;
     xf86CrtcConfigPtr	config = XF86_CRTC_CONFIG_PTR (pScrn);
     int			o;
     int			index_mask = 0;
 
+    /* DIG routing gets problematic */
+    if (IS_DCE32_VARIANT)
+	return index_mask;
+
     /* LVDS is too wacky */
     if (radeon_output->devices & (ATOM_DEVICE_LCD_SUPPORT))
 	return index_mask;
diff --git a/src/radeon_pci_chipset_gen.h b/src/radeon_pci_chipset_gen.h
index 31b032a..d61c57d 100644
--- a/src/radeon_pci_chipset_gen.h
+++ b/src/radeon_pci_chipset_gen.h
@@ -265,6 +265,8 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_RV770_9456, PCI_CHIP_RV770_9456, RES_SHARED_VGA },
  { PCI_CHIP_RV770_945A, PCI_CHIP_RV770_945A, RES_SHARED_VGA },
  { PCI_CHIP_RV770_945B, PCI_CHIP_RV770_945B, RES_SHARED_VGA },
+ { PCI_CHIP_RV790_9460, PCI_CHIP_RV790_9460, RES_SHARED_VGA },
+ { PCI_CHIP_RV790_9462, PCI_CHIP_RV790_9462, RES_SHARED_VGA },
  { PCI_CHIP_RV770_946A, PCI_CHIP_RV770_946A, RES_SHARED_VGA },
  { PCI_CHIP_RV770_946B, PCI_CHIP_RV770_946B, RES_SHARED_VGA },
  { PCI_CHIP_RV770_947A, PCI_CHIP_RV770_947A, RES_SHARED_VGA },
@@ -349,5 +351,12 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_RS780_9612, PCI_CHIP_RS780_9612, RES_SHARED_VGA },
  { PCI_CHIP_RS780_9613, PCI_CHIP_RS780_9613, RES_SHARED_VGA },
  { PCI_CHIP_RS780_9614, PCI_CHIP_RS780_9614, RES_SHARED_VGA },
+ { PCI_CHIP_RS780_9615, PCI_CHIP_RS780_9615, RES_SHARED_VGA },
+ { PCI_CHIP_RS780_9616, PCI_CHIP_RS780_9616, RES_SHARED_VGA },
+ { PCI_CHIP_RS880_9710, PCI_CHIP_RS880_9710, RES_SHARED_VGA },
+ { PCI_CHIP_RS880_9711, PCI_CHIP_RS880_9711, RES_SHARED_VGA },
+ { PCI_CHIP_RS880_9712, PCI_CHIP_RS880_9712, RES_SHARED_VGA },
+ { PCI_CHIP_RS880_9713, PCI_CHIP_RS880_9713, RES_SHARED_VGA },
+ { PCI_CHIP_RS880_9714, PCI_CHIP_RS880_9714, RES_SHARED_VGA },
  { -1,                 -1,                 RES_UNDEFINED }
 };
diff --git a/src/radeon_pci_device_match_gen.h b/src/radeon_pci_device_match_gen.h
index b310ce8..a06b4a6 100644
--- a/src/radeon_pci_device_match_gen.h
+++ b/src/radeon_pci_device_match_gen.h
@@ -265,6 +265,8 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_9456, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_945A, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_945B, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RV790_9460, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RV790_9462, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_946A, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_946B, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RV770_947A, 0 ),
@@ -349,5 +351,12 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_RS780_9612, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RS780_9613, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_RS780_9614, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS780_9615, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS780_9616, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS880_9710, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS880_9711, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS880_9712, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS880_9713, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_RS880_9714, 0 ),
  { 0, 0, 0 }
 };
diff --git a/src/radeon_probe.h b/src/radeon_probe.h
index a0c6b2c..6479972 100644
--- a/src/radeon_probe.h
+++ b/src/radeon_probe.h
@@ -271,8 +271,10 @@ typedef struct _RADEONOutputPrivateRec {
     radeon_tvout_rec tvout;
 
     /* dce 3.x dig block */
-    int transmitter_config;
     int igp_lane_info;
+    int dig_block;
+
+    int pixel_clock;
 } RADEONOutputPrivateRec, *RADEONOutputPrivatePtr;
 
 struct avivo_pll_state {
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 0af8859..d74a30a 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -274,6 +274,9 @@
 #define RADEON_BUS_CNTL1                    0x0034
 #       define RADEON_BUS_WAIT_ON_LOCK_EN    (1 << 4)
 
+#define RADEON_PCIE_INDEX                   0x0030
+#define RADEON_PCIE_DATA                    0x0034
+
 #define RADEON_CACHE_CNTL                   0x1724
 #define RADEON_CACHE_LINE                   0x0f0c /* PCI */
 #define RADEON_CAPABILITIES_ID              0x0f50 /* PCI */
@@ -3027,6 +3030,18 @@
 #       define R200_TXA_REPL_ARG_B_MASK		(3 << 28)
 #       define R200_TXA_REPL_ARG_C_SHIFT	30
 #       define R200_TXA_REPL_ARG_C_MASK		(3 << 30)
+#define R200_PP_TXCBLEND_1			0x2f10
+#define R200_PP_TXCBLEND2_1			0x2f14
+#define R200_PP_TXABLEND_1			0x2f18
+#define R200_PP_TXABLEND2_1			0x2f1c
+#define R200_PP_TXCBLEND_2			0x2f20
+#define R200_PP_TXCBLEND2_2			0x2f24
+#define R200_PP_TXABLEND_2			0x2f28
+#define R200_PP_TXABLEND2_2			0x2f2c
+#define R200_PP_TXCBLEND_3			0x2f30
+#define R200_PP_TXCBLEND2_3			0x2f34
+#define R200_PP_TXABLEND_3			0x2f38
+#define R200_PP_TXABLEND2_3			0x2f3c
 
 #define R200_SE_VTX_FMT_0			0x2088
 #       define R200_VTX_XY			0 /* always have xy */
@@ -3291,7 +3306,9 @@
 #       define RADEON_RGB_CONVERT_BY_PASS	  (1 << 10)
 #       define RADEON_UVRAM_READ_MARGIN_SHIFT	  16
 #       define RADEON_FIFORAM_FFMACRO_READ_MARGIN_SHIFT	  20
+#	define RADEON_RGB_ATTEN_SEL(x) 		  ((x) << 24)
 #	define RADEON_TVOUT_SCALE_EN 		  (1 << 26)
+#	define RADEON_RGB_ATTEN_VAL(x) 		  ((x) << 28)
 #define RADEON_TV_SYNC_CNTL                          0x0808
 #       define RADEON_SYNC_OE                     (1 <<  0)
 #       define RADEON_SYNC_OUT                    (1 <<  1)
@@ -3610,6 +3627,13 @@
 #       define AVIVO_D1GRPH_MACRO_ADDRESS_MODE          (1<<21)
 
 #define AVIVO_D1GRPH_LUT_SEL                                    0x6108
+
+#define R600_D1GRPH_SWAP_CONTROL                               0x610C
+#       define R600_D1GRPH_SWAP_ENDIAN_NONE                    (0 << 0)
+#       define R600_D1GRPH_SWAP_ENDIAN_16BIT                   (1 << 0)
+#       define R600_D1GRPH_SWAP_ENDIAN_32BIT                   (2 << 0)
+#       define R600_D1GRPH_SWAP_ENDIAN_64BIT                   (3 << 0)
+
 #define AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS                    0x6110
 #define AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS                  0x6118
 #define AVIVO_D1GRPH_PITCH                                      0x6120
@@ -4406,6 +4430,7 @@
 #define R300_TX_INVALTAGS				0x4100
 #define R300_TX_FILTER0_0				0x4400
 #define R300_TX_FILTER0_1				0x4404
+#define R300_TX_FILTER0_2				0x4408
 #       define R300_TX_CLAMP_S(x)                       ((x) << 0)
 #       define R300_TX_CLAMP_T(x)                       ((x) << 3)
 #       define R300_TX_CLAMP_R(x)                       ((x) << 6)
@@ -4424,8 +4449,10 @@
 #       define R300_TX_ID_SHIFT                         28
 #define R300_TX_FILTER1_0				0x4440
 #define R300_TX_FILTER1_1				0x4444
+#define R300_TX_FILTER1_2				0x4448
 #define R300_TX_FORMAT0_0				0x4480
 #define R300_TX_FORMAT0_1				0x4484
+#define R300_TX_FORMAT0_2				0x4488
 #       define R300_TXWIDTH_SHIFT                       0
 #       define R300_TXHEIGHT_SHIFT                      11
 #       define R300_NUM_LEVELS_SHIFT                    26
@@ -4434,6 +4461,7 @@
 #       define R300_TXPITCH_EN                          (1 << 31)
 #define R300_TX_FORMAT1_0				0x44c0
 #define R300_TX_FORMAT1_1				0x44c4
+#define R300_TX_FORMAT1_2				0x44c8
 #	define R300_TX_FORMAT_X8		    0x0
 #	define R300_TX_FORMAT_X16		    0x1
 #	define R300_TX_FORMAT_Y4X4		    0x2
@@ -4506,13 +4534,23 @@
 #       define R300_TX_FORMAT_YUV_TO_RGB_NO_CLAMP      (2 << 22)
 #       define R300_TX_FORMAT_SWAP_YUV                 (1 << 24)
 
+#       define R300_TX_FORMAT_CACHE_WHOLE              (0 << 27)
+#       define R300_TX_FORMAT_CACHE_HALF_REGION_0      (2 << 27)
+#       define R300_TX_FORMAT_CACHE_HALF_REGION_1      (3 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_0    (4 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_1    (5 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_2    (6 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_3    (7 << 27)
+
 #define R300_TX_FORMAT2_0				0x4500
 #define R300_TX_FORMAT2_1				0x4504
+#define R300_TX_FORMAT2_2				0x4508
 #       define R500_TXWIDTH_11                          (1 << 15)
 #       define R500_TXHEIGHT_11                         (1 << 16)
 
 #define R300_TX_OFFSET_0				0x4540
 #define R300_TX_OFFSET_1				0x4544
+#define R300_TX_OFFSET_2				0x4548
 #       define R300_ENDIAN_SWAP_16_BIT                  (1 << 0)
 #       define R300_ENDIAN_SWAP_32_BIT                  (2 << 0)
 #       define R300_ENDIAN_SWAP_HALF_DWORD              (3 << 0)
@@ -4523,6 +4561,7 @@
 #define R300_TX_ENABLE				        0x4104
 #       define R300_TEX_0_ENABLE                        (1 << 0)
 #       define R300_TEX_1_ENABLE                        (1 << 1)
+#       define R300_TEX_2_ENABLE                        (1 << 2)
 
 #define R300_US_W_FMT				        0x46b4
 #define R300_US_OUT_FMT_1				0x46a8
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 2df299f..79671c0 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -119,6 +119,15 @@ static __inline__ uint32_t F_TO_24(float val)
 	return float24;
 }
 
+static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa)
+{   /* pack as 0xAARRGGBB; each channel is clamped to [0,1] and rounded to 8 bits (NaN -> 0) */
+    unsigned ur = !(fr > 0.0) ? 0 : (fr >= 1.0) ? 255 : (unsigned)(fr * 255.0 + 0.5);
+    unsigned ug = !(fg > 0.0) ? 0 : (fg >= 1.0) ? 255 : (unsigned)(fg * 255.0 + 0.5);
+    unsigned ub = !(fb > 0.0) ? 0 : (fb >= 1.0) ? 255 : (unsigned)(fb * 255.0 + 0.5);
+    unsigned ua = !(fa > 0.0) ? 0 : (fa >= 1.0) ? 255 : (unsigned)(fa * 255.0 + 0.5);
+    return (ua << 24) | (ur << 16) | (ug << 8) | ub;
+}
+
 #define ACCEL_MMIO
 #define ACCEL_PREAMBLE()	unsigned char *RADEONMMIO = info->MMIO
 #define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
@@ -304,8 +313,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
     INT32 x1, x2, y1, y2;
-    int srcPitch, srcPitch2, dstPitch;
+    int srcPitch, srcPitch2, dstPitch, dstPitch2 = 0;
     int s2offset, s3offset, tmp;
+    int d2line, d3line;
     int top, left, npixels, nlines, size;
     BoxRec dstBox;
     int dst_width = width, dst_height = height;
@@ -327,36 +337,64 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     if (!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2, clipBoxes, width, height))
 	return Success;
 
-    src_w = (x2 - x1) >> 16;
+/*    src_w = (x2 - x1) >> 16;
     src_h = (y2 - y1) >> 16;
     drw_w = dstBox.x2 - dstBox.x1;
-    drw_h = dstBox.y2 - dstBox.y1;
+    drw_h = dstBox.y2 - dstBox.y1;*/
 
     if ((x1 >= x2) || (y1 >= y2))
 	return Success;
 
+    /* Bicubic filter setup */
+    pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
+    if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
+	pPriv->bicubic_enabled = FALSE;
+    if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
+	/*
+	 * Applying the bicubic filter with a scale of less than 200%
+	 * results in a blurred picture, so disable the filter.
+	 */
+	if ((src_w > drw_w / 2) || (src_h > drw_h / 2))
+	    pPriv->bicubic_enabled = FALSE;
+    }
+
+    pPriv->planar_hw = pPriv->planar_state;
+    if (pPriv->bicubic_enabled || !( IS_R300_3D ||
+	    (info->ChipFamily == CHIP_FAMILY_RV250) ||
+	    (info->ChipFamily == CHIP_FAMILY_RV280) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS300) ||
+	    (info->ChipFamily == CHIP_FAMILY_R200) ))
+        pPriv->planar_hw = 0;
+
     switch(id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	dstPitch = ((dst_width << 1) + 15) & ~15;
 	srcPitch = (width + 3) & ~3;
 	srcPitch2 = ((width >> 1) + 3) & ~3;
-	size = dstPitch * dst_height;
+        if (pPriv->planar_hw) {
+	    dstPitch = (dst_width + 15) & ~15;
+	    dstPitch = (dstPitch + 63) & ~63;
+	    dstPitch2 = ((dst_width >> 1) + 15) & ~15;
+	    dstPitch2 = (dstPitch2 + 63) & ~63;
+	} else {
+	    dstPitch = ((dst_width << 1) + 15) & ~15;
+	    dstPitch = (dstPitch + 63) & ~63;
+	}
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
     default:
 	dstPitch = ((dst_width << 1) + 15) & ~15;
+	dstPitch = (dstPitch + 63) & ~63;
 	srcPitch = (width << 1);
 	srcPitch2 = 0;
-	size = dstPitch * dst_height;
 	break;
     }
 
     if (info->ChipFamily >= CHIP_FAMILY_R600)
 	dstPitch = (dstPitch + 255) & ~255;
-    else
-	dstPitch = (dstPitch + 63) & ~63;
+
+    size = dstPitch * dst_height + 2 * dstPitch2 * ((dst_height + 1) >> 1);
 
     if (pPriv->video_memory != NULL && size != pPriv->size) {
 	radeon_legacy_free_memory(pScrn, pPriv->video_memory);
@@ -376,19 +414,6 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 	    return BadAlloc;
     }
 
-    /* Bicubic filter setup */
-    pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
-    if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
-	pPriv->bicubic_enabled = FALSE;
-    if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
-	/*
-	 * Applying the bicubic filter with a scale of less than 200%
-	 * results in a blurred picture, so disable the filter.
-	 */
-	if ((src_w > drw_w / 2) || (src_h > drw_h / 2))
-	    pPriv->bicubic_enabled = FALSE;
-    }
-
     /* Bicubic filter loading */
     if (pPriv->bicubic_memory == NULL && pPriv->bicubic_enabled) {
 	pPriv->bicubic_offset = radeon_legacy_allocate_memory(pScrn,
@@ -432,10 +457,16 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     else
 	pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch));
     pPriv->src_pitch = dstPitch;
+    pPriv->planeu_offset = dstPitch * dst_height;
+    pPriv->planev_offset = pPriv->planeu_offset + dstPitch2 * ((dst_height + 1) >> 1);
     pPriv->size = size;
     pPriv->pDraw = pDraw;
 
+
 #if 0
+    ErrorF("planeu_offset: 0x%x\n", pPriv->planeu_offset);
+    ErrorF("planev_offset: 0x%x\n", pPriv->planev_offset);
+    ErrorF("dstPitch2: 0x%x\n", dstPitch2);
     ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
     ErrorF("src_addr: 0x%x\n", pPriv->src_addr);
     ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
@@ -470,12 +501,34 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 				     srcPitch, srcPitch2, pPriv->src_pitch,
 				     width, height);
 	    }
+	}
+        else if (pPriv->planar_hw) {
+	    top &= ~1;
+	    s2offset = srcPitch * ((height + 1) & ~1);
+	    s3offset = s2offset + srcPitch2 * ((height + 1) >> 1);
+	    s2offset += (top >> 1) * srcPitch2 + (left >> 1);
+	    s3offset += (top >> 1) * srcPitch2 + (left >> 1);
+	    d2line = pPriv->planeu_offset;
+	    d3line = pPriv->planev_offset;
+	    d2line += (top >> 1) * dstPitch2 - (top * dstPitch);
+	    d3line += (top >> 1) * dstPitch2 - (top * dstPitch);
+	    nlines = ((y2 + 0xffff) >> 16) - top;
+	    if(id == FOURCC_YV12) {
+		tmp = s2offset;
+		s2offset = s3offset;
+		s3offset = tmp;
+	    }
+	    RADEONCopyData(pScrn, buf + (top * srcPitch) + left, pPriv->src_addr + left,
+		srcPitch, dstPitch, nlines, npixels, 1);
+	    RADEONCopyData(pScrn, buf + s2offset,  pPriv->src_addr + d2line + (left >> 1),
+		srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
+	    RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line + (left >> 1),
+		srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
 	} else {
 	    top &= ~1;
 	    nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
 	    s2offset = srcPitch * height;
 	    s3offset = (srcPitch2 * (height >> 1)) + s2offset;
-	    top &= ~1;
 	    pPriv->src_addr += left << 1;
 	    tmp = ((top >> 1) * srcPitch2) + (left >> 1);
 	    s2offset += tmp;
@@ -504,7 +557,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 				 width, height);
 	} else {
 	    nlines = ((y2 + 0xffff) >> 16) - top;
-	    RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2);
+	    pPriv->src_addr += left << 1;
+	    RADEONCopyData(pScrn, buf + (top * srcPitch) + (left << 1),
+			   pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2);
 	}
 	break;
     }
@@ -581,25 +636,28 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] =
     {15, TrueColor}, {16, TrueColor}, {24, TrueColor}
 };
 
-#define NUM_ATTRIBUTES 1
+#define NUM_ATTRIBUTES 2
 
 static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
 {
     {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+    {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},	/* 0/1, stored in pPriv->planar_state */
     {0, 0, 0, NULL}
 };
 
-#define NUM_ATTRIBUTES_R300 2
+#define NUM_ATTRIBUTES_R300 3
 
 static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
 {
     {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
     {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+    {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},	/* 0/1, stored in pPriv->planar_state */
     {0, 0, 0, NULL}
 };
 
 static Atom xvBicubic;
 static Atom xvVSync;
+static Atom xvHWPlanar;
 
 #define NUM_IMAGES 4
 
@@ -626,6 +684,8 @@ RADEONGetTexPortAttribute(ScrnInfoPtr  pScrn,
 	*value = pPriv->bicubic_state;
     else if (attribute == xvVSync)
 	*value = pPriv->vsync;
+    else if (attribute == xvHWPlanar)
+	*value = pPriv->planar_state;
     else
 	return BadMatch;
 
@@ -647,6 +707,8 @@ RADEONSetTexPortAttribute(ScrnInfoPtr  pScrn,
 	pPriv->bicubic_state = ClipValue (value, 0, 2);
     else if (attribute == xvVSync)
 	pPriv->vsync = ClipValue (value, 0, 1);
+    else if (attribute == xvHWPlanar)
+	pPriv->planar_state = ClipValue (value, 0, 1);
     else
 	return BadMatch;
 
@@ -670,6 +732,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
 
     xvBicubic         = MAKE_ATOM("XV_BICUBIC");
     xvVSync           = MAKE_ATOM("XV_VSYNC");
+    xvHWPlanar        = MAKE_ATOM("XV_HWPLANAR");
 
     adapt->type = XvWindowMask | XvInputMask | XvImageMask;
     adapt->flags = 0;
@@ -719,6 +782,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
 	pPriv->doubleBuffer = 0;
 	pPriv->bicubic_state = BICUBIC_AUTO;
 	pPriv->vsync = TRUE;
+	pPriv->planar_state = 1;
 
 	/* gotta uninit this someplace, XXX: shouldn't be necessary for textured */
 	REGION_NULL(pScreen, &pPriv->clip);
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index f55ae12..05acb93 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -97,6 +97,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     uint32_t dst_offset, dst_pitch, dst_format;
     uint32_t txenable, colorpitch;
     uint32_t blendcntl;
+    Bool isplanar = FALSE;
     int dstxoff, dstyoff, pixel_shift, vtx_count;
     BoxPtr pBox = REGION_RECTS(&pPriv->clip);
     int nBox = REGION_NUM_RECTS(&pPriv->clip);
@@ -181,16 +182,29 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	if (RADEONTilingEnabled(pScrn, pPixmap))
 	    colorpitch |= R300_COLORTILE;
 
-	if (pPriv->id == FOURCC_UYVY)
-	    txformat1 = R300_TX_FORMAT_YVYU422;
-	else
-	    txformat1 = R300_TX_FORMAT_VYUY422;
+	if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+	    isplanar = TRUE;
+	}
+
+	if (isplanar) {
+	    txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+	    txpitch = pPriv->src_pitch;
+	} else {
+	    if (pPriv->id == FOURCC_UYVY)
+		txformat1 = R300_TX_FORMAT_YVYU422;
+	    else
+		txformat1 = R300_TX_FORMAT_VYUY422;
+
+	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
 
-	txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+	    /* pitch is in pixels */
+	    txpitch = pPriv->src_pitch / 2;
+	}
+	txpitch -= 1;
 
 	txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
-		     (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
-		     R300_TXPITCH_EN);
+		    (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+		    R300_TXPITCH_EN);
 
 	info->accel_state->texW[0] = pPriv->w;
 	info->accel_state->texH[0] = pPriv->h;
@@ -201,9 +215,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		    R300_TX_MIN_FILTER_LINEAR |
 		    (0 << R300_TX_ID_SHIFT));
 
-	/* pitch is in pixels */
-	txpitch = pPriv->src_pitch / 2;
-	txpitch -= 1;
 
 	if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
 	    txpitch |= R500_TXWIDTH_11;
@@ -224,6 +235,34 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	txenable = R300_TEX_0_ENABLE;
 
+	if (isplanar) {
+	    txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+			(((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+			R300_TXPITCH_EN);
+	    txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+	    txpitch -= 1;
+	    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+			R300_TX_MIN_FILTER_LINEAR |
+			R300_TX_MAG_FILTER_LINEAR);
+
+		BEGIN_ACCEL(12);
+		OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+		OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+		OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+		OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+		OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+		OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+		OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+		OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+		OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+		OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+		OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+		OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+		FINISH_ACCEL();
+		txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+	}
+
 	if (pPriv->bicubic_enabled) {
 		/* Size is 128x1 */
 		txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
@@ -691,6 +730,171 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
 
 		FINISH_ACCEL();
+	    } else if (isplanar) {
+	    /*
+	     * y' = y - .0625
+	     * u' = u - .5
+	     * v' = v - .5;
+	     *
+	     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
+	     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+	     * b = 1.1643 * y' + 2.018   * u' + 0.0     * v'
+	     *
+	     * DP3 might look like the straightforward solution
+	     * but we'd need to move the texture yuv values in
+	     * the same reg for this to work. Therefore use MADs.
+	     * Without changing the shader at all (only the constants)
+	     * could also provide hue/saturation/brightness/contrast control.
+	     *
+	     * yco = 1.1643
+	     * uco = 0, -0.39173, 2.018
+	     * vco = 1.5958, -0.8129, 0
+	     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+	     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+	     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+	     *
+	     * temp = MAD(yco, yuv.yyyy, off)
+	     * temp = MAD(uco, yuv.uuuu, temp)
+	     * result = MAD(vco, yuv.vvvv, temp)
+	     */
+		float yco = 1.1643;
+		float uco[3] = {0.0, -0.39173, 2.018};
+		float vco[3] = {1.5958, -0.8129, 0.0};
+		float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0],
+				-0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1],
+				-0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]};
+
+		BEGIN_ACCEL(33);
+		/* 2 components: same 2 for tex0/1/2 */
+		OUT_ACCEL_REG(R300_RS_COUNT,
+			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+			   R300_RS_COUNT_HIRES_EN));
+		/* R300_INST_COUNT_RS - highest RS instruction used */
+		OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+		OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+		/* Indirection levels */
+		OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+							R300_FIRST_TEX));
+
+		OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+						   R300_ALU_CODE_SIZE(3) |
+						   R300_TEX_CODE_OFFSET(0) |
+						   R300_TEX_CODE_SIZE(3)));
+
+		OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+						   R300_ALU_SIZE(2) |
+						   R300_TEX_START(0) |
+						   R300_TEX_SIZE(2) |
+						   R300_RGBA_OUT));
+
+		/* tex inst */
+		OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(0) |
+						  R300_TEX_ID(0) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+		OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(1) |
+						  R300_TEX_ID(1) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+		OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(2) |
+						  R300_TEX_ID(2) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+
+		/* ALU inst */
+		/* MAD temp0, const0.a, temp0, const0.rgb */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+						   R300_ALU_RGB_ADDR1(0) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+		/* alpha nop, but need to set up alpha source for rgb usage */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+						   R300_ALU_ALPHA_ADDR1(0) |
+						   R300_ALU_ALPHA_ADDR2(0) |
+						   R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+		/* MAD temp0, const1, temp1, temp0 */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+						   R300_ALU_RGB_ADDR1(1) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+		/* alpha nop */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+		/* MAD result, const2, temp2, temp0 */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+						   R300_ALU_RGB_ADDR1(2) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+						   R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+						   R300_ALU_RGB_CLAMP));
+		/* write alpha 1: alpha half of ALU instruction 2, pairing with R300_US_ALU_RGB_ADDR/INST(2) above */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+						   R300_ALU_ALPHA_TARGET_A));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+		/* Shader constants. */
+		/* constant 0: off, yco */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+		/* constant 1: uco */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0));
+		/* constant 2: vco */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
+
+		FINISH_ACCEL();
+
 	    } else {
 		BEGIN_ACCEL(11);
 		/* 2 components: 2 for tex0 */
@@ -760,7 +964,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
 						   R300_ALU_ALPHA_CLAMP));
 		FINISH_ACCEL();
-		}
+	    }
 	} else {
 	    if (pPriv->bicubic_enabled) {
 		BEGIN_ACCEL(7);
@@ -1363,10 +1567,18 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	    return;
 	}
 
-	if (pPriv->id == FOURCC_UYVY)
-	    txformat = RADEON_TXFORMAT_YVYU422;
-	else
-	    txformat = RADEON_TXFORMAT_VYUY422;
+	if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+	    isplanar = TRUE;
+	}
+
+	if (isplanar) {
+	    txformat = RADEON_TXFORMAT_I8;
+	} else {
+	    if (pPriv->id == FOURCC_UYVY)
+		txformat = RADEON_TXFORMAT_YVYU422;
+	    else
+		txformat = RADEON_TXFORMAT_VYUY422;
+	}
 
 	txformat |= RADEON_TXFORMAT_NON_POWER2;
 
@@ -1375,12 +1587,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	if (RADEONTilingEnabled(pScrn, pPixmap))
 	    colorpitch |= RADEON_COLOR_TILE_ENABLE;
 
-	BEGIN_ACCEL(5);
+	BEGIN_ACCEL(4);
 
-	OUT_ACCEL_REG(RADEON_PP_CNTL,
-		      RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
 	OUT_ACCEL_REG(RADEON_RB3D_CNTL,
-		      dst_format | RADEON_ALPHA_BLEND_ENABLE);
+		      dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
 	OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
 
 	OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
@@ -1399,48 +1609,346 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	    info->accel_state->texW[0] = pPriv->w;
 	    info->accel_state->texH[0] = pPriv->h;
 
-	    BEGIN_ACCEL(12);
-
-	    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
-	    OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
-			  (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
-	    OUT_ACCEL_REG(R200_PP_TXFILTER_0,
-			  R200_MAG_FILTER_LINEAR |
-			  R200_MIN_FILTER_LINEAR |
-			  R200_CLAMP_S_CLAMP_LAST |
-			  R200_CLAMP_T_CLAMP_LAST |
-			  R200_YUV_TO_RGB);
-	    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
-	    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
-	    OUT_ACCEL_REG(R200_PP_TXSIZE_0,
-			  (pPriv->w - 1) |
-			  ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
-	    OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-
-	    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
-	    OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
-			  R200_TXC_ARG_A_ZERO |
-			  R200_TXC_ARG_B_ZERO |
-			  R200_TXC_ARG_C_R0_COLOR |
-			  R200_TXC_OP_MADD);
-	    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
-			  R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
-	    OUT_ACCEL_REG(R200_PP_TXABLEND_0,
-			  R200_TXA_ARG_A_ZERO |
-			  R200_TXA_ARG_B_ZERO |
-			  R200_TXA_ARG_C_R0_ALPHA |
-			  R200_TXA_OP_MADD);
-	    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
-			  R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
-	    FINISH_ACCEL();
+	    if (isplanar) {
+		/* note: in contrast to r300, use input biasing on uv components */
+		float yco = 1.1643;
+		float yoff = -0.0625 * yco;
+		float uco[3] = {0.0, -0.39173, 2.018};
+		float vco[3] = {1.5958, -0.8129, 0.0};
+
+		/* need 2 texcoord sets (even though they are identical) due
+		   to denormalization! hw apparently can't premultiply
+		   same coord set by different texture size */
+		vtx_count = 6;
+
+		txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+			    (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+		txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+		txpitch -= 32;
+		txfilter =  R200_MAG_FILTER_LINEAR |
+			    R200_MIN_FILTER_LINEAR |
+			    R200_CLAMP_S_CLAMP_LAST |
+			    R200_CLAMP_T_CLAMP_LAST;
+
+		BEGIN_ACCEL(36);
+
+		OUT_ACCEL_REG(RADEON_PP_CNTL,
+			      RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
+			      RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
+			      RADEON_TEX_BLEND_2_ENABLE);
+
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+			      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+			      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+
+		OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+		OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+			      (pPriv->w - 1) |
+			      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+		OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+		OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+		OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
+		OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
+		OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
+		OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+
+		OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
+		OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
+		OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
+		OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+
+		/* similar to r300 code. Note the big problem is that hardware constants
+		 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
+		 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
+		 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
+		 * the constants not. To get larger range can use output scale, but for
+		 * that 2.018 value we need a total scale by 8, which means the constants
+		 * really have no accuracy whatsoever (5 fractional bits only).
+		 * The only direct way to get high  precision "constants" into the fragment
+		 * pipe I know of is to use the texcoord interpolator (not color, this one
+		 * is 8 bit only too), which seems a bit expensive. We're lucky though it
+		 * seems the values we need seem to fit better than worst case (get about
+		 * 6 fractional bits for this instead of 5, at least when not correcting for
+		 * hue/saturation/contrast/brightness, which is the same as for vco - yco and
+		 * yoff get 8 fractional bits).
+		 *
+		 * A higher precision (8 fractional bits) version might just put uco into
+		 * a texcoord, and calculate a new vcoconst in the shader, like so:
+		 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
+		 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
+		 * vcocalc = ADD temp, bias/scale(cohelper), vco
+		 * would in total use 4 tex units, 4 instructions which seems fairly
+		 * balanced for this architecture (instead of 3 + 3 for the solution here)
+		 *
+		 * temp = MAD(yco, yuv.yyyy, yoff)
+		 * temp = MAD(uco, yuv.uuuu, temp)
+		 * result = MAD(vco, yuv.vvvv, temp)
+		 *
+		 * note first mad produces actually scalar, hence we transform
+		 * it into a dp2a to get 8 bit precision of yco instead of 7 -
+		 * That's assuming hw correctly expands consts to internal precision.
+		 * (y * 1 + y * (yco - 1) + yoff)
+		 * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
+		 * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
+		 * result = MAD x2 (vco / 2, yuv.vvvv, temp)
+		 *
+		 * vco, uco need bias (and hence scale too)
+		 *
+		 */
+
+		/* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_ARG_B_R0_COLOR |
+			      R200_TXC_ARG_C_TFACTOR_COLOR |
+			      R200_TXC_NEG_ARG_C |
+			      R200_TXC_OP_DOT2_ADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+			      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      R200_TXC_SCALE_INV2 |
+			      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+			      R200_TXA_OUTPUT_REG_NONE);
+
+		/* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_BIAS_ARG_A |
+			      R200_TXC_SCALE_ARG_A |
+			      R200_TXC_ARG_B_R1_COLOR |
+			      R200_TXC_BIAS_ARG_B |
+			      R200_TXC_SCALE_ARG_B |
+			      R200_TXC_ARG_C_R0_COLOR |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+			      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+			      R200_TXA_OUTPUT_REG_NONE);
+
+		/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_BIAS_ARG_A |
+			      R200_TXC_SCALE_ARG_A |
+			      R200_TXC_ARG_B_R2_COLOR |
+			      R200_TXC_BIAS_ARG_B |
+			      R200_TXC_ARG_C_R0_COLOR |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+			      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      R200_TXC_SCALE_2X |
+			      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_COMP_ARG_C |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+			      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+		/* shader constants */
+		OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */
+							      yco - 1.0,
+							      -yoff, /* range [-1, 0] */
+							      0.0));
+		OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */
+							      uco[1] * 0.125 + 0.5,
+							      uco[2] * 0.125 + 0.5,
+							      0.0));
+		OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */
+							      vco[1] * 0.25 + 0.5,
+							      vco[2] * 0.25 + 0.5,
+							      0.0));
+
+		FINISH_ACCEL();
+	    }
+	    else if (info->ChipFamily == CHIP_FAMILY_RV250) {
+		/* fix up broken packed yuv - shader same as above except
+		   yuv components are all in same reg */
+		float yco = 1.1643;
+		float yoff = -0.0625 * yco;
+		float uco[3] = {0.0, -0.39173, 2.018};
+		float vco[3] = {1.5958, -0.8129, 0.0};
+
+		txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+			    (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+		txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+		txpitch -= 32;
+		txfilter =  R200_MAG_FILTER_LINEAR |
+			    R200_MIN_FILTER_LINEAR |
+			    R200_CLAMP_S_CLAMP_LAST |
+			    R200_CLAMP_T_CLAMP_LAST;
+
+		BEGIN_ACCEL(24);
+
+		OUT_ACCEL_REG(RADEON_PP_CNTL,
+			      RADEON_TEX_0_ENABLE |
+			      RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
+			      RADEON_TEX_BLEND_2_ENABLE);
+
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+			      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+		OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+		OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+			      (pPriv->w - 1) |
+			      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+		OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+		OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+		/* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb (multiply-add like stages 1 and 2, not a dot product) */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_ARG_B_R0_COLOR |
+			      R200_TXC_ARG_C_TFACTOR_COLOR |
+			      R200_TXC_NEG_ARG_C |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+			      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      R200_TXC_SCALE_INV2 |
+			      (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
+			      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+			      R200_TXA_OUTPUT_REG_NONE);
+
+		/* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_BIAS_ARG_A |
+			      R200_TXC_SCALE_ARG_A |
+			      R200_TXC_ARG_B_R0_COLOR |
+			      R200_TXC_BIAS_ARG_B |
+			      R200_TXC_SCALE_ARG_B |
+			      R200_TXC_ARG_C_R1_COLOR |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+			      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
+			      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+			      R200_TXA_OUTPUT_REG_NONE);
+
+		/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+			      R200_TXC_ARG_A_TFACTOR_COLOR |
+			      R200_TXC_BIAS_ARG_A |
+			      R200_TXC_SCALE_ARG_A |
+			      R200_TXC_ARG_B_R0_COLOR |
+			      R200_TXC_BIAS_ARG_B |
+			      R200_TXC_ARG_C_R1_COLOR |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+			      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+			      R200_TXC_SCALE_2X |
+			      (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
+			      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_ZERO |
+			      R200_TXA_COMP_ARG_C |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+			      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+		/* shader constants */
+		OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */
+							      yco - 1.0,
+							      -yoff, /* range [-1, 0] */
+							      0.0));
+		OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */
+							      uco[1] * 0.125 + 0.5,
+							      uco[2] * 0.125 + 0.5,
+							      0.0));
+		OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */
+							      vco[1] * 0.25 + 0.5,
+							      vco[2] * 0.25 + 0.5,
+							      0.0));
+
+		FINISH_ACCEL();
+	    }
+	    else {
+		BEGIN_ACCEL(13);
+		OUT_ACCEL_REG(RADEON_PP_CNTL,
+			      RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+		OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+			      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+		OUT_ACCEL_REG(R200_PP_TXFILTER_0,
+			      R200_MAG_FILTER_LINEAR |
+			      R200_MIN_FILTER_LINEAR |
+			      R200_CLAMP_S_CLAMP_LAST |
+			      R200_CLAMP_T_CLAMP_LAST |
+			      R200_YUV_TO_RGB);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+		OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+		OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+			      (pPriv->w - 1) |
+			      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+		OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+
+		OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+		OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+			      R200_TXC_ARG_A_ZERO |
+			      R200_TXC_ARG_B_ZERO |
+			      R200_TXC_ARG_C_R0_COLOR |
+			      R200_TXC_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+			      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+		OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+			      R200_TXA_ARG_A_ZERO |
+			      R200_TXA_ARG_B_ZERO |
+			      R200_TXA_ARG_C_R0_ALPHA |
+			      R200_TXA_OP_MADD);
+		OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+			      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+		FINISH_ACCEL();
+	    }
 	} else {
 
 	    info->accel_state->texW[0] = 1;
 	    info->accel_state->texH[0] = 1;
 
-	    BEGIN_ACCEL(8);
+	    BEGIN_ACCEL(9);
+
+	    OUT_ACCEL_REG(RADEON_PP_CNTL,
+			  RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
 
 	    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
 					      RADEON_SE_VTX_FMT_ST0));
@@ -1672,6 +2180,20 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			    ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
 			                                              (float)srcY / info->accel_state->texH[0]);
 		}
+	    } else if (isplanar) {
+		/*
+		 * Just render a rect (using three coords).
+		 * Filter is a bit of a misnomer, it's just texcoords...
+		 */
+		VTX_OUT_FILTER((float)dstX,                                (float)(dstY + dsth),
+			(float)srcX / info->accel_state->texW[0],          (float)(srcY + srch) / info->accel_state->texH[0],
+			(float)srcX / info->accel_state->texW[0],          (float)(srcY + srch) / info->accel_state->texH[0]);
+		VTX_OUT_FILTER((float)(dstX + dstw),                       (float)(dstY + dsth),
+			(float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+			(float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+		VTX_OUT_FILTER((float)(dstX + dstw),                       (float)dstY,
+			(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+			(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
 	    } else {
 		/*
 		 * Just render a rect (using three coords).
diff --git a/src/radeon_tv.c b/src/radeon_tv.c
index 98e3b0a..eef45d9 100644
--- a/src/radeon_tv.c
+++ b/src/radeon_tv.c
@@ -815,7 +815,9 @@ void RADEONInitTVRegisters(xf86OutputPtr output, RADEONSavePtr save,
     save->tv_rgb_cntl = (RADEON_RGB_DITHER_EN
 			 | RADEON_TVOUT_SCALE_EN
 			 | (0x0b << RADEON_UVRAM_READ_MARGIN_SHIFT)
-			 | (0x07 << RADEON_FIFORAM_FFMACRO_READ_MARGIN_SHIFT));
+			 | (0x07 << RADEON_FIFORAM_FFMACRO_READ_MARGIN_SHIFT)
+			 | RADEON_RGB_ATTEN_SEL(0x3)
+			 | RADEON_RGB_ATTEN_VAL(0xc));
 
     if (IsPrimary) {
 	if (radeon_output->Flags & RADEON_USE_RMX)
diff --git a/src/radeon_video.c b/src/radeon_video.c
index 92d1a71..42aa036 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -297,22 +297,19 @@ void RADEONInitVideo(ScreenPtr pScreen)
 	RADEONInitOffscreenImages(pScreen);
     }
 
-    if (info->ChipFamily != CHIP_FAMILY_RV250) {
-	if ((info->ChipFamily < CHIP_FAMILY_RS400)
+    if ((info->ChipFamily < CHIP_FAMILY_RS400)
 #ifdef XF86DRI
-	    || (info->directRenderingEnabled)
+	|| (info->directRenderingEnabled)
 #endif
-	    ) {
-	    texturedAdaptor = RADEONSetupImageTexturedVideo(pScreen);
-	    if (texturedAdaptor != NULL) {
-		adaptors[num_adaptors++] = texturedAdaptor;
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Set up textured video\n");
-	    } else
-		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to set up textured video\n");
+	) {
+	texturedAdaptor = RADEONSetupImageTexturedVideo(pScreen);
+	if (texturedAdaptor != NULL) {
+	    adaptors[num_adaptors++] = texturedAdaptor;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Set up textured video\n");
 	} else
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video requires CP on R5xx/R6xx/R7xx/IGP\n");
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to set up textured video\n");
     } else
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video disabled on RV250 due to HW bug\n");
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Textured video requires CP on R5xx/R6xx/R7xx/IGP\n");
 
     if(num_adaptors)
 	xf86XVScreenInit(pScreen, adaptors, num_adaptors);
@@ -1070,11 +1067,11 @@ static void RADEONSetTransform (ScrnInfoPtr pScrn,
     OvGCr = CAdjGCr * gamma_curve_r100[gamma].OvGammaCont;
     OvBCb = CAdjBCb * gamma_curve_r100[gamma].OvGammaCont;
     OvBCr = CAdjBCr * gamma_curve_r100[gamma].OvGammaCont;
-    OvROff = CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
+    OvROff = RedAdj + CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
 	OvLuma * Loff - (OvRCb + OvRCr) * Coff;
-    OvGOff = CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
+    OvGOff = GreenAdj + CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
 	OvLuma * Loff - (OvGCb + OvGCr) * Coff;
-    OvBOff = CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
+    OvBOff = BlueAdj + CAdjOff * gamma_curve_r100[gamma].OvGammaCont - 
 	OvLuma * Loff - (OvBCb + OvBCr) * Coff;
 #if 0 /* default constants */
     OvROff = -888.5;
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 7f1891e..34fb07f 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -90,6 +90,11 @@ typedef struct {
    void         *video_memory;
    int           video_offset;
 
+   Bool          planar_hw;
+   Bool          planar_state;
+   int           planeu_offset;
+   int           planev_offset;
+
    /* bicubic filtering */
    void         *bicubic_memory;
    int           bicubic_offset;