From 1fab1751d03482c9071f94994681f0a1ccbdcaab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Aug 21 2009 03:29:11 +0000 Subject: - change to a git snapshot --- diff --git a/.cvsignore b/.cvsignore index 6281a6e..3e67677 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -xf86-video-ati-6.12.2.tar.bz2 +xf86-video-ati-20090821.tar.xz diff --git a/make-git-snapshot.sh b/make-git-snapshot.sh new file mode 100755 index 0000000..3f2c592 --- /dev/null +++ b/make-git-snapshot.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +# Usage: ./make-git-snapshot.sh [COMMIT] +# +# to make a snapshot of the given tag/branch. Defaults to HEAD. +# Point env var REF to a local mesa repo to reduce clone time. + +DIRNAME=xf86-video-ati-$( date +%Y%m%d ) + +echo REF ${REF:+--reference $REF} +echo DIRNAME $DIRNAME +echo HEAD ${1:-HEAD} + +rm -rf $DIRNAME + +git clone ${REF:+--reference $REF} \ + git://git.freedesktop.org/git/xorg/driver/xf86-video-ati $DIRNAME + +GIT_DIR=$DIRNAME/.git git archive --format=tar --prefix=$DIRNAME/ ${1:-HEAD} \ + | xz > $DIRNAME.tar.xz + +# rm -rf $DIRNAME diff --git a/radeon-6.12.2-to-git.patch b/radeon-6.12.2-to-git.patch deleted file mode 100644 index d75650a..0000000 --- a/radeon-6.12.2-to-git.patch +++ /dev/null @@ -1,20161 +0,0 @@ -diff --git a/Makefile.am b/Makefile.am -index 87e90ba..e76bf11 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -27,6 +27,6 @@ CLEANFILES = ChangeLog - .PHONY: ChangeLog - - ChangeLog: -- (GIT_DIR=$(top_srcdir)/.git git-log > .changelog.tmp && mv .changelog.tmp ChangeLog; rm -f .changelog.tmp) || (touch ChangeLog; echo 'git directory not found: installing possibly empty changelog.' >&2) -+ $(CHANGELOG_CMD) - - dist-hook: ChangeLog -diff --git a/README b/README -index 99de20d..4b92a18 100644 ---- a/README -+++ b/README -@@ -1,20 +1,25 @@ - xf86-video-ati - ATI Radeon video driver for the Xorg X server - --Please submit bugs & patches to the Xorg bugzilla: -- -- https://bugs.freedesktop.org/enter_bug.cgi?product=xorg -- - All questions regarding this software should be directed at the - Xorg mailing list: - - http://lists.freedesktop.org/mailman/listinfo/xorg - -+Please submit bug reports to the Xorg bugzilla: -+ -+ https://bugs.freedesktop.org/enter_bug.cgi?product=xorg -+ - The master development code repository can be found at: - - git://anongit.freedesktop.org/git/xorg/driver/xf86-video-ati - - http://cgit.freedesktop.org/xorg/driver/xf86-video-ati - -+For patch submission instructions, see: -+ -+ http://www.x.org/wiki/Development/Documentation/SubmittingPatches -+ - For more information on the git code manager, see: - - http://wiki.x.org/wiki/GitPage -+ -diff --git a/acinclude.m4 b/acinclude.m4 -new file mode 100644 -index 0000000..0a3509e ---- /dev/null -+++ b/acinclude.m4 -@@ -0,0 +1,77 @@ -+dnl Make automake/libtool output more friendly to humans -+dnl Damien Lespiau -+dnl -+dnl SHAVE_INIT([shavedir],[default_mode]) -+dnl -+dnl shavedir: the directory where the shave scripts are, it defaults to -+dnl $(top_builddir) -+dnl default_mode: (enable|disable) default shave mode. This parameter -+dnl controls shave's behaviour when no option has been -+dnl given to configure. It defaults to disable. -+dnl -+dnl * SHAVE_INIT should be called late in your configure.(ac|in) file (just -+dnl before AC_CONFIG_FILE/AC_OUTPUT is perfect. This macro rewrites CC and -+dnl LIBTOOL, you don't want the configure tests to have these variables -+dnl re-defined. -+dnl * This macro requires GNU make's -s option. -+ -+AC_DEFUN([_SHAVE_ARG_ENABLE], -+[ -+ AC_ARG_ENABLE([shave], -+ AS_HELP_STRING( -+ [--enable-shave], -+ [use shave to make the build pretty [[default=$1]]]),, -+ [enable_shave=$1] -+ ) -+]) -+ -+AC_DEFUN([SHAVE_INIT], -+[ -+ dnl you can tweak the default value of enable_shave -+ m4_if([$2], [enable], [_SHAVE_ARG_ENABLE(yes)], [_SHAVE_ARG_ENABLE(no)]) -+ -+ if test x"$enable_shave" = xyes; then -+ dnl where can we find the shave scripts? -+ m4_if([$1],, -+ [shavedir="$ac_pwd"], -+ [shavedir="$ac_pwd/$1"]) -+ AC_SUBST(shavedir) -+ -+ dnl make is now quiet -+ AC_SUBST([MAKEFLAGS], [-s]) -+ AC_SUBST([AM_MAKEFLAGS], ['`test -z $V && echo -s`']) -+ -+ dnl we need sed -+ AC_CHECK_PROG(SED,sed,sed,false) -+ -+ dnl substitute libtool -+ SHAVE_SAVED_LIBTOOL=$LIBTOOL -+ LIBTOOL="${SHELL} ${shavedir}/shave-libtool '${SHAVE_SAVED_LIBTOOL}'" -+ AC_SUBST(LIBTOOL) -+ -+ dnl substitute cc/cxx -+ SHAVE_SAVED_CC=$CC -+ SHAVE_SAVED_CXX=$CXX -+ SHAVE_SAVED_FC=$FC -+ SHAVE_SAVED_F77=$F77 -+ SHAVE_SAVED_OBJC=$OBJC -+ CC="${SHELL} ${shavedir}/shave cc ${SHAVE_SAVED_CC}" -+ CXX="${SHELL} ${shavedir}/shave cxx ${SHAVE_SAVED_CXX}" -+ FC="${SHELL} ${shavedir}/shave fc ${SHAVE_SAVED_FC}" -+ F77="${SHELL} ${shavedir}/shave f77 ${SHAVE_SAVED_F77}" -+ OBJC="${SHELL} ${shavedir}/shave objc ${SHAVE_SAVED_OBJC}" -+ AC_SUBST(CC) -+ AC_SUBST(CXX) -+ AC_SUBST(FC) -+ AC_SUBST(F77) -+ AC_SUBST(OBJC) -+ -+ V=@ -+ else -+ V=1 -+ fi -+ Q='$(V:1=)' -+ AC_SUBST(V) -+ AC_SUBST(Q) -+]) -+ -diff --git a/configure.ac b/configure.ac -index 709fb19..48375ee 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -22,18 +22,26 @@ - - AC_PREREQ(2.57) - AC_INIT([xf86-video-ati], -- 6.12.2, -+ 6.12.99, - [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], - xf86-video-ati) - - AC_CONFIG_SRCDIR([Makefile.am]) - AM_CONFIG_HEADER([config.h]) -+ -+# Require xorg-macros: XORG_RELEASE_VERSION XORG_CHANGELOG -+m4_ifndef([XORG_MACROS_VERSION], [AC_FATAL([must install xorg-macros 1.2 or later before running autoconf/autogen])]) -+XORG_MACROS_VERSION(1.2) -+ - AC_CONFIG_AUX_DIR(.) - - AM_INIT_AUTOMAKE([dist-bzip2]) -+AC_SYS_LARGEFILE - - AM_MAINTAINER_MODE - -+AC_CONFIG_FILES([shave shave-libtool]) -+ - # Checks for programs. - AC_DISABLE_STATIC - AC_PROG_LIBTOOL -@@ -62,6 +70,12 @@ AC_ARG_ENABLE(exa, - [EXA="$enableval"], - [EXA=yes]) - -+AC_ARG_ENABLE(kms, -+ AC_HELP_STRING([--disable-kms], -+ [Disable KMS support [[default=enabled]]]), -+ [DRM_MODE="$enableval"], -+ [DRM_MODE=yes]) -+ - AC_ARG_WITH(xserver-source,AC_HELP_STRING([--with-xserver-source=XSERVER_SOURCE], - [Path to X server source tree]), - [ XSERVER_SOURCE="$withval" ], -@@ -76,6 +90,10 @@ XORG_DRIVER_CHECK_EXT(DPMSExtension, xextproto) - - # Checks for pkg-config packages - PKG_CHECK_MODULES(XORG, [xorg-server >= 1.2 xproto fontsproto $REQUIRED_MODULES]) -+PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], -+ HAVE_XEXTPROTO_71="yes"; AC_DEFINE(HAVE_XEXTPROTO_71, 1, [xextproto 7.1 available]), -+ HAVE_XEXTPROTO_71="no") -+AM_CONDITIONAL(HAVE_XEXTPROTO_71, [ test "$HAVE_XEXTPROTO_71" = "yes" ]) - sdkdir=$(pkg-config --variable=sdkdir xorg-server) - - # Checks for libraries. -@@ -114,6 +132,24 @@ if test "$DRI" = yes; then - if test "$have_damage_h" = yes; then - AC_DEFINE(DAMAGE,1,[Use Damage extension]) - fi -+ -+ save_CFLAGS="$CFLAGS" -+ CFLAGS="$XORG_CFLAGS $DRI_CFLAGS $CFLAGS" -+ AM_CONDITIONAL(DRM_MODE, test x$DRM_MODE = xyes) -+ if test "$DRM_MODE" = yes; then -+ AC_CHECK_HEADER(xf86drmMode.h,[DRM_MODE=yes],[DRM_MODE=no],[#include -+#include ]) -+ if test "x$DRM_MODE" = xyes; then -+ PKG_CHECK_MODULES(LIBDRM_RADEON, [xorg-server >= 1.6 libdrm_radeon], -+ [LIBDRM_RADEON=yes], [LIBDRM_RADEON=no]) -+ -+ if test "x$LIBDRM_RADEON" = xyes; then -+ AC_DEFINE(XF86DRM_MODE,1,[DRM kernel modesetting]) -+ AC_DEFINE(RADEON_DRI2, 1,[Enable DRI2 code]) -+ fi -+ fi -+ fi -+ CFLAGS="$save_CFLAGS" - fi - - save_CFLAGS="$CFLAGS" -@@ -237,6 +273,8 @@ CPPFLAGS="$SAVE_CPPFLAGS" - - AM_CONDITIONAL(USE_EXA, test "x$USE_EXA" = xyes) - -+AM_CONDITIONAL(XF86DRM_MODE, test "x$LIBDRM_RADEON" = xyes) -+ - if test "x$XSERVER_LIBPCIACCESS" = xyes; then - PKG_CHECK_MODULES([PCIACCESS], [pciaccess >= 0.8.0]) - XORG_CFLAGS="$XORG_CFLAGS $PCIACCESS_CFLAGS" -@@ -310,6 +348,8 @@ esac - - AC_SUBST([XORG_CFLAGS]) - AC_SUBST([DRI_CFLAGS]) -+AC_SUBST([LIBDRM_RADEON_CFLAGS]) -+AC_SUBST([LIBDRM_RADEON_LIBS]) - AC_SUBST([moduledir]) - - DRIVER_NAME=ati -@@ -318,6 +358,7 @@ AC_SUBST([DRIVER_NAME]) - XORG_MANPAGE_SECTIONS - XORG_RELEASE_VERSION - XORG_CHECK_LINUXDOC -+XORG_CHANGELOG - - AC_MSG_NOTICE( - [The atimisc sub-driver has been split out to xf86-video-mach64:] -@@ -331,8 +372,32 @@ AC_MSG_NOTICE( - [Please install that driver as well for rage128-based cards.] - ) - -+SHAVE_INIT(.,enable) -+ - AC_OUTPUT([ - Makefile - src/Makefile - man/Makefile - ]) -+ -+dnl -+dnl Output some configuration info for the user -+dnl -+echo "" -+echo " prefix: $prefix" -+echo " exec_prefix: $exec_prefix" -+echo " libdir: $libdir" -+echo " includedir: $includedir" -+ -+ -+echo "" -+echo " Kernel modesetting: $DRM_MODE" -+ -+echo "" -+echo " CFLAGS: $CFLAGS" -+echo " CXXFLAGS: $CXXFLAGS" -+echo " Macros: $DEFINES" -+ -+echo "" -+echo " Run '${MAKE-make}' to build xf86-video-ati" -+echo "" -diff --git a/man/radeon.man b/man/radeon.man -index 09239cf..703fe1d 100644 ---- a/man/radeon.man -+++ b/man/radeon.man -@@ -346,9 +346,8 @@ and - .B EXA. - XAA is the traditional acceleration architecture and support for it is very - stable. EXA is a newer acceleration architecture with better performance for --the Render and Composite extensions, but the rendering code for it is newer and --possibly unstable. The default is --.B XAA. -+the Render and Composite extensions. The default is -+.B EXA. - .TP - .BI "Option \*qAccelDFS\*q \*q" boolean \*q - Use or don't use accelerated EXA DownloadFromScreen hook when possible (only -@@ -413,13 +412,22 @@ for CRT, - .B RGB - for digital panels - .TP --.BI "Option \*qDynamicClocks\*q \*q" boolean \*q --Enable dynamic clock scaling. The on-chip clocks will scale dynamically --based on usage. This can help reduce heat and increase battery -+.BI "Option \*qClockGating\*q \*q" boolean \*q -+Enable dynamic clock gating. This can help reduce heat and increase battery - life by reducing power usage. Some users report reduced 3D performance - with this enabled. The default is - .B off. - .TP -+.BI "Option \*qForceLowPowerMode\*q \*q" boolean \*q -+Enable a static low power mode. This can help reduce heat and increase battery -+life by reducing power usage at the expense of performance. The default is -+.B off. -+.TP -+.BI "Option \*qDynamicPM\*q \*q" boolean \*q -+Enable dynamic power mode switching. This can help reduce heat and increase battery -+life by reducing power usage when the system is idle (DPMS active). The default is -+.B off. -+.TP - .BI "Option \*qVGAAccess\*q \*q" boolean \*q - Tell the driver if it can do legacy VGA IOs to the card. This is - necessary for properly resuming consoles when in VGA text mode, but -@@ -585,7 +593,9 @@ XV_BICUBIC is used to control whether textured adapter should apply - a bicubic filter to smooth the output. It has three values: 'off'(0), 'on'(1) - and 'auto'(2). 'off' means never apply the filter, 'on' means always apply - the filter and 'auto' means apply the filter only if the X and Y --sizes are scaled to more than double, this to avoid blurred output. -+sizes are scaled to more than double to avoid blurred output. Bicubic -+filtering is not currently compatible with other Xv attributes like hue, -+contrast, and brightness, and must be disabled to use those attributes. - The default is - .B 'auto'(2). - -diff --git a/shave-libtool.in b/shave-libtool.in -new file mode 100644 -index 0000000..1f3a720 ---- /dev/null -+++ b/shave-libtool.in -@@ -0,0 +1,69 @@ -+#!/bin/sh -+ -+# we need sed -+SED=@SED@ -+if test -z "$SED" ; then -+SED=sed -+fi -+ -+lt_unmangle () -+{ -+ last_result=`echo $1 | $SED -e 's#.libs/##' -e 's#[0-9a-zA-Z_\-\.]*_la-##'` -+} -+ -+# the real libtool to use -+LIBTOOL="$1" -+shift -+ -+# if 1, don't print anything, the underlaying wrapper will do it -+pass_though=0 -+ -+# scan the arguments, keep the right ones for libtool, and discover the mode -+preserved_args= -+while test "$#" -gt 0; do -+ opt="$1" -+ shift -+ -+ case $opt in -+ --mode=*) -+ mode=`echo $opt | $SED -e 's/[-_a-zA-Z0-9]*=//'` -+ preserved_args="$preserved_args $opt" -+ ;; -+ -o) -+ lt_output="$1" -+ preserved_args="$preserved_args $opt" -+ ;; -+ *) -+ preserved_args="$preserved_args $opt" -+ ;; -+ esac -+done -+ -+case "$mode" in -+compile) -+ # shave will be called and print the actual CC/CXX/LINK line -+ preserved_args="$preserved_args --shave-mode=$mode" -+ pass_though=1 -+ ;; -+link) -+ preserved_args="$preserved_args --shave-mode=$mode" -+ Q=" LINK " -+ ;; -+*) -+ # let's u -+ # echo "*** libtool: Unimplemented mode: $mode, fill a bug report" -+ ;; -+esac -+ -+lt_unmangle "$lt_output" -+output=$last_result -+ -+if test -z $V; then -+ if test $pass_though -eq 0; then -+ echo "$Q$output" -+ fi -+ $LIBTOOL --silent $preserved_args -+else -+ echo $LIBTOOL $preserved_args -+ $LIBTOOL $preserved_args -+fi -diff --git a/shave.in b/shave.in -new file mode 100644 -index 0000000..5c16f27 ---- /dev/null -+++ b/shave.in -@@ -0,0 +1,79 @@ -+#!/bin/sh -+ -+# we need sed -+SED=@SED@ -+if test -z "$SED" ; then -+SED=sed -+fi -+ -+lt_unmangle () -+{ -+ last_result=`echo $1 | $SED -e 's#.libs/##' -e 's#[0-9a-zA-Z_\-\.]*_la-##'` -+} -+ -+# the tool to wrap (cc, cxx, ar, ranlib, ..) -+tool="$1" -+shift -+ -+# the reel tool (to call) -+REEL_TOOL="$1" -+shift -+ -+pass_through=0 -+preserved_args= -+while test "$#" -gt 0; do -+ opt="$1" -+ shift -+ -+ case $opt in -+ --shave-mode=*) -+ mode=`echo $opt | $SED -e 's/[-_a-zA-Z0-9]*=//'` -+ ;; -+ -o) -+ lt_output="$1" -+ preserved_args="$preserved_args $opt" -+ ;; -+ *) -+ preserved_args="$preserved_args $opt" -+ ;; -+ esac -+done -+ -+# mode=link is handled in the libtool wrapper -+case "$mode,$tool" in -+link,*) -+ pass_through=1 -+ ;; -+*,cxx) -+ Q=" CXX " -+ ;; -+*,cc) -+ Q=" CC " -+ ;; -+*,fc) -+ Q=" FC " -+ ;; -+*,f77) -+ Q=" F77 " -+ ;; -+*,objc) -+ Q=" OBJC " -+ ;; -+*,*) -+ # should not happen -+ Q=" CC " -+ ;; -+esac -+ -+lt_unmangle "$lt_output" -+output=$last_result -+ -+if test -z $V; then -+ if test $pass_through -eq 0; then -+ echo "$Q$output" -+ fi -+ $REEL_TOOL $preserved_args -+else -+ echo $REEL_TOOL $preserved_args -+ $REEL_TOOL $preserved_args -+fi -diff --git a/src/Makefile.am b/src/Makefile.am -index 7cc2a6f..57a365b 100644 ---- a/src/Makefile.am -+++ b/src/Makefile.am -@@ -26,7 +26,7 @@ - # _ladir passes a dummy rpath to libtool so the thing will actually link - # TODO: -nostdlib/-Bstatic/-lgcc platform magic, not installing the .a, etc. - --radeon_drv_la_LIBADD = -+radeon_drv_la_LIBADD = $(LIBDRM_RADEON_LIBS) - - if DRI - RADEON_DRI_SRCS = radeon_dri.c -@@ -65,6 +65,10 @@ XMODE_SRCS=\ - modes/xf86Rotate.c \ - modes/xf86DiDGA.c - -+if XF86DRM_MODE -+RADEON_KMS_SRCS=radeon_dri2.c radeon_kms.c drmmode_display.c -+endif -+ - if USE_EXA - RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c - endif -@@ -92,10 +96,11 @@ radeon_drv_la_SOURCES = \ - radeon_driver.c radeon_video.c radeon_bios.c radeon_mm_i2c.c \ - radeon_vip.c radeon_misc.c radeon_probe.c \ - legacy_crtc.c legacy_output.c \ -- radeon_textured_video.c \ -+ radeon_textured_video.c radeon_pm.c \ - radeon_crtc.c radeon_output.c radeon_modes.c radeon_tv.c \ - $(RADEON_ATOMBIOS_SOURCES) radeon_atombios.c radeon_atomwrapper.c \ -- $(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c atombios_crtc.c -+ $(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c atombios_crtc.c \ -+ $(RADEON_KMS_SRCS) - - if XMODES - radeon_drv_la_SOURCES += \ -@@ -164,4 +169,6 @@ EXTRA_DIST = \ - radeon_pci_device_match_gen.h \ - pcidb/ati_pciids.csv \ - pcidb/parse_pci_ids.pl \ -- radeon_atombios.h -+ radeon_atombios.h \ -+ radeon_dri2.h \ -+ drmmode_display.h -diff --git a/src/ati_pciids_gen.h b/src/ati_pciids_gen.h -index 3304e84..3f9691e 100644 ---- a/src/ati_pciids_gen.h -+++ b/src/ati_pciids_gen.h -@@ -60,6 +60,8 @@ - #define PCI_CHIP_R420_JN 0x4A4E - #define PCI_CHIP_R420_4A4F 0x4A4F - #define PCI_CHIP_R420_JP 0x4A50 -+#define PCI_CHIP_R420_JT 0x4A54 -+#define PCI_CHIP_R481_4B48 0x4B48 - #define PCI_CHIP_R481_4B49 0x4B49 - #define PCI_CHIP_R481_4B4A 0x4B4A - #define PCI_CHIP_R481_4B4B 0x4B4B -@@ -334,6 +336,7 @@ - #define PCI_CHIP_RV770_9440 0x9440 - #define PCI_CHIP_RV770_9441 0x9441 - #define PCI_CHIP_RV770_9442 0x9442 -+#define PCI_CHIP_RV770_9443 0x9443 - #define PCI_CHIP_RV770_9444 0x9444 - #define PCI_CHIP_RV770_9446 0x9446 - #define PCI_CHIP_RV770_944A 0x944A -@@ -351,15 +354,26 @@ - #define PCI_CHIP_RV770_946B 0x946B - #define PCI_CHIP_RV770_947A 0x947A - #define PCI_CHIP_RV770_947B 0x947B -+#define PCI_CHIP_RV730_9480 0x9480 - #define PCI_CHIP_RV730_9487 0x9487 -+#define PCI_CHIP_RV730_9488 0x9488 - #define PCI_CHIP_RV730_9489 0x9489 - #define PCI_CHIP_RV730_948F 0x948F - #define PCI_CHIP_RV730_9490 0x9490 - #define PCI_CHIP_RV730_9491 0x9491 -+#define PCI_CHIP_RV730_9495 0x9495 - #define PCI_CHIP_RV730_9498 0x9498 - #define PCI_CHIP_RV730_949C 0x949C - #define PCI_CHIP_RV730_949E 0x949E - #define PCI_CHIP_RV730_949F 0x949F -+#define PCI_CHIP_RV740_94A0 0x94A0 -+#define PCI_CHIP_RV740_94A1 0x94A1 -+#define PCI_CHIP_RV740_94A3 0x94A3 -+#define PCI_CHIP_RV740_94B1 0x94B1 -+#define PCI_CHIP_RV740_94B3 0x94B3 -+#define PCI_CHIP_RV740_94B4 0x94B4 -+#define PCI_CHIP_RV740_94B5 0x94B5 -+#define PCI_CHIP_RV740_94B9 0x94B9 - #define PCI_CHIP_RV610_94C0 0x94C0 - #define PCI_CHIP_RV610_94C1 0x94C1 - #define PCI_CHIP_RV610_94C3 0x94C3 -@@ -392,6 +406,7 @@ - #define PCI_CHIP_RV710_9552 0x9552 - #define PCI_CHIP_RV710_9553 0x9553 - #define PCI_CHIP_RV710_9555 0x9555 -+#define PCI_CHIP_RV710_9557 0x9557 - #define PCI_CHIP_RV630_9580 0x9580 - #define PCI_CHIP_RV630_9581 0x9581 - #define PCI_CHIP_RV630_9583 0x9583 -diff --git a/src/atombios_crtc.c b/src/atombios_crtc.c -index 31c032b..e04b3c4 100644 ---- a/src/atombios_crtc.c -+++ b/src/atombios_crtc.c -@@ -33,8 +33,13 @@ - #include "config.h" - #endif - /* DPMS */ -+#ifdef HAVE_XEXTPROTO_71 -+#include -+#else - #define DPMS_SERVER - #include -+#endif -+ - - #include "radeon.h" - #include "radeon_reg.h" -@@ -167,13 +172,13 @@ atombios_crtc_dpms(xf86CrtcPtr crtc, int mode) - RADEONInfoPtr info = RADEONPTR(crtc->scrn); - switch (mode) { - case DPMSModeOn: -- case DPMSModeStandby: -- case DPMSModeSuspend: - if (IS_DCE3_VARIANT) - atombios_enable_crtc_memreq(info->atomBIOS, radeon_crtc->crtc_id, 1); - atombios_enable_crtc(info->atomBIOS, radeon_crtc->crtc_id, 1); - atombios_blank_crtc(info->atomBIOS, radeon_crtc->crtc_id, 0); - break; -+ case DPMSModeStandby: -+ case DPMSModeSuspend: - case DPMSModeOff: - atombios_blank_crtc(info->atomBIOS, radeon_crtc->crtc_id, 1); - atombios_enable_crtc(info->atomBIOS, radeon_crtc->crtc_id, 0); -@@ -259,7 +264,7 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - unsigned char *RADEONMMIO = info->MMIO; - int index = GetIndexIntoMasterTable(COMMAND, SetPixelClock); - uint32_t sclock = mode->Clock; -- uint32_t ref_div = 0, fb_div = 0, post_div = 0; -+ uint32_t ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0; - int major, minor, i; - SET_PIXEL_CLOCK_PS_ALLOCATION spc_param; - PIXEL_CLOCK_PARAMETERS_V2 *spc2_ptr; -@@ -276,6 +281,10 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - - memset(&spc_param, 0, sizeof(spc_param)); - if (IS_AVIVO_VARIANT) { -+ if ((info->ChipFamily == CHIP_FAMILY_RS600) || -+ (info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) -+ pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV | RADEON_PLL_PREFER_CLOSEST_LOWER; - if (IS_DCE3_VARIANT && mode->Clock > 200000) /* range limits??? */ - pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV; - else -@@ -311,15 +320,16 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; - } - -- RADEONComputePLL(&info->pll, mode->Clock, &temp, &fb_div, &ref_div, &post_div, pll_flags); -+ RADEONComputePLL(&info->pll, mode->Clock, &temp, &fb_div, &frac_fb_div, &ref_div, &post_div, pll_flags); - sclock = temp; - - xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, - "crtc(%d) Clock: mode %d, PLL %lu\n", - radeon_crtc->crtc_id, mode->Clock, (long unsigned int)sclock * 10); - xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, -- "crtc(%d) PLL : refdiv %u, fbdiv 0x%X(%u), pdiv %u\n", -- radeon_crtc->crtc_id, (unsigned int)ref_div, (unsigned int)fb_div, (unsigned int)fb_div, (unsigned int)post_div); -+ "crtc(%d) PLL : refdiv %u, fbdiv 0x%X(%u), fracfbdiv %u, pdiv %u\n", -+ radeon_crtc->crtc_id, (unsigned int)ref_div, (unsigned int)fb_div, -+ (unsigned int)fb_div, (unsigned int)frac_fb_div, (unsigned int)post_div); - - /* Can't really do cloning easily on DCE3 cards */ - for (i = 0; i < xf86_config->num_output; i++) { -@@ -353,6 +363,7 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - spc2_ptr->usPixelClock = cpu_to_le16(sclock); - spc2_ptr->usRefDiv = cpu_to_le16(ref_div); - spc2_ptr->usFbDiv = cpu_to_le16(fb_div); -+ spc2_ptr->ucFracFbDiv = frac_fb_div; - spc2_ptr->ucPostDiv = post_div; - spc2_ptr->ucPpll = radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; - spc2_ptr->ucCRTC = radeon_crtc->crtc_id; -@@ -364,6 +375,7 @@ atombios_crtc_set_pll(xf86CrtcPtr crtc, DisplayModePtr mode) - spc3_ptr->usPixelClock = cpu_to_le16(sclock); - spc3_ptr->usRefDiv = cpu_to_le16(ref_div); - spc3_ptr->usFbDiv = cpu_to_le16(fb_div); -+ spc3_ptr->ucFracFbDiv = frac_fb_div; - spc3_ptr->ucPostDiv = post_div; - spc3_ptr->ucPpll = radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; - spc3_ptr->ucMiscInfo = (radeon_crtc->crtc_id << 2); -@@ -672,9 +684,10 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - unsigned char *RADEONMMIO = info->MMIO; - - uint32_t dc_lb_memory_split; -- float mem_bw, peak_disp_bw; -- float min_mem_eff = 0.8; /* XXX: taken from legacy method */ -- float pix_clk, pix_clk2; /* in MHz */ -+ float available_bandwidth = 0; -+ float read_delay_latency = 1000; -+ int i; -+ Bool sideport = FALSE; - - /* - * Set display0/1 priority up in the memory controller for -@@ -685,7 +698,8 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - uint32_t mc_init_misc_lat_timer = 0; - if (info->ChipFamily == CHIP_FAMILY_RV515) - mc_init_misc_lat_timer = INMC(pScrn, RV515_MC_INIT_MISC_LAT_TIMER); -- else if (info->ChipFamily == CHIP_FAMILY_RS690) -+ else if ((info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) - mc_init_misc_lat_timer = INMC(pScrn, RS690_MC_INIT_MISC_LAT_TIMER); - - mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); -@@ -698,33 +712,11 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - - if (info->ChipFamily == CHIP_FAMILY_RV515) - OUTMC(pScrn, RV515_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); -- else if (info->ChipFamily == CHIP_FAMILY_RS690) -+ else if ((info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) - OUTMC(pScrn, RS690_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); - } - -- /* XXX: fix me for AVIVO -- * Determine if there is enough bandwidth for current display mode -- */ -- mem_bw = info->mclk * (info->RamWidth / 8) * (info->IsDDR ? 2 : 1); -- -- pix_clk = 0; -- pix_clk2 = 0; -- peak_disp_bw = 0; -- if (mode1) { -- pix_clk = mode1->Clock/1000.0; -- peak_disp_bw += (pix_clk * pixel_bytes1); -- } -- if (mode2) { -- pix_clk2 = mode2->Clock/1000.0; -- peak_disp_bw += (pix_clk2 * pixel_bytes2); -- } -- -- if (peak_disp_bw >= mem_bw * min_mem_eff) { -- xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -- "You may not have enough display bandwidth for current mode\n" -- "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); -- } -- - /* - * Line Buffer Setup - * There is a single line buffer shared by both display controllers. -@@ -740,10 +732,8 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - * 14:4; D2 allocation follows D1. - */ - -- /* is auto or manual better ? */ - dc_lb_memory_split = INREG(AVIVO_DC_LB_MEMORY_SPLIT) & ~AVIVO_DC_LB_MEMORY_SPLIT_MASK; - dc_lb_memory_split &= ~AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE; --#if 1 - /* auto */ - if (mode1 && mode2) { - if (mode1->HDisplay > mode2->HDisplay) { -@@ -763,7 +753,8 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - } else if (mode2) { - dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q; - } --#else -+ OUTREG(AVIVO_DC_LB_MEMORY_SPLIT, dc_lb_memory_split); -+#if 0 - /* manual */ - dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE; - dc_lb_memory_split &= ~(AVIVO_DC_LB_DISP1_END_ADR_MASK << AVIVO_DC_LB_DISP1_END_ADR_SHIFT); -@@ -776,11 +767,381 @@ RADEONInitDispBandwidthAVIVO(ScrnInfoPtr pScrn, - OUTREG(AVIVO_DC_LB_MEMORY_SPLIT, dc_lb_memory_split); - #endif - -- /* -- * Watermark setup -- * TODO... -- * Unforunately, I haven't been able to dig up the avivo watermark programming -- * guide yet. -AGD -+ // fixme -+ if (info->ChipFamily == CHIP_FAMILY_RS600) -+ return; -+ -+ /* IGP bandwidth - get from integrated systems table -+ * SYSTEM_MEMORY_BANDWIDTH (Mbyte/s) = SYSTEM_MEMORY_CLOCK (MHz) * (1+DDR) * 8 * EFF * Num of channels -+ * SIDEPORT_MEMORY_BANDWIDTH = SIDEPORT_MEMORY_CLOCK * 2(byte) * 2(DDR) * 0.7(Eff) -+ * CORE_CLOCK_BANDWIDTH (Mbyte/s) = SCLK (MHz) * 16 / Dynamic Engine clock Divider -+ * HT_LINK_BANDWIDTH = HT_LINK_CLOCK * 2 * HT_LINK_WIDTH/8 * HT_LINK_EFF -+ * system read delay -+ * READ_DLY_MAX_LATENCY: 5000 ns -+ * sideport read delay -+ * READ_DLY_MAX_LATENCY: 370 * MCLK + 800 ns -+ * MCLK is the sideport memory clock period in ns (MCLK = 1000 / MCLKfreq MHz) - */ - -+ if (info->IsIGP) { -+ float core_clock_bandwidth = ((float)info->pm.mode[info->pm.current_mode].sclk / 100) * 16 / 1; -+ -+ if (sideport) { -+ float sideport_memory_bandwidth = (info->igp_sideport_mclk / 2) * 2 * 2 * 0.7; -+ float mclk = 1000 / info->igp_sideport_mclk; -+ read_delay_latency = 370 * mclk * 800; -+ available_bandwidth = MIN(sideport_memory_bandwidth, core_clock_bandwidth); -+ } else { -+ float system_memory_bandwidth = (info->igp_system_mclk / 2) * (1 + 1) * 8 * 0.5 * 1; -+ float ht_link_bandwidth = info->igp_ht_link_clk * 2 * (info->igp_ht_link_width / 8) * 0.8; -+ read_delay_latency = 5000; -+ available_bandwidth = MIN(system_memory_bandwidth, MIN(ht_link_bandwidth, core_clock_bandwidth)); -+ } -+ } -+ -+ /* calculate for each display */ -+ for (i = 0; i < 2; i++) { -+ DisplayModePtr current = NULL; -+ //RADEONCrtcPrivatePtr radeon_crtc = pRADEONEnt->Controller[i]; -+ float pclk, sclk, sclkfreq = 0; -+ float consumption_time, consumption_rate; -+ int num_line_pair, request_fifo_depth, lb_request_fifo_depth; -+ int max_req; -+ uint32_t lb_max_req_outstanding; -+ float line_time, active_time, chunk_time; -+ float worst_case_latency, tolerable_latency; -+ float fill_rate; -+ int priority_mark_max, priority_mark, priority_mark2; -+ int width, estimated_width; -+ /* FIXME: handle the scalers better */ -+ Bool d1_scale_en = pRADEONEnt->Controller[0]->scaler_enabled; -+ Bool d2_scale_en = pRADEONEnt->Controller[1]->scaler_enabled; -+ float vtaps1 = 2; /* XXX */ -+ float vsc1 = pRADEONEnt->Controller[0]->vsc; -+ float hsc1 = pRADEONEnt->Controller[0]->hsc; -+ float vtaps2 = 2; /* XXX */ -+ float vsc2 = pRADEONEnt->Controller[1]->vsc; -+ float hsc2 = pRADEONEnt->Controller[1]->hsc; -+ -+ if (i == 0) -+ current = mode1; -+ else -+ current = mode2; -+ -+ if (current == NULL) -+ continue; -+ -+ /* Determine consumption rate -+ pclk = pixel clock period(ns) -+ vtaps = number of vertical taps, -+ vsc = vertical scaling ratio, defined as source/destination -+ hsc = horizontal scaling ration, defined as source/destination -+ */ -+ -+ pclk = 1000 / ((float)current->Clock / 1000); -+ -+ if (i == 0) { -+ if (d1_scale_en) -+ consumption_time = pclk / ((MAX(vtaps1, vsc1) * hsc1) / vtaps1); -+ else -+ consumption_time = pclk; -+ } else { -+ if (d2_scale_en) -+ consumption_time = pclk / ((MAX(vtaps2, vsc2) * hsc2) / vtaps2); -+ else -+ consumption_time = pclk; -+ } -+ -+ consumption_rate = 1 / consumption_time; -+ -+ /* Determine request line buffer fifo depth -+ NumLinePair = Number of line pairs to request(1 = 2 lines, 2 = 4 lines) -+ LBRequestFifoDepth = Number of chunk requests the LB can put into the request FIFO for a display -+ width = viewport width in pixels -+ */ -+ if (i == 0) { -+ if (vsc1 > 2) -+ num_line_pair = 2; -+ else -+ num_line_pair = 1; -+ } else { -+ if (vsc2 > 2) -+ num_line_pair = 2; -+ else -+ num_line_pair = 1; -+ } -+ -+ width = current->CrtcHDisplay; -+ request_fifo_depth = ceil(width/256) * num_line_pair; -+ if (request_fifo_depth < 4) -+ lb_request_fifo_depth = 4; -+ else -+ lb_request_fifo_depth = request_fifo_depth; -+ -+ if (info->IsIGP) { -+ if ((info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) -+ OUTREG(RS690_DCP_CONTROL, 0); -+ else if ((info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RS880)) -+ OUTREG(RS690_DCP_CONTROL, 2); -+ max_req = lb_request_fifo_depth - 1; -+ } else -+ max_req = lb_request_fifo_depth; -+ -+ /*ErrorF("max_req %d: 0x%x\n", i, max_req);*/ -+ -+ lb_max_req_outstanding = INREG(AVIVO_LB_MAX_REQ_OUTSTANDING); -+ if (i == 0) { -+ lb_max_req_outstanding &= ~(AVIVO_LB_D1_MAX_REQ_OUTSTANDING_MASK << AVIVO_LB_D1_MAX_REQ_OUTSTANDING_SHIFT); -+ lb_max_req_outstanding |= (max_req & AVIVO_LB_D1_MAX_REQ_OUTSTANDING_MASK) << AVIVO_LB_D1_MAX_REQ_OUTSTANDING_SHIFT; -+ } else { -+ lb_max_req_outstanding &= ~(AVIVO_LB_D2_MAX_REQ_OUTSTANDING_MASK << AVIVO_LB_D2_MAX_REQ_OUTSTANDING_SHIFT); -+ lb_max_req_outstanding |= (max_req & AVIVO_LB_D2_MAX_REQ_OUTSTANDING_MASK) << AVIVO_LB_D2_MAX_REQ_OUTSTANDING_SHIFT; -+ } -+ OUTREG(AVIVO_LB_MAX_REQ_OUTSTANDING, lb_max_req_outstanding); -+ -+ /* Determine line time -+ LineTime = total time for one line of displayhtotal = total number of horizontal pixels -+ pclk = pixel clock period(ns) -+ */ -+ line_time = current->CrtcHTotal * pclk; -+ -+ /* Determine active time -+ ActiveTime = time of active region of display within one line, -+ hactive = total number of horizontal active pixels -+ htotal = total number of horizontal pixels -+ */ -+ active_time = line_time * current->CrtcHDisplay / current->CrtcHTotal; -+ -+ /* Determine chunk time -+ ChunkTime = the time it takes the DCP to send one chunk of data -+ to the LB which consists of pipeline delay and inter chunk gap -+ sclk = system clock(ns) -+ */ -+ if (info->IsIGP) { -+ sclk = 1000 / (available_bandwidth / 16); -+ /* Sclkfreq = sclk in MHz = 1000/sclk (because sclk is in ns). */ -+ sclkfreq = 1000 / sclk; -+ chunk_time = sclk * 256 * 1.3; -+ } else { -+ sclk = 1000 / ((float)info->pm.mode[info->pm.current_mode].sclk / 100); -+ chunk_time = sclk * 600; -+ } -+ -+ /* Determine the worst case latency -+ NumLinePair = Number of line pairs to request(1 = 2 lines, 2 = 4 lines) -+ WorstCaseLatency = The worst case time from urgent to when the MC starts -+ to return data -+ READ_DELAY_IDLE_MAX = constant of 1us -+ ChunkTime = the time it takes the DCP to send one chunk of data to the LB -+ which consists of pipeline delay and -+ inter chunk gap -+ */ -+ if (info->IsIGP) { -+ if (num_line_pair > 1) -+ worst_case_latency = read_delay_latency + 3 * chunk_time; -+ else -+ worst_case_latency = read_delay_latency + 2 * chunk_time; -+ } else { -+ if (num_line_pair > 1) -+ worst_case_latency = read_delay_latency + 3 * chunk_time; -+ else -+ worst_case_latency = read_delay_latency + chunk_time; -+ } -+ -+ /* Determine the tolerable latency -+ TolerableLatency = Any given request has only 1 line time for the data to be returned -+ LBRequestFifoDepth = Number of chunk requests the LB can put into the request FIFO for a display -+ LineTime = total time for one line of display -+ ChunkTime = the time it takes the DCP to send one chunk of data to the LB which consists of -+ pipeline delay and inter chunk gap -+ */ -+ if ((2 + lb_request_fifo_depth) >= request_fifo_depth) -+ tolerable_latency = line_time; -+ else -+ tolerable_latency = line_time - (request_fifo_depth - lb_request_fifo_depth - 2) * chunk_time; -+ -+ if (mode1 && mode2) { -+ int d1bpp, d2bpp; -+ int d1_graph_enable = 1; -+ int d2_graph_enable = 1; -+ int d1_ovl_enable = 0; -+ int d2_ovl_enable = 0; -+ int d1grph_depth, d2grph_depth; -+ int d1ovl_depth = 0; -+ int d2ovl_depth = 0; -+ int d1_num_line_pair, d2_num_line_pair; -+ float d1_fill_rate_coeff, d2_fill_rate_coeff; -+ -+ switch (pixel_bytes1) { -+ case 2: -+ d1grph_depth = 1; -+ break; -+ case 4: -+ d1grph_depth = 2; -+ break; -+ default: -+ d1grph_depth = 0; -+ break; -+ } -+ -+ switch (pixel_bytes2) { -+ case 2: -+ d2grph_depth = 1; -+ break; -+ case 4: -+ d2grph_depth = 2; -+ break; -+ default: -+ d2grph_depth = 0; -+ break; -+ } -+ -+ /* If both displays are active, determine line buffer fill rate */ -+ if (d1_scale_en && (vsc1 > 2)) -+ d1_num_line_pair = 2; -+ else -+ d1_num_line_pair = 1; -+ -+ if (d2_scale_en && (vsc2 > 2)) -+ d2_num_line_pair = 2; -+ else -+ d2_num_line_pair = 1; -+ -+ if (info->IsIGP) { -+ d1bpp = (d1_graph_enable * pow(2, d1grph_depth) * 8) + (d1_ovl_enable * pow(2, d1ovl_depth) * 8); -+ d2bpp = (d2_graph_enable * pow(2, d2grph_depth) * 8) + (d2_ovl_enable * pow(2, d2ovl_depth) * 8); -+ -+ if (d1bpp > 64) -+ d1_fill_rate_coeff = d1bpp * d1_num_line_pair; -+ else -+ d1_fill_rate_coeff = d1_num_line_pair; -+ -+ if (d2bpp > 64) -+ d2_fill_rate_coeff = d2bpp * d2_num_line_pair; -+ else -+ d2_fill_rate_coeff = d2_num_line_pair; -+ -+ fill_rate = sclkfreq / (d1_fill_rate_coeff + d2_fill_rate_coeff); -+ } else { -+ d1bpp = (d1grph_depth + d1ovl_depth) * 16; -+ d2bpp = (d2grph_depth + d2ovl_depth) * 16; -+ -+ if (d1bpp > 64) -+ d1_fill_rate_coeff = d1bpp / d1_num_line_pair; -+ else -+ d1_fill_rate_coeff = d1_num_line_pair; -+ -+ if (d2bpp > 64) -+ d2_fill_rate_coeff = d2bpp / d2_num_line_pair; -+ else -+ d2_fill_rate_coeff = d2_num_line_pair; -+ -+ fill_rate = sclk / (d1_fill_rate_coeff + d2_fill_rate_coeff); -+ -+ /* Convert line buffer fill rate from period to frequency */ -+ fill_rate = 1 / fill_rate; -+ } -+ } else { -+ int dxbpp; -+ int dx_grph_enable = 1; -+ int dx_ovl_enable = 0; -+ int dxgrph_depth; -+ int dxovl_depth = 0; -+ int cpp; -+ -+ if (i == 0) -+ cpp = pixel_bytes1; -+ else -+ cpp = pixel_bytes2; -+ -+ switch (cpp) { -+ case 2: -+ dxgrph_depth = 1; -+ break; -+ case 4: -+ dxgrph_depth = 2; -+ break; -+ default: -+ dxgrph_depth = 0; -+ break; -+ } -+ -+ /* If only one display active, the line buffer fill rate becomes */ -+ if (info->IsIGP) { -+ dxbpp = (dx_grph_enable * pow(2, dxgrph_depth) * 8) + (dx_ovl_enable * pow(2, dxovl_depth) * 8); -+ if (dxbpp > 64) -+ fill_rate = sclkfreq / dxbpp / num_line_pair; -+ else -+ fill_rate = sclkfreq / num_line_pair; -+ } else { -+ dxbpp = (dxgrph_depth + dxovl_depth) * 16; -+ -+ if (dxbpp > 64) -+ fill_rate = sclk / dxbpp / num_line_pair; -+ else -+ fill_rate = sclk / num_line_pair; -+ -+ /* Convert line buffer fill rate from period to frequency */ -+ fill_rate = 1 / fill_rate; -+ } -+ } -+ -+ /* Determine the maximum priority mark -+ width = viewport width in pixels -+ */ -+ priority_mark_max = ceil(width/16); -+ -+ /* Determine estimated width */ -+ estimated_width = (tolerable_latency - worst_case_latency) / consumption_time; -+ -+ /* Determine priority mark based on active time */ -+ if (info->IsIGP) { -+ if (estimated_width > width) -+ priority_mark = 10; -+ else -+ priority_mark = priority_mark_max - ceil(estimated_width / 16); -+ } else { -+ if (estimated_width > width) -+ priority_mark = priority_mark_max; -+ else -+ priority_mark = priority_mark_max - ceil(estimated_width / 16); -+ } -+ -+ /* Determine priority mark 2 based on worst case latency, -+ consumption rate, fill rate and active time -+ */ -+ if (info->IsIGP) { -+ if (consumption_rate > fill_rate) -+ priority_mark2 = ceil((worst_case_latency * consumption_rate + (consumption_rate - fill_rate) * active_time) / 1000 / 16); -+ else -+ priority_mark2 = ceil(worst_case_latency * consumption_rate / 1000 / 16); -+ } else { -+ if (consumption_rate > fill_rate) -+ priority_mark2 = ceil(worst_case_latency * consumption_rate + (consumption_rate - fill_rate) * active_time / 16); -+ else -+ priority_mark2 = ceil(worst_case_latency * consumption_rate / 16); -+ } -+ -+ /* Determine final priority mark and clamp if necessary */ -+ priority_mark = max(priority_mark, priority_mark2); -+ if (priority_mark < 0) -+ priority_mark = 0; -+ else if (priority_mark > priority_mark_max) -+ priority_mark = priority_mark_max; -+ -+ /*ErrorF("priority_mark %d: 0x%x\n", i, priority_mark);*/ -+ -+ /* Determine which display to program priority mark for */ -+ /* FIXME: program DxMODE_PRIORITY_B_CNT for slower sclk */ -+ if (i == 0) { -+ OUTREG(AVIVO_D1MODE_PRIORITY_A_CNT, (priority_mark & AVIVO_DxMODE_PRIORITY_MARK_MASK)); -+ OUTREG(AVIVO_D1MODE_PRIORITY_B_CNT, (priority_mark & AVIVO_DxMODE_PRIORITY_MARK_MASK)); -+ } else { -+ OUTREG(AVIVO_D2MODE_PRIORITY_A_CNT, (priority_mark & AVIVO_DxMODE_PRIORITY_MARK_MASK)); -+ OUTREG(AVIVO_D2MODE_PRIORITY_B_CNT, (priority_mark & AVIVO_DxMODE_PRIORITY_MARK_MASK)); -+ } -+ } -+ - } -diff --git a/src/atombios_output.c b/src/atombios_output.c -index 158e76f..00d17cb 100644 ---- a/src/atombios_output.c -+++ b/src/atombios_output.c -@@ -34,8 +34,13 @@ - #include "config.h" - #endif - /* DPMS */ -+#ifdef HAVE_XEXTPROTO_71 -+#include -+#else - #define DPMS_SERVER - #include -+#endif -+ - #include - - #include "radeon.h" -diff --git a/src/drmmode_display.c b/src/drmmode_display.c -new file mode 100644 -index 0000000..3f4ccfc ---- /dev/null -+++ b/src/drmmode_display.c -@@ -0,0 +1,1044 @@ -+/* -+ * Copyright © 2007 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Authors: -+ * Dave Airlie -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#ifdef XF86DRM_MODE -+#include -+#include "micmap.h" -+#include "xf86cmap.h" -+#include "radeon.h" -+#include "radeon_reg.h" -+#include "radeon_drm.h" -+#include "sarea.h" -+ -+#include "drmmode_display.h" -+ -+/* DPMS */ -+#ifdef HAVE_XEXTPROTO_71 -+#include -+#else -+#define DPMS_SERVER -+#include -+#endif -+ -+ -+static void -+drmmode_ConvertFromKMode(ScrnInfoPtr scrn, -+ drmModeModeInfo *kmode, -+ DisplayModePtr mode) -+{ -+ memset(mode, 0, sizeof(DisplayModeRec)); -+ mode->status = MODE_OK; -+ -+ mode->Clock = kmode->clock; -+ -+ mode->HDisplay = kmode->hdisplay; -+ mode->HSyncStart = kmode->hsync_start; -+ mode->HSyncEnd = kmode->hsync_end; -+ mode->HTotal = kmode->htotal; -+ mode->HSkew = kmode->hskew; -+ -+ mode->VDisplay = kmode->vdisplay; -+ mode->VSyncStart = kmode->vsync_start; -+ mode->VSyncEnd = kmode->vsync_end; -+ mode->VTotal = kmode->vtotal; -+ mode->VScan = kmode->vscan; -+ -+ mode->Flags = kmode->flags; //& FLAG_BITS; -+ mode->name = strdup(kmode->name); -+ -+ if (kmode->type & DRM_MODE_TYPE_DRIVER) -+ mode->type = M_T_DRIVER; -+ if (kmode->type & DRM_MODE_TYPE_PREFERRED) -+ mode->type |= M_T_PREFERRED; -+ xf86SetModeCrtc (mode, scrn->adjustFlags); -+} -+ -+static void -+drmmode_ConvertToKMode(ScrnInfoPtr scrn, -+ drmModeModeInfo *kmode, -+ DisplayModePtr mode) -+{ -+ memset(kmode, 0, sizeof(*kmode)); -+ -+ kmode->clock = mode->Clock; -+ kmode->hdisplay = mode->HDisplay; -+ kmode->hsync_start = mode->HSyncStart; -+ kmode->hsync_end = mode->HSyncEnd; -+ kmode->htotal = mode->HTotal; -+ kmode->hskew = mode->HSkew; -+ -+ kmode->vdisplay = mode->VDisplay; -+ kmode->vsync_start = mode->VSyncStart; -+ kmode->vsync_end = mode->VSyncEnd; -+ kmode->vtotal = mode->VTotal; -+ kmode->vscan = mode->VScan; -+ -+ kmode->flags = mode->Flags; //& FLAG_BITS; -+ if (mode->name) -+ strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); -+ kmode->name[DRM_DISPLAY_MODE_LEN-1] = 0; -+ -+} -+ -+static void -+drmmode_crtc_dpms(xf86CrtcPtr crtc, int mode) -+{ -+#if 0 -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); -+// drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+// drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ -+ /* bonghits in the randr 1.2 - uses dpms to disable crtc - bad buzz */ -+ if (mode == DPMSModeOff) { -+// drmModeSetCrtc(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, -+// 0, 0, 0, NULL, 0, NULL); -+ } -+#endif -+} -+ -+static PixmapPtr -+create_pixmap_for_fbcon(drmmode_ptr drmmode, -+ ScrnInfoPtr pScrn, int crtc_id) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ drmmode_crtc_private_ptr drmmode_crtc; -+ ScreenPtr pScreen = pScrn->pScreen; -+ PixmapPtr pixmap; -+ struct radeon_bo *bo; -+ drmModeFBPtr fbcon; -+ struct drm_gem_flink flink; -+ -+ drmmode_crtc = xf86_config->crtc[crtc_id]->driver_private; -+ -+ fbcon = drmModeGetFB(drmmode->fd, drmmode_crtc->mode_crtc->buffer_id); -+ if (fbcon == NULL) -+ return NULL; -+ -+ flink.handle = fbcon->handle; -+ if (ioctl(drmmode->fd, DRM_IOCTL_GEM_FLINK, &flink) < 0) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Couldn't flink fbcon handle\n"); -+ return NULL; -+ } -+ -+ bo = radeon_bo_open(drmmode->bufmgr, flink.name, 0, 0, 0, 0); -+ if (bo == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Couldn't allocate bo for fbcon handle\n"); -+ return NULL; -+ } -+ -+ pixmap = GetScratchPixmapHeader(pScreen, -+ fbcon->width, fbcon->height, -+ fbcon->depth, fbcon->bpp, -+ fbcon->pitch, NULL); -+ if (pixmap == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Couldn't allocate pixmap fbcon contents\n"); -+ return NULL; -+ } -+ -+ radeon_set_pixmap_bo(pixmap, bo); -+ -+ radeon_bo_unref(bo); -+ drmModeFreeFB(fbcon); -+ return pixmap; -+} -+ -+void drmmode_copy_fb(ScrnInfoPtr pScrn, drmmode_ptr drmmode) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ PixmapPtr src, dst; -+ ScreenPtr pScreen = pScrn->pScreen; -+ int crtc_id = 0; -+ int i; -+ int pitch = pScrn->displayWidth * info->CurrentLayout.pixel_bytes; -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ return; -+ -+ for (i = 0; i < xf86_config->num_crtc; i++) { -+ xf86CrtcPtr crtc = xf86_config->crtc[i]; -+ drmmode_crtc_private_ptr drmmode_crtc; -+ -+ drmmode_crtc = crtc->driver_private; -+ if (drmmode_crtc->mode_crtc->buffer_id) -+ crtc_id = i; -+ } -+ -+ src = create_pixmap_for_fbcon(drmmode, pScrn, crtc_id); -+ if (!src) -+ return; -+ -+ dst = GetScratchPixmapHeader(pScreen, -+ pScrn->virtualX, pScrn->virtualY, -+ pScrn->depth, pScrn->bitsPerPixel, -+ pitch, NULL); -+ radeon_set_pixmap_bo(dst, info->front_bo); -+ info->accel_state->exa->PrepareCopy (src, dst, -+ -1, -1, GXcopy, FB_ALLONES); -+ info->accel_state->exa->Copy (dst, 0, 0, 0, 0, -+ pScrn->virtualX, pScrn->virtualY); -+ info->accel_state->exa->DoneCopy (dst); -+ radeon_cs_flush_indirect(pScrn); -+ -+ (*pScreen->DestroyPixmap)(dst); -+ (*pScreen->DestroyPixmap)(src); -+ -+} -+ -+static Bool -+drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, -+ Rotation rotation, int x, int y) -+{ -+ ScrnInfoPtr pScrn = crtc->scrn; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ int saved_x, saved_y; -+ Rotation saved_rotation; -+ DisplayModeRec saved_mode; -+ uint32_t *output_ids; -+ int output_count = 0; -+ Bool ret = TRUE; -+ int i; -+ int fb_id; -+ drmModeModeInfo kmode; -+ int pitch = pScrn->displayWidth * info->CurrentLayout.pixel_bytes; -+ -+ if (drmmode->fb_id == 0) { -+ ret = drmModeAddFB(drmmode->fd, -+ pScrn->virtualX, pScrn->virtualY, -+ pScrn->depth, pScrn->bitsPerPixel, -+ pitch, -+ info->front_bo->handle, -+ &drmmode->fb_id); -+ if (ret < 0) { -+ ErrorF("failed to add fb\n"); -+ return FALSE; -+ } -+ } -+ -+ saved_mode = crtc->mode; -+ saved_x = crtc->x; -+ saved_y = crtc->y; -+ saved_rotation = crtc->rotation; -+ -+ if (mode) { -+ crtc->mode = *mode; -+ crtc->x = x; -+ crtc->y = y; -+ crtc->rotation = rotation; -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,5,99,0,0) -+ crtc->transformPresent = FALSE; -+#endif -+ } -+ -+ output_ids = xcalloc(sizeof(uint32_t), xf86_config->num_output); -+ if (!output_ids) { -+ ret = FALSE; -+ goto done; -+ } -+ -+ if (mode) { -+ for (i = 0; i < xf86_config->num_output; i++) { -+ xf86OutputPtr output = xf86_config->output[i]; -+ drmmode_output_private_ptr drmmode_output; -+ -+ if (output->crtc != crtc) -+ continue; -+ -+ drmmode_output = output->driver_private; -+ output_ids[output_count] = drmmode_output->mode_output->connector_id; -+ output_count++; -+ } -+ -+ if (!xf86CrtcRotate(crtc)) { -+ goto done; -+ } -+ -+ drmmode_ConvertToKMode(crtc->scrn, &kmode, mode); -+ -+ fb_id = drmmode->fb_id; -+ if (drmmode_crtc->rotate_fb_id) { -+ fb_id = drmmode_crtc->rotate_fb_id; -+ x = y = 0; -+ } -+ ret = drmModeSetCrtc(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, -+ fb_id, x, y, output_ids, output_count, &kmode); -+ if (ret) -+ xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, -+ "failed to set mode: %s", strerror(-ret)); -+ else -+ ret = TRUE; -+ -+ if (crtc->scrn->pScreen) -+ xf86CrtcSetScreenSubpixelOrder(crtc->scrn->pScreen); -+ /* go through all the outputs and force DPMS them back on? */ -+ for (i = 0; i < xf86_config->num_output; i++) { -+ xf86OutputPtr output = xf86_config->output[i]; -+ -+ if (output->crtc != crtc) -+ continue; -+ -+ output->funcs->dpms(output, DPMSModeOn); -+ } -+ } -+ -+ -+ -+done: -+ if (!ret) { -+ crtc->x = saved_x; -+ crtc->y = saved_y; -+ crtc->rotation = saved_rotation; -+ crtc->mode = saved_mode; -+ } -+#if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3 -+ else -+ crtc->active = TRUE; -+#endif -+ -+ return ret; -+} -+ -+static void -+drmmode_set_cursor_colors (xf86CrtcPtr crtc, int bg, int fg) -+{ -+ -+} -+ -+static void -+drmmode_set_cursor_position (xf86CrtcPtr crtc, int x, int y) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ -+ drmModeMoveCursor(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, x, y); -+} -+ -+static void -+drmmode_load_cursor_argb (xf86CrtcPtr crtc, CARD32 *image) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ void *ptr; -+ -+ /* cursor should be mapped already */ -+ ptr = drmmode_crtc->cursor_bo->ptr; -+ -+ memcpy (ptr, image, 64 * 64 * 4); -+ -+ return; -+} -+ -+ -+static void -+drmmode_hide_cursor (xf86CrtcPtr crtc) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ -+ drmModeSetCursor(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, 0, 64, 64); -+ -+} -+ -+static void -+drmmode_show_cursor (xf86CrtcPtr crtc) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ uint32_t handle = drmmode_crtc->cursor_bo->handle; -+ -+ drmModeSetCursor(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, handle, 64, 64); -+} -+ -+static void * -+drmmode_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ int size; -+ struct radeon_bo *rotate_bo; -+ int ret; -+ unsigned long rotate_pitch; -+ -+ width = RADEON_ALIGN(width, 63); -+ rotate_pitch = width * drmmode->cpp; -+ -+ size = rotate_pitch * height; -+ -+ rotate_bo = radeon_bo_open(drmmode->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -+ if (rotate_bo == NULL) -+ return NULL; -+ -+ radeon_bo_map(rotate_bo, 1); -+ -+ ret = drmModeAddFB(drmmode->fd, width, height, crtc->scrn->depth, -+ crtc->scrn->bitsPerPixel, rotate_pitch, -+ rotate_bo->handle, -+ &drmmode_crtc->rotate_fb_id); -+ if (ret) { -+ ErrorF("failed to add rotate fb\n"); -+ } -+ -+ drmmode_crtc->rotate_bo = rotate_bo; -+ return drmmode_crtc->rotate_bo->ptr; -+} -+ -+static PixmapPtr -+drmmode_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height) -+{ -+ ScrnInfoPtr pScrn = crtc->scrn; -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ unsigned long rotate_pitch; -+ PixmapPtr rotate_pixmap; -+ -+ if (!data) -+ data = drmmode_crtc_shadow_allocate (crtc, width, height); -+ -+ rotate_pitch = RADEON_ALIGN(width, 63) * drmmode->cpp; -+ -+ rotate_pixmap = GetScratchPixmapHeader(pScrn->pScreen, -+ width, height, -+ pScrn->depth, -+ pScrn->bitsPerPixel, -+ rotate_pitch, -+ data); -+ -+ if (rotate_pixmap == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Couldn't allocate shadow pixmap for rotated CRTC\n"); -+ } -+ -+ if (drmmode_crtc->rotate_bo) -+ radeon_set_pixmap_bo(rotate_pixmap, drmmode_crtc->rotate_bo); -+ return rotate_pixmap; -+ -+} -+ -+static void -+drmmode_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ -+ if (rotate_pixmap) -+ FreeScratchPixmapHeader(rotate_pixmap); -+ -+ if (data) { -+ drmModeRmFB(drmmode->fd, drmmode_crtc->rotate_fb_id); -+ drmmode_crtc->rotate_fb_id = 0; -+ radeon_bo_unmap(drmmode_crtc->rotate_bo); -+ radeon_bo_unref(drmmode_crtc->rotate_bo); -+ drmmode_crtc->rotate_bo = NULL; -+ } -+ -+} -+ -+static void -+drmmode_crtc_gamma_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green, -+ uint16_t *blue, int size) -+{ -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ -+ drmModeCrtcSetGamma(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, -+ size, red, green, blue); -+} -+ -+static const xf86CrtcFuncsRec drmmode_crtc_funcs = { -+ .dpms = drmmode_crtc_dpms, -+ .set_mode_major = drmmode_set_mode_major, -+ .set_cursor_colors = drmmode_set_cursor_colors, -+ .set_cursor_position = drmmode_set_cursor_position, -+ .show_cursor = drmmode_show_cursor, -+ .hide_cursor = drmmode_hide_cursor, -+ .load_cursor_argb = drmmode_load_cursor_argb, -+ -+ .gamma_set = drmmode_crtc_gamma_set, -+ .shadow_create = drmmode_crtc_shadow_create, -+ .shadow_allocate = drmmode_crtc_shadow_allocate, -+ .shadow_destroy = drmmode_crtc_shadow_destroy, -+ .destroy = NULL, /* XXX */ -+}; -+ -+ -+static void -+drmmode_crtc_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num) -+{ -+ xf86CrtcPtr crtc; -+ drmmode_crtc_private_ptr drmmode_crtc; -+ -+ crtc = xf86CrtcCreate(pScrn, &drmmode_crtc_funcs); -+ if (crtc == NULL) -+ return; -+ -+ drmmode_crtc = xnfcalloc(sizeof(drmmode_crtc_private_rec), 1); -+ drmmode_crtc->mode_crtc = drmModeGetCrtc(drmmode->fd, drmmode->mode_res->crtcs[num]); -+ drmmode_crtc->drmmode = drmmode; -+ crtc->driver_private = drmmode_crtc; -+ -+ return; -+} -+ -+static xf86OutputStatus -+drmmode_output_detect(xf86OutputPtr output) -+{ -+ /* go to the hw and retrieve a new output struct */ -+ drmmode_output_private_ptr drmmode_output = output->driver_private; -+ drmmode_ptr drmmode = drmmode_output->drmmode; -+ xf86OutputStatus status; -+ drmModeFreeConnector(drmmode_output->mode_output); -+ -+ drmmode_output->mode_output = drmModeGetConnector(drmmode->fd, drmmode_output->output_id); -+ -+ switch (drmmode_output->mode_output->connection) { -+ case DRM_MODE_CONNECTED: -+ status = XF86OutputStatusConnected; -+ break; -+ case DRM_MODE_DISCONNECTED: -+ status = XF86OutputStatusDisconnected; -+ break; -+ default: -+ case DRM_MODE_UNKNOWNCONNECTION: -+ status = XF86OutputStatusUnknown; -+ break; -+ } -+ return status; -+} -+ -+static Bool -+drmmode_output_mode_valid(xf86OutputPtr output, DisplayModePtr pModes) -+{ -+ return MODE_OK; -+} -+ -+static DisplayModePtr -+drmmode_output_get_modes(xf86OutputPtr output) -+{ -+ drmmode_output_private_ptr drmmode_output = output->driver_private; -+ drmModeConnectorPtr koutput = drmmode_output->mode_output; -+ drmmode_ptr drmmode = drmmode_output->drmmode; -+ int i; -+ DisplayModePtr Modes = NULL, Mode; -+ drmModePropertyPtr props; -+ -+ /* look for an EDID property */ -+ for (i = 0; i < koutput->count_props; i++) { -+ props = drmModeGetProperty(drmmode->fd, koutput->props[i]); -+ if (props && (props->flags & DRM_MODE_PROP_BLOB)) { -+ if (!strcmp(props->name, "EDID")) { -+ if (drmmode_output->edid_blob) -+ drmModeFreePropertyBlob(drmmode_output->edid_blob); -+ drmmode_output->edid_blob = drmModeGetPropertyBlob(drmmode->fd, koutput->prop_values[i]); -+ } -+ drmModeFreeProperty(props); -+ } -+ } -+ -+ if (drmmode_output->edid_blob) -+ xf86OutputSetEDID(output, xf86InterpretEDID(output->scrn->scrnIndex, drmmode_output->edid_blob->data)); -+ else -+ xf86OutputSetEDID(output, xf86InterpretEDID(output->scrn->scrnIndex, NULL)); -+ -+ /* modes should already be available */ -+ for (i = 0; i < koutput->count_modes; i++) { -+ Mode = xnfalloc(sizeof(DisplayModeRec)); -+ -+ drmmode_ConvertFromKMode(output->scrn, &koutput->modes[i], Mode); -+ Modes = xf86ModesAdd(Modes, Mode); -+ -+ } -+ return Modes; -+} -+ -+static void -+drmmode_output_destroy(xf86OutputPtr output) -+{ -+ drmmode_output_private_ptr drmmode_output = output->driver_private; -+ -+ if (drmmode_output->edid_blob) -+ drmModeFreePropertyBlob(drmmode_output->edid_blob); -+ drmModeFreeConnector(drmmode_output->mode_output); -+ xfree(drmmode_output); -+ output->driver_private = NULL; -+} -+ -+static void -+drmmode_output_dpms(xf86OutputPtr output, int mode) -+{ -+ drmmode_output_private_ptr drmmode_output = output->driver_private; -+ drmModeConnectorPtr koutput = drmmode_output->mode_output; -+ drmmode_ptr drmmode = drmmode_output->drmmode; -+ -+ drmModeConnectorSetProperty(drmmode->fd, koutput->connector_id, -+ drmmode_output->dpms_enum_id, mode); -+ return; -+} -+ -+static const xf86OutputFuncsRec drmmode_output_funcs = { -+ .dpms = drmmode_output_dpms, -+#if 0 -+ -+ .save = drmmode_crt_save, -+ .restore = drmmode_crt_restore, -+ .mode_fixup = drmmode_crt_mode_fixup, -+ .prepare = drmmode_output_prepare, -+ .mode_set = drmmode_crt_mode_set, -+ .commit = drmmode_output_commit, -+#endif -+ .detect = drmmode_output_detect, -+ .mode_valid = drmmode_output_mode_valid, -+ -+ .get_modes = drmmode_output_get_modes, -+ .destroy = drmmode_output_destroy -+}; -+ -+static int subpixel_conv_table[7] = { 0, SubPixelUnknown, -+ SubPixelHorizontalRGB, -+ SubPixelHorizontalBGR, -+ SubPixelVerticalRGB, -+ SubPixelVerticalBGR, -+ SubPixelNone }; -+ -+const char *output_names[] = { "None", -+ "VGA", -+ "DVI", -+ "DVI", -+ "DVI", -+ "Composite", -+ "TV", -+ "LVDS", -+ "CTV", -+ "DIN", -+ "DP", -+ "HDMI", -+ "HDMI", -+}; -+ -+static void -+drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num) -+{ -+ xf86OutputPtr output; -+ drmModeConnectorPtr koutput; -+ drmModeEncoderPtr kencoder; -+ drmmode_output_private_ptr drmmode_output; -+ drmModePropertyPtr props; -+ char name[32]; -+ int i; -+ -+ koutput = drmModeGetConnector(drmmode->fd, drmmode->mode_res->connectors[num]); -+ if (!koutput) -+ return; -+ -+ kencoder = drmModeGetEncoder(drmmode->fd, koutput->encoders[0]); -+ if (!kencoder) { -+ drmModeFreeConnector(koutput); -+ return; -+ } -+ -+ /* need to do smart conversion here for compat with non-kms ATI driver */ -+ if (koutput->connector_type_id == 1) { -+ switch(koutput->connector_type) { -+ case DRM_MODE_CONNECTOR_VGA: -+ case DRM_MODE_CONNECTOR_DVII: -+ case DRM_MODE_CONNECTOR_DVID: -+ case DRM_MODE_CONNECTOR_DVIA: -+ case DRM_MODE_CONNECTOR_HDMIA: -+ case DRM_MODE_CONNECTOR_HDMIB: -+ snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1); -+ break; -+ default: -+ snprintf(name, 32, "%s", output_names[koutput->connector_type]); -+ break; -+ } -+ } else { -+ snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1); -+ } -+ -+ output = xf86OutputCreate (pScrn, &drmmode_output_funcs, name); -+ if (!output) { -+ drmModeFreeEncoder(kencoder); -+ drmModeFreeConnector(koutput); -+ return; -+ } -+ -+ drmmode_output = xcalloc(sizeof(drmmode_output_private_rec), 1); -+ if (!drmmode_output) { -+ xf86OutputDestroy(output); -+ drmModeFreeConnector(koutput); -+ drmModeFreeEncoder(kencoder); -+ return; -+ } -+ -+ drmmode_output->output_id = drmmode->mode_res->connectors[num]; -+ drmmode_output->mode_output = koutput; -+ drmmode_output->mode_encoder = kencoder; -+ drmmode_output->drmmode = drmmode; -+ output->mm_width = koutput->mmWidth; -+ output->mm_height = koutput->mmHeight; -+ -+ output->subpixel_order = subpixel_conv_table[koutput->subpixel]; -+ output->driver_private = drmmode_output; -+ -+ output->possible_crtcs = kencoder->possible_crtcs; -+ output->possible_clones = kencoder->possible_clones; -+ -+ for (i = 0; i < koutput->count_props; i++) { -+ props = drmModeGetProperty(drmmode->fd, koutput->props[i]); -+ if (props && (props->flags && DRM_MODE_PROP_ENUM)) { -+ if (!strcmp(props->name, "DPMS")) { -+ drmmode_output->dpms_enum_id = koutput->props[i]; -+ drmModeFreeProperty(props); -+ break; -+ } -+ drmModeFreeProperty(props); -+ } -+ } -+ -+ return; -+} -+ -+static Bool -+drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); -+ drmmode_crtc_private_ptr -+ drmmode_crtc = xf86_config->crtc[0]->driver_private; -+ drmmode_ptr drmmode = drmmode_crtc->drmmode; -+ RADEONInfoPtr info = RADEONPTR(scrn); -+ struct radeon_bo *old_front = NULL; -+ Bool ret; -+ ScreenPtr screen = screenInfo.screens[scrn->scrnIndex]; -+ uint32_t old_fb_id; -+ int i, pitch, old_width, old_height, old_pitch; -+ int screen_size; -+ int cpp = info->CurrentLayout.pixel_bytes; -+ struct radeon_bo *front_bo; -+ -+ if (scrn->virtualX == width && scrn->virtualY == height) -+ return TRUE; -+ -+ front_bo = radeon_get_pixmap_bo(screen->GetScreenPixmap(screen)); -+ radeon_cs_flush_indirect(scrn); -+ -+ if (front_bo) -+ radeon_bo_wait(front_bo); -+ -+ pitch = RADEON_ALIGN(width, 63); -+ height = RADEON_ALIGN(height, 16); -+ -+ screen_size = pitch * height * cpp; -+ -+ xf86DrvMsg(scrn->scrnIndex, X_INFO, -+ "Allocate new frame buffer %dx%d stride %d\n", -+ width, height, pitch); -+ -+ old_width = scrn->virtualX; -+ old_height = scrn->virtualY; -+ old_pitch = scrn->displayWidth; -+ old_fb_id = drmmode->fb_id; -+ old_front = info->front_bo; -+ -+ scrn->virtualX = width; -+ scrn->virtualY = height; -+ scrn->displayWidth = pitch; -+ -+ info->front_bo = radeon_bo_open(info->bufmgr, 0, screen_size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -+ if (!info->front_bo) -+ goto fail; -+ -+ ret = drmModeAddFB(drmmode->fd, width, height, scrn->depth, -+ scrn->bitsPerPixel, pitch * cpp, -+ info->front_bo->handle, -+ &drmmode->fb_id); -+ if (ret) -+ goto fail; -+ -+ radeon_set_pixmap_bo(screen->GetScreenPixmap(screen), info->front_bo); -+ screen->ModifyPixmapHeader(screen->GetScreenPixmap(screen), -+ width, height, -1, -1, pitch * cpp, NULL); -+ -+ // xf86DrvMsg(scrn->scrnIndex, X_INFO, "New front buffer at 0x%lx\n", -+ // info->front_bo-); -+ -+ for (i = 0; i < xf86_config->num_crtc; i++) { -+ xf86CrtcPtr crtc = xf86_config->crtc[i]; -+ -+ if (!crtc->enabled) -+ continue; -+ -+ drmmode_set_mode_major(crtc, &crtc->mode, -+ crtc->rotation, crtc->x, crtc->y); -+ } -+ -+ if (old_fb_id) -+ drmModeRmFB(drmmode->fd, old_fb_id); -+ if (old_front) -+ radeon_bo_unref(old_front); -+ -+ return TRUE; -+ -+ fail: -+ if (info->front_bo) -+ radeon_bo_unref(info->front_bo); -+ info->front_bo = old_front; -+ scrn->virtualX = old_width; -+ scrn->virtualY = old_height; -+ scrn->displayWidth = old_pitch; -+ drmmode->fb_id = old_fb_id; -+ -+ return FALSE; -+} -+ -+static const xf86CrtcConfigFuncsRec drmmode_xf86crtc_config_funcs = { -+ drmmode_xf86crtc_resize -+}; -+ -+ -+Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, char *busId, char *driver_name, int cpp, int zaphod_mask) -+{ -+ xf86CrtcConfigPtr xf86_config; -+ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); -+ int i; -+ Bool ret; -+ -+ /* Create a bus Id */ -+ /* Low level DRM open */ -+ if (!pRADEONEnt->fd) { -+ ret = DRIOpenDRMMaster(pScrn, SAREA_MAX, busId, driver_name); -+ if (!ret) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "[dri] DRIGetVersion failed to open the DRM\n" -+ "[dri] Disabling DRI.\n"); -+ return FALSE; -+ } -+ -+ drmmode->fd = DRIMasterFD(pScrn); -+ pRADEONEnt->fd = drmmode->fd; -+ } else { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ " reusing fd for second head\n"); -+ drmmode->fd = pRADEONEnt->fd; -+ } -+ xf86CrtcConfigInit(pScrn, &drmmode_xf86crtc_config_funcs); -+ xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ -+ drmmode->cpp = cpp; -+ drmmode->mode_res = drmModeGetResources(drmmode->fd); -+ if (!drmmode->mode_res) -+ return FALSE; -+ -+ xf86CrtcSetSizeRange(pScrn, 320, 200, drmmode->mode_res->max_width, drmmode->mode_res->max_height); -+ for (i = 0; i < drmmode->mode_res->count_crtcs; i++) -+ if (zaphod_mask & (1 << i)) -+ drmmode_crtc_init(pScrn, drmmode, i); -+ -+ for (i = 0; i < drmmode->mode_res->count_connectors; i++) -+ if (zaphod_mask & (1 << i)) -+ drmmode_output_init(pScrn, drmmode, i); -+ -+ xf86InitialConfiguration(pScrn, TRUE); -+ -+ return TRUE; -+} -+ -+Bool drmmode_set_bufmgr(ScrnInfoPtr pScrn, drmmode_ptr drmmode, struct radeon_bo_manager *bufmgr) -+{ -+ drmmode->bufmgr = bufmgr; -+ return TRUE; -+} -+ -+ -+ -+void drmmode_set_cursor(ScrnInfoPtr scrn, drmmode_ptr drmmode, int id, struct radeon_bo *bo) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); -+ xf86CrtcPtr crtc = xf86_config->crtc[id]; -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ -+ drmmode_crtc->cursor_bo = bo; -+} -+ -+void drmmode_adjust_frame(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int x, int y, int flags) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); -+ xf86OutputPtr output = config->output[config->compat_output]; -+ xf86CrtcPtr crtc = output->crtc; -+ -+ if (crtc && crtc->enabled) { -+ drmmode_set_mode_major(crtc, &crtc->mode, crtc->rotation, -+ x, y); -+ } -+} -+ -+Bool drmmode_set_desired_modes(ScrnInfoPtr pScrn, drmmode_ptr drmmode) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); -+ int c; -+ -+ drmmode_copy_fb(pScrn, drmmode); -+ -+ for (c = 0; c < config->num_crtc; c++) { -+ xf86CrtcPtr crtc = config->crtc[c]; -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ xf86OutputPtr output = NULL; -+ int o; -+ -+ /* Skip disabled CRTCs */ -+ if (!crtc->enabled) { -+ drmModeSetCrtc(drmmode->fd, drmmode_crtc->mode_crtc->crtc_id, -+ 0, 0, 0, NULL, 0, NULL); -+ continue; -+ } -+ -+ if (config->output[config->compat_output]->crtc == crtc) -+ output = config->output[config->compat_output]; -+ else -+ { -+ for (o = 0; o < config->num_output; o++) -+ if (config->output[o]->crtc == crtc) -+ { -+ output = config->output[o]; -+ break; -+ } -+ } -+ /* paranoia */ -+ if (!output) -+ continue; -+ -+ /* Mark that we'll need to re-set the mode for sure */ -+ memset(&crtc->mode, 0, sizeof(crtc->mode)); -+ if (!crtc->desiredMode.CrtcHDisplay) -+ { -+ DisplayModePtr mode = xf86OutputFindClosestMode (output, pScrn->currentMode); -+ -+ if (!mode) -+ return FALSE; -+ crtc->desiredMode = *mode; -+ crtc->desiredRotation = RR_Rotate_0; -+ crtc->desiredX = 0; -+ crtc->desiredY = 0; -+ } -+ -+ if (!crtc->funcs->set_mode_major(crtc, &crtc->desiredMode, crtc->desiredRotation, -+ crtc->desiredX, crtc->desiredY)) -+ return FALSE; -+ } -+ return TRUE; -+} -+ -+static void drmmode_load_palette(ScrnInfoPtr pScrn, int numColors, -+ int *indices, LOCO *colors, VisualPtr pVisual) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ uint16_t lut_r[256], lut_g[256], lut_b[256]; -+ int index, j, i; -+ int c; -+ -+ for (c = 0; c < xf86_config->num_crtc; c++) { -+ xf86CrtcPtr crtc = xf86_config->crtc[c]; -+ drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; -+ -+ for (i = 0 ; i < 256; i++) { -+ lut_r[i] = drmmode_crtc->lut_r[i] << 6; -+ lut_g[i] = drmmode_crtc->lut_g[i] << 6; -+ lut_b[i] = drmmode_crtc->lut_b[i] << 6; -+ } -+ -+ switch(pScrn->depth) { -+ case 15: -+ for (i = 0; i < numColors; i++) { -+ index = indices[i]; -+ for (j = 0; j < 8; j++) { -+ lut_r[index * 8 + j] = colors[index].red << 6; -+ lut_g[index * 8 + j] = colors[index].green << 6; -+ lut_b[index * 8 + j] = colors[index].blue << 6; -+ } -+ } -+ break; -+ case 16: -+ for (i = 0; i < numColors; i++) { -+ index = indices[i]; -+ -+ if (i <= 31) { -+ for (j = 0; j < 8; j++) { -+ lut_r[index * 8 + j] = colors[index].red << 6; -+ lut_b[index * 8 + j] = colors[index].blue << 6; -+ } -+ } -+ -+ for (j = 0; j < 4; j++) { -+ lut_g[index * 4 + j] = colors[index].green << 6; -+ } -+ } -+ break; -+ default: -+ for (i = 0; i < numColors; i++) { -+ index = indices[i]; -+ lut_r[index] = colors[index].red << 6; -+ lut_g[index] = colors[index].green << 6; -+ lut_b[index] = colors[index].blue << 6; -+ } -+ break; -+ } -+ -+ /* Make the change through RandR */ -+#ifdef RANDR_12_INTERFACE -+ if (crtc->randr_crtc) -+ RRCrtcGammaSet(crtc->randr_crtc, lut_r, lut_g, lut_b); -+ else -+#endif -+ crtc->funcs->gamma_set(crtc, lut_r, lut_g, lut_b, 256); -+ } -+} -+ -+Bool drmmode_setup_colormap(ScreenPtr pScreen, ScrnInfoPtr pScrn) -+{ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing kms color map\n"); -+ if (!miCreateDefColormap(pScreen)) -+ return FALSE; -+ /* all radeons support 10 bit CLUTs */ -+ if (!xf86HandleColormaps(pScreen, 256, 10, -+ drmmode_load_palette, NULL, -+ CMAP_PALETTED_TRUECOLOR -+#if 0 /* This option messes up text mode! (eich@suse.de) */ -+ | CMAP_LOAD_EVEN_IF_OFFSCREEN -+#endif -+ | CMAP_RELOAD_ON_MODE_SWITCH)) -+ return FALSE; -+ return TRUE; -+} -+#endif -diff --git a/src/drmmode_display.h b/src/drmmode_display.h -new file mode 100644 -index 0000000..2713bab ---- /dev/null -+++ b/src/drmmode_display.h -@@ -0,0 +1,74 @@ -+/* -+ * Copyright © 2007 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Authors: -+ * Dave Airlie -+ * -+ */ -+#ifndef DRMMODE_DISPLAY_H -+#define DRMMODE_DISPLAY_H -+ -+#ifdef XF86DRM_MODE -+ -+#include "xf86drmMode.h" -+ -+#include "radeon_probe.h" -+ -+typedef struct { -+ int fd; -+ unsigned fb_id; -+ drmModeResPtr mode_res; -+ drmModeFBPtr mode_fb; -+ int cpp; -+ struct radeon_bo_manager *bufmgr; -+} drmmode_rec, *drmmode_ptr; -+ -+typedef struct { -+ -+ drmmode_ptr drmmode; -+ drmModeCrtcPtr mode_crtc; -+ struct radeon_bo *cursor_bo; -+ struct radeon_bo *rotate_bo; -+ unsigned rotate_fb_id; -+ uint16_t lut_r[256], lut_g[256], lut_b[256]; -+} drmmode_crtc_private_rec, *drmmode_crtc_private_ptr; -+ -+typedef struct { -+ drmmode_ptr drmmode; -+ int output_id; -+ drmModeConnectorPtr mode_output; -+ drmModeEncoderPtr mode_encoder; -+ drmModePropertyBlobPtr edid_blob; -+ int dpms_enum_id; -+} drmmode_output_private_rec, *drmmode_output_private_ptr; -+ -+ -+extern Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, char *busId, char *driver_name, int cpp, int zaphod_mask); -+extern Bool drmmode_set_bufmgr(ScrnInfoPtr pScrn, drmmode_ptr drmmode, struct radeon_bo_manager *bufmgr); -+extern void drmmode_set_cursor(ScrnInfoPtr scrn, drmmode_ptr drmmode, int id, struct radeon_bo *bo); -+void drmmode_adjust_frame(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int x, int y, int flags); -+extern Bool drmmode_set_desired_modes(ScrnInfoPtr pScrn, drmmode_ptr drmmode); -+extern void drmmode_copy_fb(ScrnInfoPtr pScrn, drmmode_ptr drmmode); -+extern Bool drmmode_setup_colormap(ScreenPtr pScreen, ScrnInfoPtr pScrn); -+#endif -+ -+#endif -diff --git a/src/legacy_crtc.c b/src/legacy_crtc.c -index 829b453..b57ba5f 100644 ---- a/src/legacy_crtc.c -+++ b/src/legacy_crtc.c -@@ -649,6 +649,9 @@ radeon_crtc_modeset_ioctl(xf86CrtcPtr crtc, Bool post) - if (!info->directRenderingEnabled) - return; - -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ return; -+ - modeset.crtc = radeon_crtc->crtc_id; - modeset.cmd = post ? _DRM_POST_MODESET : _DRM_PRE_MODESET; - -@@ -661,55 +664,42 @@ radeon_crtc_modeset_ioctl(xf86CrtcPtr crtc, Bool post) - void - legacy_crtc_dpms(xf86CrtcPtr crtc, int mode) - { -- int mask; -+ uint32_t mask; - RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - RADEONEntPtr pRADEONEnt = RADEONEntPriv(crtc->scrn); - unsigned char *RADEONMMIO = pRADEONEnt->MMIO; - -- mask = radeon_crtc->crtc_id ? (RADEON_CRTC2_DISP_DIS | RADEON_CRTC2_VSYNC_DIS | RADEON_CRTC2_HSYNC_DIS | RADEON_CRTC2_DISP_REQ_EN_B) : (RADEON_CRTC_DISPLAY_DIS | RADEON_CRTC_HSYNC_DIS | RADEON_CRTC_VSYNC_DIS); -- -- if (mode == DPMSModeOff) -- radeon_crtc_modeset_ioctl(crtc, FALSE); -+ if (radeon_crtc->crtc_id) -+ mask = (RADEON_CRTC2_EN | -+ RADEON_CRTC2_DISP_DIS | -+ RADEON_CRTC2_VSYNC_DIS | -+ RADEON_CRTC2_HSYNC_DIS | -+ RADEON_CRTC2_DISP_REQ_EN_B); -+ else -+ mask = (RADEON_CRTC_DISPLAY_DIS | -+ RADEON_CRTC_HSYNC_DIS | -+ RADEON_CRTC_VSYNC_DIS); - - switch(mode) { - case DPMSModeOn: - if (radeon_crtc->crtc_id) { -- OUTREGP(RADEON_CRTC2_GEN_CNTL, 0, ~mask); -+ OUTREGP(RADEON_CRTC2_GEN_CNTL, RADEON_CRTC2_EN, ~mask); - } else { -- OUTREGP(RADEON_CRTC_GEN_CNTL, 0, ~RADEON_CRTC_DISP_REQ_EN_B); -+ OUTREGP(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_EN, ~(RADEON_CRTC_EN | RADEON_CRTC_DISP_REQ_EN_B)); - OUTREGP(RADEON_CRTC_EXT_CNTL, 0, ~mask); - } - break; - case DPMSModeStandby: -- if (radeon_crtc->crtc_id) { -- OUTREGP(RADEON_CRTC2_GEN_CNTL, (RADEON_CRTC2_DISP_DIS | RADEON_CRTC2_HSYNC_DIS), ~mask); -- } else { -- OUTREGP(RADEON_CRTC_GEN_CNTL, 0, ~RADEON_CRTC_DISP_REQ_EN_B); -- OUTREGP(RADEON_CRTC_EXT_CNTL, (RADEON_CRTC_DISPLAY_DIS | RADEON_CRTC_HSYNC_DIS), ~mask); -- } -- break; - case DPMSModeSuspend: -- if (radeon_crtc->crtc_id) { -- OUTREGP(RADEON_CRTC2_GEN_CNTL, (RADEON_CRTC2_DISP_DIS | RADEON_CRTC2_VSYNC_DIS), ~mask); -- } else { -- OUTREGP(RADEON_CRTC_GEN_CNTL, 0, ~RADEON_CRTC_DISP_REQ_EN_B); -- OUTREGP(RADEON_CRTC_EXT_CNTL, (RADEON_CRTC_DISPLAY_DIS | RADEON_CRTC_VSYNC_DIS), ~mask); -- } -- break; - case DPMSModeOff: - if (radeon_crtc->crtc_id) { - OUTREGP(RADEON_CRTC2_GEN_CNTL, mask, ~mask); - } else { -- OUTREGP(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_DISP_REQ_EN_B, ~RADEON_CRTC_DISP_REQ_EN_B); -+ OUTREGP(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_DISP_REQ_EN_B, ~(RADEON_CRTC_EN | RADEON_CRTC_DISP_REQ_EN_B)); - OUTREGP(RADEON_CRTC_EXT_CNTL, mask, ~mask); - } - break; - } -- -- if (mode != DPMSModeOff) { -- radeon_crtc_modeset_ioctl(crtc, TRUE); -- radeon_crtc_load_lut(crtc); -- } - } - - -@@ -912,7 +902,6 @@ RADEONInitCrtcRegisters(xf86CrtcPtr crtc, RADEONSavePtr save, - - /*save->bios_4_scratch = info->SavedReg->bios_4_scratch;*/ - save->crtc_gen_cntl = (RADEON_CRTC_EXT_DISP_EN -- | RADEON_CRTC_EN - | (format << 8) - | ((mode->Flags & V_DBLSCAN) - ? RADEON_CRTC_DBL_SCAN_EN -@@ -1160,8 +1149,7 @@ RADEONInitCrtc2Registers(xf86CrtcPtr crtc, RADEONSavePtr save, - else - save->crtc2_gen_cntl = 0; - -- save->crtc2_gen_cntl |= (RADEON_CRTC2_EN -- | (format << 8) -+ save->crtc2_gen_cntl |= ((format << 8) - | RADEON_CRTC2_VSYNC_DIS - | RADEON_CRTC2_HSYNC_DIS - | RADEON_CRTC2_DISP_DIS -@@ -1193,6 +1181,7 @@ RADEONInitPLLRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save, - { - RADEONInfoPtr info = RADEONPTR(pScrn); - uint32_t feedback_div = 0; -+ uint32_t frac_fb_div = 0; - uint32_t reference_div = 0; - uint32_t post_divider = 0; - uint32_t freq = 0; -@@ -1225,7 +1214,7 @@ RADEONInitPLLRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save, - return; - } - -- RADEONComputePLL(pll, mode->Clock, &freq, &feedback_div, &reference_div, &post_divider, flags); -+ RADEONComputePLL(pll, mode->Clock, &freq, &feedback_div, &frac_fb_div, &reference_div, &post_divider, flags); - - for (post_div = &post_divs[0]; post_div->divider; ++post_div) { - if (post_div->divider == post_divider) -@@ -1274,6 +1263,7 @@ RADEONInitPLL2Registers(ScrnInfoPtr pScrn, RADEONSavePtr save, - { - RADEONInfoPtr info = RADEONPTR(pScrn); - uint32_t feedback_div = 0; -+ uint32_t frac_fb_div = 0; - uint32_t reference_div = 0; - uint32_t post_divider = 0; - uint32_t freq = 0; -@@ -1304,7 +1294,7 @@ RADEONInitPLL2Registers(ScrnInfoPtr pScrn, RADEONSavePtr save, - return; - } - -- RADEONComputePLL(pll, mode->Clock, &freq, &feedback_div, &reference_div, &post_divider, flags); -+ RADEONComputePLL(pll, mode->Clock, &freq, &feedback_div, &frac_fb_div, &reference_div, &post_divider, flags); - - for (post_div = &post_divs[0]; post_div->divider; ++post_div) { - if (post_div->divider == post_divider) -@@ -1361,17 +1351,12 @@ RADEONInitDispBandwidthLegacy(ScrnInfoPtr pScrn, - uint32_t temp, data, mem_trcd, mem_trp, mem_tras, mem_trbs=0; - float mem_tcas; - int k1, c; -- uint32_t MemTrcdExtMemCntl[4] = {1, 2, 3, 4}; -- uint32_t MemTrpExtMemCntl[4] = {1, 2, 3, 4}; -- uint32_t MemTrasExtMemCntl[8] = {1, 2, 3, 4, 5, 6, 7, 8}; - -- uint32_t MemTrcdMemTimingCntl[8] = {1, 2, 3, 4, 5, 6, 7, 8}; -- uint32_t MemTrpMemTimingCntl[8] = {1, 2, 3, 4, 5, 6, 7, 8}; -- uint32_t MemTrasMemTimingCntl[16] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; -- -- float MemTcas[8] = {0, 1, 2, 3, 0, 1.5, 2.5, 0}; -+ float MemTcas[8] = {0, 1, 2, 3, 0, 1.5, 2.5, 0.0}; -+ float MemTcas_rs480[8] = {0, 1, 2, 3, 0, 1.5, 2.5, 3.5}; - float MemTcas2[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - float MemTrbs[8] = {1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5}; -+ float MemTrbs_r4xx[8] = {4, 5, 6, 7, 8, 9, 10, 11}; - - float mem_bw, peak_disp_bw; - float min_mem_eff = 0.8; -@@ -1400,9 +1385,6 @@ RADEONInitDispBandwidthLegacy(ScrnInfoPtr pScrn, - OUTREG(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); - } - -- /* R420 and RV410 family not supported yet */ -- if (info->ChipFamily == CHIP_FAMILY_R420 || info->ChipFamily == CHIP_FAMILY_RV410) return; -- - /* - * Determine if there is enough bandwidth for current display mode - */ -@@ -1429,25 +1411,58 @@ RADEONInitDispBandwidthLegacy(ScrnInfoPtr pScrn, - /* Get values from the EXT_MEM_CNTL register...converting its contents. */ - temp = INREG(RADEON_MEM_TIMING_CNTL); - if ((info->ChipFamily == CHIP_FAMILY_RV100) || info->IsIGP) { /* RV100, M6, IGPs */ -- mem_trcd = MemTrcdExtMemCntl[(temp & 0x0c) >> 2]; -- mem_trp = MemTrpExtMemCntl[ (temp & 0x03) >> 0]; -- mem_tras = MemTrasExtMemCntl[(temp & 0x70) >> 4]; -- } else { /* RV200 and later */ -- mem_trcd = MemTrcdMemTimingCntl[(temp & 0x07) >> 0]; -- mem_trp = MemTrpMemTimingCntl[ (temp & 0x700) >> 8]; -- mem_tras = MemTrasMemTimingCntl[(temp & 0xf000) >> 12]; -+ mem_trcd = ((temp >> 2) & 0x3) + 1; -+ mem_trp = ((temp & 0x3)) + 1; -+ mem_tras = ((temp & 0x70) >> 4) + 1; -+ } else if (info->ChipFamily == CHIP_FAMILY_R300 || -+ info->ChipFamily == CHIP_FAMILY_R350) { /* r300, r350 */ -+ mem_trcd = (temp & 0x7) + 1; -+ mem_trp = ((temp >> 8) & 0x7) + 1; -+ mem_tras = ((temp >> 11) & 0xf) + 4; -+ } else if (info->ChipFamily == CHIP_FAMILY_RV350 || -+ info->ChipFamily <= CHIP_FAMILY_RV380) { -+ /* rv3x0 */ -+ mem_trcd = (temp & 0x7) + 3; -+ mem_trp = ((temp >> 8) & 0x7) + 3; -+ mem_tras = ((temp >> 11) & 0xf) + 6; -+ } else if (info->ChipFamily == CHIP_FAMILY_R420 || -+ info->ChipFamily == CHIP_FAMILY_RV410) { -+ /* r4xx */ -+ mem_trcd = (temp & 0xf) + 3; -+ if (mem_trcd > 15) -+ mem_trcd = 15; -+ mem_trp = ((temp >> 8) & 0xf) + 3; -+ if (mem_trp > 15) -+ mem_trp = 15; -+ mem_tras = ((temp >> 12) & 0x1f) + 6; -+ if (mem_tras > 31) -+ mem_tras = 31; -+ } else { /* RV200, R200 */ -+ mem_trcd = (temp & 0x7) + 1; -+ mem_trp = ((temp >> 8) & 0x7) + 1; -+ mem_tras = ((temp >> 12) & 0xf) + 4; - } - - /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ - temp = INREG(RADEON_MEM_SDRAM_MODE_REG); - data = (temp & (7<<20)) >> 20; - if ((info->ChipFamily == CHIP_FAMILY_RV100) || info->IsIGP) { /* RV100, M6, IGPs */ -- mem_tcas = MemTcas [data]; -+ if (info->ChipFamily == CHIP_FAMILY_RS480) /* don't think rs400 */ -+ mem_tcas = MemTcas_rs480[data]; -+ else -+ mem_tcas = MemTcas[data]; - } else { - mem_tcas = MemTcas2 [data]; - } -+ if (info->ChipFamily == CHIP_FAMILY_RS400 || -+ info->ChipFamily == CHIP_FAMILY_RS480) { -+ /* extra cas latency stored in bits 23-25 0-4 clocks */ -+ data = (temp >> 23) & 0x7; -+ if (data < 5) -+ mem_tcas += data; -+ } - -- if (IS_R300_VARIANT) { -+ if (IS_R300_VARIANT && !info->IsIGP) { - /* on the R300, Tcas is included in Trbs. - */ - temp = INREG(RADEON_MEM_CNTL); -@@ -1469,7 +1484,11 @@ RADEONInitDispBandwidthLegacy(ScrnInfoPtr pScrn, - data = (R300_MEM_RBS_POSITION_A_MASK & temp); - } - -- mem_trbs = MemTrbs[data]; -+ if (info->ChipFamily == CHIP_FAMILY_RV410 || -+ info->ChipFamily == CHIP_FAMILY_R420) -+ mem_trbs = MemTrbs_r4xx[data]; -+ else -+ mem_trbs = MemTrbs[data]; - mem_tcas += mem_trbs; - } - -diff --git a/src/legacy_output.c b/src/legacy_output.c -index 423a3e2..7134ee1 100644 ---- a/src/legacy_output.c -+++ b/src/legacy_output.c -@@ -150,7 +150,6 @@ void - RADEONGetExtTMDSInfo(ScrnInfoPtr pScrn, radeon_dvo_ptr dvo) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -- I2CBusPtr pDVOBus; - - if (!info->IsAtomBios) { - #if defined(__powerpc__) -@@ -162,11 +161,11 @@ RADEONGetExtTMDSInfo(ScrnInfoPtr pScrn, radeon_dvo_ptr dvo) - dvo->dvo_i2c_slave_addr = 0x70; - } - #endif -- if (RADEONI2CInit(pScrn, &pDVOBus, "DVO", &dvo->dvo_i2c)) { -+ if (RADEONI2CInit(pScrn, &dvo->pI2CBus, "DVO", &dvo->dvo_i2c)) { - dvo->DVOChip = -- RADEONDVODeviceInit(pDVOBus, dvo->dvo_i2c_slave_addr); -+ RADEONDVODeviceInit(dvo->pI2CBus, dvo->dvo_i2c_slave_addr); - if (!dvo->DVOChip) -- xfree(pDVOBus); -+ xfree(dvo->pI2CBus); - } - } - } -@@ -481,7 +480,7 @@ RADEONRestoreDVOChip(ScrnInfoPtr pScrn, xf86OutputPtr output) - if (!dvo->DVOChip) - return; - -- RADEONI2CDoLock(output, TRUE); -+ RADEONI2CDoLock(output, dvo->pI2CBus, TRUE); - if (!RADEONInitExtTMDSInfoFromBIOS(output)) { - if (dvo->DVOChip) { - switch(info->ext_tmds_chip) { -@@ -511,7 +510,7 @@ RADEONRestoreDVOChip(ScrnInfoPtr pScrn, xf86OutputPtr output) - } - } - } -- RADEONI2CDoLock(output, FALSE); -+ RADEONI2CDoLock(output, dvo->pI2CBus, FALSE); - } - - #if 0 -diff --git a/src/pcidb/ati_pciids.csv b/src/pcidb/ati_pciids.csv -index b361d9d..e730228 100644 ---- a/src/pcidb/ati_pciids.csv -+++ b/src/pcidb/ati_pciids.csv -@@ -61,6 +61,8 @@ - "0x4A4E","R420_JN","R420",1,,,,,"ATI Radeon Mobility 9800 (M18) JN (AGP)" - "0x4A4F","R420_4A4F","R420",,,,,,"ATI Radeon X800 SE (R420) (AGP)" - "0x4A50","R420_JP","R420",,,,,,"ATI Radeon X800XT (R420) JP (AGP)" -+"0x4A54","R420_JT","R420",,,,,,"ATI Radeon X800 VE (R420) JT (AGP)" -+"0x4B48","R481_4B48","R420",,,,,,"ATI Radeon X850 (R480) (AGP)" - "0x4B49","R481_4B49","R420",,,,,,"ATI Radeon X850 XT (R480) (AGP)" - "0x4B4A","R481_4B4A","R420",,,,,,"ATI Radeon X850 SE (R480) (AGP)" - "0x4B4B","R481_4B4B","R420",,,,,,"ATI Radeon X850 PRO (R480) (AGP)" -@@ -335,6 +337,7 @@ - "0x9440","RV770_9440","RV770",,,,,,"ATI Radeon 4800 Series" - "0x9441","RV770_9441","RV770",,,,,,"ATI Radeon HD 4870 x2" - "0x9442","RV770_9442","RV770",,,,,,"ATI Radeon 4800 Series" -+"0x9443","RV770_9443","RV770",,,,,,"ATI Radeon HD 4850 x2" - "0x9444","RV770_9444","RV770",,,,,,"ATI FirePro V8750 (FireGL)" - "0x9446","RV770_9446","RV770",,,,,,"ATI FirePro V7760 (FireGL)" - "0x944A","RV770_944A","RV770",1,,,,,"ATI Mobility RADEON HD 4850" -@@ -352,15 +355,26 @@ - "0x946B","RV770_946B","RV770",1,,,,,"ATI M98" - "0x947A","RV770_947A","RV770",1,,,,,"ATI M98" - "0x947B","RV770_947B","RV770",1,,,,,"ATI M98" -+"0x9480","RV730_9480","RV730",1,,,,,"ATI Mobility Radeon HD 4650" - "0x9487","RV730_9487","RV730",,,,,,"ATI Radeon RV730 (AGP)" -+"0x9488","RV730_9488","RV730",1,,,,,"ATI Mobility Radeon HD 4670" - "0x9489","RV730_9489","RV730",1,,,,,"ATI FirePro M5750" - "0x948F","RV730_948F","RV730",,,,,,"ATI Radeon RV730 (AGP)" - "0x9490","RV730_9490","RV730",,,,,,"ATI RV730XT [Radeon HD 4670]" - "0x9491","RV730_9491","RV730",,,,,,"ATI RADEON E4600" -+"0x9495","RV730_9495","RV730",,,,,,"ATI Radeon HD 4600 Series" - "0x9498","RV730_9498","RV730",,,,,,"ATI RV730 PRO [Radeon HD 4650]" - "0x949C","RV730_949C","RV730",,,,,,"ATI FirePro V7750 (FireGL)" - "0x949E","RV730_949E","RV730",,,,,,"ATI FirePro V5700 (FireGL)" - "0x949F","RV730_949F","RV730",,,,,,"ATI FirePro V3750 (FireGL)" -+"0x94A0","RV740_94A0","RV740",1,,,,,"ATI Mobility Radeon HD 4830" -+"0x94A1","RV740_94A1","RV740",1,,,,,"ATI Mobility Radeon HD 4850" -+"0x94A3","RV740_94A3","RV740",1,,,,,"ATI FirePro M7740" -+"0x94B1","RV740_94B1","RV740",,,,,,"ATI RV740" -+"0x94B3","RV740_94B3","RV740",,,,,,"ATI Radeon HD 4770" -+"0x94B4","RV740_94B4","RV740",,,,,,"ATI Radeon HD 4700 Series" -+"0x94B5","RV740_94B5","RV740",,,,,,"ATI Radeon HD 4770" -+"0x94B9","RV740_94B9","RV740",1,,,,,"ATI FirePro M5750" - "0x94C0","RV610_94C0","RV610",,,,,,"ATI RV610" - "0x94C1","RV610_94C1","RV610",,,,,,"ATI Radeon HD 2400 XT" - "0x94C3","RV610_94C3","RV610",,,,,,"ATI Radeon HD 2400 Pro" -@@ -393,6 +407,7 @@ - "0x9552","RV710_9552","RV710",1,,,,,"ATI Mobility Radeon 4300 Series" - "0x9553","RV710_9553","RV710",1,,,,,"ATI Mobility Radeon 4500 Series" - "0x9555","RV710_9555","RV710",1,,,,,"ATI Mobility Radeon 4500 Series" -+"0x9557","RV710_9557","RV710",1,,,,,"ATI FirePro RG220" - "0x9580","RV630_9580","RV630",,,,,,"ATI RV630" - "0x9581","RV630_9581","RV630",1,,,,,"ATI Mobility Radeon HD 2600" - "0x9583","RV630_9583","RV630",1,,,,,"ATI Mobility Radeon HD 2600 XT" -diff --git a/src/pcidb/parse_pci_ids.pl b/src/pcidb/parse_pci_ids.pl -index a3a8af8..d1900a4 100755 ---- a/src/pcidb/parse_pci_ids.pl -+++ b/src/pcidb/parse_pci_ids.pl -@@ -33,7 +33,7 @@ print PCICHIPSET "PciChipsets RADEONPciChipsets[] = {\n"; - print PCIDEVICEMATCH "/* This file is autogenerated please do not edit */\n"; - print PCIDEVICEMATCH "static const struct pci_id_match radeon_device_match[] = {\n"; - print RADEONCHIPINFO "/* This file is autogenerated please do not edit */\n"; --print RADEONCHIPINFO "RADEONCardInfo RADEONCards[] = {\n"; -+print RADEONCHIPINFO "static RADEONCardInfo RADEONCards[] = {\n"; - while () { - if ($csv->parse($_)) { - my @columns = $csv->fields(); -diff --git a/src/r600_exa.c b/src/r600_exa.c -index 2dc33a8..485322d 100644 ---- a/src/r600_exa.c -+++ b/src/r600_exa.c -@@ -136,6 +136,10 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - -+ set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); -+ set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); -+ set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); -+ - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -@@ -219,7 +223,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) - (0x03 << DEFAULT_VAL_shift) | - FLAT_SHADE_bit | - SEL_CENTROID_bit)); -- EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0); -+ EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); - - /* PS alu constants */ - if (pPix->drawable.bitsPerPixel == 16) { -@@ -351,7 +355,7 @@ R600DoneSolid(PixmapPtr pPix) - static void - R600DoPrepareCopy(ScrnInfoPtr pScrn, - int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, -- int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp, -+ int dst_pitch, int dst_width, int dst_height, uint32_t dst_offset, int dst_bpp, - int rop, Pixel planemask) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -@@ -379,6 +383,10 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - -+ set_generic_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); -+ set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); -+ set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); -+ - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + -@@ -689,7 +697,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, - R600DoPrepareCopy(pScrn, - accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, - accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, -- accel_state->dst_pitch, pDst->drawable.height, -+ accel_state->dst_pitch, pDst->drawable.width, pDst->drawable.height, - accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, - rop, planemask); - -@@ -738,7 +746,7 @@ R600OverlapCopy(PixmapPtr pDst, - if (srcY > dstY ) { /* diagonal up */ - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); - R600DoCopy(pScrn); -@@ -748,7 +756,7 @@ R600OverlapCopy(PixmapPtr pDst, - } else { /* diagonal down */ - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); - R600DoCopy(pScrn); -@@ -759,7 +767,7 @@ R600OverlapCopy(PixmapPtr pDst, - if (srcX > dstX ) { /* diagonal left */ - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); - R600DoCopy(pScrn); -@@ -769,7 +777,7 @@ R600OverlapCopy(PixmapPtr pDst, - } else { /* diagonal right */ - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); - R600DoCopy(pScrn); -@@ -785,7 +793,7 @@ R600OverlapCopy(PixmapPtr pDst, - for (i = w; i > 0; i -= hchunk) { - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); - R600DoCopy(pScrn); -@@ -795,7 +803,7 @@ R600OverlapCopy(PixmapPtr pDst, - for (i = 0; i < w; i += hchunk) { - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - - R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); -@@ -808,7 +816,7 @@ R600OverlapCopy(PixmapPtr pDst, - for (i = 0; i < h; i += vchunk) { - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - - if (vchunk > h - i) vchunk = h - i; -@@ -820,7 +828,7 @@ R600OverlapCopy(PixmapPtr pDst, - for (i = h; i > 0; i -= vchunk) { - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - - if (vchunk > i) vchunk = i; -@@ -832,7 +840,7 @@ R600OverlapCopy(PixmapPtr pDst, - } else { - R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, -+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - - R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); -@@ -863,13 +871,13 @@ R600Copy(PixmapPtr pDst, - - R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, -- pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); - R600DoCopy(pScrn); - R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, -- pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); - R600DoCopy(pScrn); -@@ -881,7 +889,7 @@ R600Copy(PixmapPtr pDst, - - R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, -- pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, -+ pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, - accel_state->rop, accel_state->planemask); - R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); - R600DoCopy(pScrn); -@@ -924,17 +932,6 @@ do { \ - - #define xFixedToFloat(f) (((float) (f)) / 65536) - --static inline void transformPoint(PictTransform *transform, xPointFixed *point) --{ -- PictVector v; -- v.vector[0] = point->x; -- v.vector[1] = point->y; -- v.vector[2] = xFixed1; -- PictureTransformPoint(transform, &v); -- point->x = v.vector[0]; -- point->y = v.vector[1]; --} -- - struct blendinfo { - Bool dst_alpha; - Bool src_alpha; -@@ -1099,6 +1096,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, - tex_resource_t tex_res; - tex_sampler_t tex_samp; - int pix_r, pix_g, pix_b, pix_a; -+ float vs_alu_consts[8]; - - CLEAR (tex_res); - CLEAR (tex_samp); -@@ -1118,9 +1116,6 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, - break; - } - -- accel_state->texW[unit] = w; -- accel_state->texH[unit] = h; -- - /* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */ - - /* flush texture cache */ -@@ -1179,7 +1174,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, - } - - if (unit == 0) { -- if (!accel_state->has_mask) { -+ if (!accel_state->msk_pic) { - if (PICT_FORMAT_RGB(pPict->format) == 0) { - pix_r = SQ_SEL_0; - pix_g = SQ_SEL_0; -@@ -1294,9 +1289,34 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, - if (pPict->transform != 0) { - accel_state->is_transform[unit] = TRUE; - accel_state->transform[unit] = pPict->transform; -- } else -+ -+ vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); -+ vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); -+ vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); -+ vs_alu_consts[3] = 1.0 / w; -+ -+ vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); -+ vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); -+ vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); -+ vs_alu_consts[7] = 1.0 / h; -+ } else { - accel_state->is_transform[unit] = FALSE; - -+ vs_alu_consts[0] = 1.0; -+ vs_alu_consts[1] = 0.0; -+ vs_alu_consts[2] = 0.0; -+ vs_alu_consts[3] = 1.0 / w; -+ -+ vs_alu_consts[4] = 0.0; -+ vs_alu_consts[5] = 1.0; -+ vs_alu_consts[6] = 0.0; -+ vs_alu_consts[7] = 1.0 / h; -+ } -+ -+ /* VS alu constants */ -+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2), -+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); -+ - return TRUE; - } - -@@ -1385,7 +1405,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, - /* return FALSE; */ - - if (pMask) { -- accel_state->has_mask = TRUE; -+ accel_state->msk_pic = pMaskPicture; - if (pMaskPicture->componentAlpha) { - accel_state->component_alpha = TRUE; - if (R600BlendOp[op].src_alpha) -@@ -1397,7 +1417,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, - accel_state->src_alpha = FALSE; - } - } else { -- accel_state->has_mask = FALSE; -+ accel_state->msk_pic = NULL; - accel_state->component_alpha = FALSE; - accel_state->src_alpha = FALSE; - } -@@ -1430,6 +1450,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - -+ set_generic_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); -+ set_screen_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); -+ set_window_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); -+ - if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { - R600IBDiscard(pScrn, accel_state->ib); - return FALSE; -@@ -1586,15 +1610,7 @@ static void R600Composite(PixmapPtr pDst, - srcBottomRight.x = IntToxFixed(srcX + w); - srcBottomRight.y = IntToxFixed(srcY + h); - -- /* XXX do transform in vertex shader */ -- if (accel_state->is_transform[0]) { -- transformPoint(accel_state->transform[0], &srcTopLeft); -- transformPoint(accel_state->transform[0], &srcTopRight); -- transformPoint(accel_state->transform[0], &srcBottomLeft); -- transformPoint(accel_state->transform[0], &srcBottomRight); -- } -- -- if (accel_state->has_mask) { -+ if (accel_state->msk_pic) { - xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; - - if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) { -@@ -1616,33 +1632,26 @@ static void R600Composite(PixmapPtr pDst, - maskBottomRight.x = IntToxFixed(maskX + w); - maskBottomRight.y = IntToxFixed(maskY + h); - -- if (accel_state->is_transform[1]) { -- transformPoint(accel_state->transform[1], &maskTopLeft); -- transformPoint(accel_state->transform[1], &maskTopRight); -- transformPoint(accel_state->transform[1], &maskBottomLeft); -- transformPoint(accel_state->transform[1], &maskBottomRight); -- } -- - vb[0] = (float)dstX; - vb[1] = (float)dstY; -- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; -- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; -- vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; -- vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; -+ vb[2] = xFixedToFloat(srcTopLeft.x); -+ vb[3] = xFixedToFloat(srcTopLeft.y); -+ vb[4] = xFixedToFloat(maskTopLeft.x); -+ vb[5] = xFixedToFloat(maskTopLeft.y); - - vb[6] = (float)dstX; - vb[7] = (float)(dstY + h); -- vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; -- vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; -- vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; -- vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; -+ vb[8] = xFixedToFloat(srcBottomLeft.x); -+ vb[9] = xFixedToFloat(srcBottomLeft.y); -+ vb[10] = xFixedToFloat(maskBottomLeft.x); -+ vb[11] = xFixedToFloat(maskBottomLeft.y); - - vb[12] = (float)(dstX + w); - vb[13] = (float)(dstY + h); -- vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; -- vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; -- vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; -- vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; -+ vb[14] = xFixedToFloat(srcBottomRight.x); -+ vb[15] = xFixedToFloat(srcBottomRight.y); -+ vb[16] = xFixedToFloat(maskBottomRight.x); -+ vb[17] = xFixedToFloat(maskBottomRight.y); - - } else { - if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { -@@ -1657,18 +1666,18 @@ static void R600Composite(PixmapPtr pDst, - - vb[0] = (float)dstX; - vb[1] = (float)dstY; -- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; -- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; -+ vb[2] = xFixedToFloat(srcTopLeft.x); -+ vb[3] = xFixedToFloat(srcTopLeft.y); - - vb[4] = (float)dstX; - vb[5] = (float)(dstY + h); -- vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; -- vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; -+ vb[6] = xFixedToFloat(srcBottomLeft.x); -+ vb[7] = xFixedToFloat(srcBottomLeft.y); - - vb[8] = (float)(dstX + w); - vb[9] = (float)(dstY + h); -- vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; -- vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; -+ vb[10] = xFixedToFloat(srcBottomRight.x); -+ vb[11] = xFixedToFloat(srcBottomRight.y); - } - - accel_state->vb_index += 3; -@@ -1696,7 +1705,7 @@ static void R600DoneComposite(PixmapPtr pDst) - - - /* Vertex buffer setup */ -- if (accel_state->has_mask) { -+ if (accel_state->msk_pic) { - accel_state->vb_size = accel_state->vb_index * 24; - vtx_res.id = SQ_VTX_RESOURCE_vs; - vtx_res.vtx_size_dw = 24 / 4; -@@ -1743,7 +1752,7 @@ static void R600DoneComposite(PixmapPtr pDst) - Bool - R600CopyToVRAM(ScrnInfoPtr pScrn, - char *src, int src_pitch, -- uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, -+ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp, - int x, int y, int w, int h) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -@@ -1797,7 +1806,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, - /* blit from scratch to vram */ - R600DoPrepareCopy(pScrn, - scratch_pitch, w, oldhpass, offset, bpp, -- dst_pitch, dst_height, dst_mc_addr, bpp, -+ dst_pitch, dst_width, dst_height, dst_mc_addr, bpp, - 3, 0xffffffff); - R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); - R600DoCopy(pScrn); -@@ -1817,12 +1826,11 @@ R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, - RADEONInfoPtr info = RADEONPTR(pScrn); - uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -- uint32_t dst_height = pDst->drawable.height; - int bpp = pDst->drawable.bitsPerPixel; - - return R600CopyToVRAM(pScrn, - src, src_pitch, -- dst_pitch, dst_mc_addr, dst_height, bpp, -+ dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp, - x, y, w, h); - } - -@@ -1857,7 +1865,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, - /* blit from vram to scratch */ - R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, -- scratch_pitch, hpass, scratch_mc_addr, bpp, -+ scratch_pitch, src_width, hpass, scratch_mc_addr, bpp, - 3, 0xffffffff); - R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); - R600DoCopy(pScrn); -@@ -1874,7 +1882,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, - /* blit from vram to scratch */ - R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, -- scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp, -+ scratch_pitch, src_width, hpass, scratch_mc_addr + scratch_offset, bpp, - 3, 0xffffffff); - R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); - R600DoCopy(pScrn); -diff --git a/src/r600_shader.c b/src/r600_shader.c -index addba36..584deb4 100644 ---- a/src/r600_shader.c -+++ b/src/r600_shader.c -@@ -457,7 +457,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - int i = 0; - - /* 0 */ -- shader[i++] = CF_DWORD0(ADDR(4)); -+ shader[i++] = CF_DWORD0(ADDR(6)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), -@@ -468,7 +468,22 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_VTX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 1 */ -+ -+ /* 1 - ALU */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(4), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(2), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 2 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), - TYPE(SQ_EXPORT_POS), - RW_GPR(1), -@@ -486,7 +501,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 2 */ -+ /* 3 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(0), -@@ -504,9 +519,63 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(0)); -- shader[i++] = 0x00000000; -- shader[i++] = 0x00000000; -- /* 4/5 */ -+ -+ -+ /* 4 texX / w */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ -+ /* 5 texY / h */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ -+ /* 6/7 */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -531,7 +600,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(1)); - shader[i++] = VTX_DWORD_PAD; -- /* 6/7 */ -+ /* 8/9 */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -560,41 +629,12 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) - return i; - } - --/* -- * ; xv ps planar -- * 00 TEX: ADDR(20) CNT(3) NO_BARRIER -- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 -- * 1 SAMPLE R1.__x_, R0.xy01, t1, s1 -- * 2 SAMPLE R1._x__, R0.xy01, t2, s2 -- * 01 TEX: ADDR(28) CNT(2) NO_BARRIER -- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 -- * 1 SAMPLE R1._xy_, R0.xy01, t1, s1 -- * 02 ALU: ADDR(4) CNT(16) -- * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP -- * y: MULADD R1.y, R1.y, C3.z, C3.w -- * z: MULADD R1.z, R1.z, C3.z, C3.w -- * w: MOV R1.w, 0.0f -- * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102 -- * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102 -- * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102 -- * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021 -- * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102 -- * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102 -- * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102 -- * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021 -- * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102 -- * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102 -- * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102 -- * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021 -- * 03 EXP_DONE: PIX0, R2 -- * END_OF_PROGRAM -- */ - int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - { - int i = 0; - - /* 0 */ -- shader[i++] = CF_DWORD0(ADDR(20)); -+ shader[i++] = CF_DWORD0(ADDR(16)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_BOOL), -@@ -606,7 +646,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 1 */ -- shader[i++] = CF_DWORD0(ADDR(28)); -+ shader[i++] = CF_DWORD0(ADDR(24)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_NOT_BOOL), -@@ -625,7 +665,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), - KCACHE_ADDR0(0), - KCACHE_ADDR1(0), -- I_COUNT(16), -+ I_COUNT(12), - USES_WATERFALL(0), - CF_INST(SQ_CF_INST_ALU), - WHOLE_QUAD_MODE(0), -@@ -648,73 +688,74 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 4 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ /* 4,5,6,7 */ -+ /* r2.x = MAD(c0.w, r1.x, c0.x) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), - SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_X), -+ SRC0_ELEM(ELEM_W), - SRC0_NEG(0), -- SRC1_SEL(259), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), - SRC2_REL(ABSOLUTE), -- SRC2_ELEM(ELEM_Y), -+ SRC2_ELEM(ELEM_X), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), -- DST_GPR(1), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), -- CLAMP(1)); -- /* 5 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ CLAMP(0)); -+ /* r2.y = MAD(c0.w, r1.x, c0.y) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), - SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_Y), -+ SRC0_ELEM(ELEM_W), - SRC0_NEG(0), -- SRC1_SEL(259), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_Z), -+ SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), - SRC2_REL(ABSOLUTE), -- SRC2_ELEM(ELEM_W), -+ SRC2_ELEM(ELEM_Y), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), -- DST_GPR(1), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); -- /* 6 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ /* r2.z = MAD(c0.w, r1.x, c0.z) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), - SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_Z), -+ SRC0_ELEM(ELEM_W), - SRC0_NEG(0), -- SRC1_SEL(259), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_Z), -+ SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), - SRC2_REL(ABSOLUTE), -- SRC2_ELEM(ELEM_W), -+ SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), -- DST_GPR(1), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); -- /* 7 */ -+ /* r2.w = MAD(0, 0, 1) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), -@@ -726,334 +767,198 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(1), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_MOV), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_X), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), -- DST_GPR(1), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); -- /* 8 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ -+ /* 8,9,10,11 */ -+ /* r2.x = MAD(c1.x, r1.y, pv.x) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), -- SRC1_SEL(256), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_X), -+ SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(1), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_X), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), -- CLAMP(1)); -- /* 9 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ CLAMP(0)); -+ /* r2.y = MAD(c1.y, r1.y, pv.y) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), -- SRC1_SEL(256), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Y), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), -- CLAMP(1)); -- /* 10 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ CLAMP(0)); -+ /* r2.z = MAD(c1.z, r1.y, pv.z) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), -- SRC1_SEL(256), -- SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_Z), -- SRC1_NEG(0), -- INDEX_MODE(SQ_INDEX_LOOP), -- PRED_SEL(SQ_PRED_SEL_OFF), -- LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -- DST_REL(ABSOLUTE), -- DST_ELEM(ELEM_Z), -- CLAMP(1)); -- /* 11 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -- SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_W), -- SRC0_NEG(0), -- SRC1_SEL(256), -- SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_W), -- SRC1_NEG(0), -- INDEX_MODE(SQ_INDEX_LOOP), -- PRED_SEL(SQ_PRED_SEL_OFF), -- LAST(1)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_021), -- DST_GPR(0), -- DST_REL(ABSOLUTE), -- DST_ELEM(ELEM_W), -- CLAMP(1)); -- /* 12 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -- SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_X), -- SRC0_NEG(0), -- SRC1_SEL(257), -- SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_X), -- SRC1_NEG(0), -- INDEX_MODE(SQ_INDEX_LOOP), -- PRED_SEL(SQ_PRED_SEL_OFF), -- LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -- DST_REL(ABSOLUTE), -- DST_ELEM(ELEM_X), -- CLAMP(1)); -- /* 13 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -- SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_Y), -- SRC0_NEG(0), -- SRC1_SEL(257), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(1), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), -- DST_ELEM(ELEM_Y), -- CLAMP(1)); -- /* 14 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -- SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_Z), -- SRC0_NEG(0), -- SRC1_SEL(257), -- SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_Z), -- SRC1_NEG(0), -- INDEX_MODE(SQ_INDEX_LOOP), -- PRED_SEL(SQ_PRED_SEL_OFF), -- LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -- DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), -- CLAMP(1)); -- /* 15 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ CLAMP(0)); -+ /* r2.w = MAD(0, 0, 1) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), - SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_W), -+ SRC0_ELEM(ELEM_X), - SRC0_NEG(0), -- SRC1_SEL(257), -+ SRC1_SEL(SQ_ALU_SRC_0), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_W), -+ SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_021), -- DST_GPR(0), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), -- CLAMP(1)); -- /* 16 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ CLAMP(0)); -+ /* 12,13,14,15 */ -+ /* r2.x = MAD(c2.x, r1.z, pv.x) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(258), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), -- SRC1_SEL(258), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_X), -+ SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_X), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(1)); -- /* 17 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ /* r2.y = MAD(c2.y, r1.z, pv.y) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(258), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), -- SRC1_SEL(258), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_Y), -+ SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -- DST_GPR(0), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Y), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(1)); -- /* 18 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ /* r2.z = MAD(c2.z, r1.z, pv.z) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(258), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), -- SRC1_SEL(258), -+ SRC1_SEL(1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(1), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_102), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(1)); -- /* 19 */ -- shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ /* r2.w = MAD(0, 0, 1) */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), - SRC0_REL(ABSOLUTE), -- SRC0_ELEM(ELEM_W), -+ SRC0_ELEM(ELEM_X), - SRC0_NEG(0), -- SRC1_SEL(258), -+ SRC1_SEL(SQ_ALU_SRC_0), - SRC1_REL(ABSOLUTE), -- SRC1_ELEM(ELEM_W), -+ SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); -- shader[i++] = ALU_DWORD1_OP2(ChipSet, -- SRC0_ABS(0), -- SRC1_ABS(0), -- UPDATE_EXECUTE_MASK(0), -- UPDATE_PRED(0), -- WRITE_MASK(0), -- FOG_MERGE(0), -- OMOD(SQ_ALU_OMOD_OFF), -- ALU_INST(SQ_OP2_INST_DOT4), -- BANK_SWIZZLE(SQ_ALU_VEC_021), -- DST_GPR(0), -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_X), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(1)); -- /* 20 */ -- shader[i++] = CF_DWORD0(ADDR(22)); -+ -+ /* 16 */ -+ shader[i++] = CF_DWORD0(ADDR(18)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), -@@ -1064,7 +969,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_TEX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 21 */ -+ /* 17 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), -@@ -1076,7 +981,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 22/23 */ -+ /* 18/19 */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), -@@ -1104,7 +1009,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; -- /* 24/25 */ -+ /* 20/21 */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), -@@ -1132,7 +1037,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; -- /* 26/27 */ -+ /* 22/23 */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), -@@ -1160,8 +1065,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; -- /* 28 */ -- shader[i++] = CF_DWORD0(ADDR(30)); -+ /* 24 */ -+ shader[i++] = CF_DWORD0(ADDR(26)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), -@@ -1172,7 +1077,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_TEX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 29 */ -+ /* 25 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), -@@ -1184,7 +1089,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 30/31 */ -+ /* 26/27 */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), -@@ -1212,7 +1117,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; -- /* 32/33 */ -+ /* 28/29 */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), -@@ -1486,7 +1391,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 1 */ -- shader[i++] = CF_DWORD0(ADDR(14)); -+ shader[i++] = CF_DWORD0(ADDR(28)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_NOT_BOOL), -@@ -1510,7 +1415,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 3 - mask sub */ -- shader[i++] = CF_DWORD0(ADDR(8)); -+ shader[i++] = CF_DWORD0(ADDR(22)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), -@@ -1521,7 +1426,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_VTX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 4 - dst */ -+ -+ /* 4 - ALU */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(9), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(12), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 5 - dst */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), - TYPE(SQ_EXPORT_POS), - RW_GPR(2), -@@ -1530,8 +1450,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), -- SRC_SEL_Z(SQ_SEL_Z), -- SRC_SEL_W(SQ_SEL_W), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), -@@ -1539,7 +1459,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 5 - src */ -+ /* 6 - src */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(1), -@@ -1548,8 +1468,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), -- SRC_SEL_Z(SQ_SEL_Z), -- SRC_SEL_W(SQ_SEL_W), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), -@@ -1557,7 +1477,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT), - WHOLE_QUAD_MODE(0), - BARRIER(0)); -- /* 6 - mask */ -+ /* 7 - mask */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(0), -@@ -1566,8 +1486,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), -- SRC_SEL_Z(SQ_SEL_Z), -- SRC_SEL_W(SQ_SEL_W), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), -@@ -1575,7 +1495,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(0)); -- /* 7 */ -+ /* 8 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), -@@ -1587,7 +1507,301 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 8/9 - dst */ -+ -+ -+ /* 9 srcX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(1), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(0)); -+ /* 10 srcY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(1), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(0)); -+ -+ /* 11 srcX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(1), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ /* 12 srcY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(1), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ -+ /* 13 maskX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(258), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(0)); -+ -+ /* 14 maskY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(259), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(0)); -+ -+ /* 15 srcX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(258), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ /* 16 srcY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(259), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ -+ /* 17 srcX / w */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ -+ /* 18 srcY / h */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(1), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(1), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ -+ /* 19 maskX / w */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(258), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ -+ /* 20 maskY / h */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(259), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ /* 21 */ -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ -+ /* 22/23 - dst */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -1612,7 +1826,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(1)); - shader[i++] = VTX_DWORD_PAD; -- /* 10/11 - src */ -+ /* 24/25 - src */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -1625,8 +1839,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), -- DST_SEL_Z(SQ_SEL_0), -- DST_SEL_W(SQ_SEL_1), -+ DST_SEL_Z(SQ_SEL_1), -+ DST_SEL_W(SQ_SEL_0), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -@@ -1637,7 +1851,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(0)); - shader[i++] = VTX_DWORD_PAD; -- /* 12/13 - mask */ -+ /* 26/27 - mask */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -1650,8 +1864,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), -- DST_SEL_Z(SQ_SEL_0), -- DST_SEL_W(SQ_SEL_1), -+ DST_SEL_Z(SQ_SEL_1), -+ DST_SEL_W(SQ_SEL_0), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -@@ -1663,8 +1877,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - MEGA_FETCH(0)); - shader[i++] = VTX_DWORD_PAD; - -- /* 14 - non-mask sub */ -- shader[i++] = CF_DWORD0(ADDR(18)); -+ /* 28 - non-mask sub */ -+ shader[i++] = CF_DWORD0(ADDR(40)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), -@@ -1675,7 +1889,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_VTX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 15 - dst */ -+ -+ /* 29 - ALU */ -+ shader[i++] = CF_ALU_DWORD0(ADDR(33), -+ KCACHE_BANK0(0), -+ KCACHE_BANK1(0), -+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); -+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), -+ KCACHE_ADDR0(0), -+ KCACHE_ADDR1(0), -+ I_COUNT(6), -+ USES_WATERFALL(0), -+ CF_INST(SQ_CF_INST_ALU), -+ WHOLE_QUAD_MODE(0), -+ BARRIER(1)); -+ -+ /* 30 - dst */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), - TYPE(SQ_EXPORT_POS), - RW_GPR(1), -@@ -1684,8 +1913,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), -- SRC_SEL_Z(SQ_SEL_Z), -- SRC_SEL_W(SQ_SEL_W), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(0), - END_OF_PROGRAM(0), -@@ -1693,7 +1922,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 16 - src */ -+ /* 31 - src */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(0), -@@ -1702,8 +1931,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), -- SRC_SEL_Z(SQ_SEL_Z), -- SRC_SEL_W(SQ_SEL_W), -+ SRC_SEL_Z(SQ_SEL_0), -+ SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(0), - END_OF_PROGRAM(0), -@@ -1711,7 +1940,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(0)); -- /* 17 */ -+ /* 32 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), -@@ -1723,7 +1952,156 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); -- /* 18/19 - dst */ -+ -+ -+ /* 33 srcX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Z), -+ CLAMP(0)); -+ /* 34 srcY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_Y), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_W), -+ CLAMP(0)); -+ -+ /* 35 srcX MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(256), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(0)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_Z), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ /* 36 srcY MAD */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(257), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(0), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_X), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_LOOP), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), -+ SRC2_REL(ABSOLUTE), -+ SRC2_ELEM(ELEM_W), -+ SRC2_NEG(0), -+ ALU_INST(SQ_OP3_INST_MULADD), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ /* 37 srcX / w */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_X), -+ SRC0_NEG(0), -+ SRC1_SEL(256), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_X), -+ CLAMP(0)); -+ -+ /* 38 srcY / h */ -+ shader[i++] = ALU_DWORD0(SRC0_SEL(0), -+ SRC0_REL(ABSOLUTE), -+ SRC0_ELEM(ELEM_Y), -+ SRC0_NEG(0), -+ SRC1_SEL(257), -+ SRC1_REL(ABSOLUTE), -+ SRC1_ELEM(ELEM_W), -+ SRC1_NEG(0), -+ INDEX_MODE(SQ_INDEX_AR_X), -+ PRED_SEL(SQ_PRED_SEL_OFF), -+ LAST(1)); -+ shader[i++] = ALU_DWORD1_OP2(ChipSet, -+ SRC0_ABS(0), -+ SRC1_ABS(0), -+ UPDATE_EXECUTE_MASK(0), -+ UPDATE_PRED(0), -+ WRITE_MASK(1), -+ FOG_MERGE(0), -+ OMOD(SQ_ALU_OMOD_OFF), -+ ALU_INST(SQ_OP2_INST_MUL), -+ BANK_SWIZZLE(SQ_ALU_VEC_012), -+ DST_GPR(0), -+ DST_REL(ABSOLUTE), -+ DST_ELEM(ELEM_Y), -+ CLAMP(0)); -+ -+ /* 39 */ -+ shader[i++] = 0x00000000; -+ shader[i++] = 0x00000000; -+ -+ /* 40/41 - dst */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -1748,7 +2126,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(1)); - shader[i++] = VTX_DWORD_PAD; -- /* 20/21 - src */ -+ /* 42/43 - src */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), -@@ -1761,8 +2139,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), -- DST_SEL_Z(SQ_SEL_0), -- DST_SEL_W(SQ_SEL_1), -+ DST_SEL_Z(SQ_SEL_1), -+ DST_SEL_W(SQ_SEL_0), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ -diff --git a/src/r600_state.h b/src/r600_state.h -index 181e167..44e7600 100644 ---- a/src/r600_state.h -+++ b/src/r600_state.h -@@ -195,10 +195,10 @@ do { \ - do { \ - if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \ - PACK3((ib), IT_SET_CONFIG_REG, (num) + 1); \ -- E32(ib, ((reg) - SET_CONFIG_REG_offset) >> 2); \ -+ E32(ib, ((reg) - SET_CONFIG_REG_offset) >> 2); \ - } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \ -- PACK3((ib), IT_SET_CONTEXT_REG, (num) + 1); \ -- E32(ib, ((reg) - 0x28000) >> 2); \ -+ PACK3((ib), IT_SET_CONTEXT_REG, (num) + 1); \ -+ E32(ib, ((reg) - SET_CONTEXT_REG_offset) >> 2); \ - } else if ((reg) >= SET_ALU_CONST_offset && (reg) < SET_ALU_CONST_end) { \ - PACK3((ib), IT_SET_ALU_CONST, (num) + 1); \ - E32(ib, ((reg) - SET_ALU_CONST_offset) >> 2); \ -diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c -index 88745d5..2f0b6b1 100644 ---- a/src/r600_textured_videofuncs.c -+++ b/src/r600_textured_videofuncs.c -@@ -45,6 +45,15 @@ - - #include "damage.h" - -+/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces -+ note the difference to the parameters used in overlay are due -+ to 10bit vs. float calcs */ -+static REF_TRANSFORM trans[2] = -+{ -+ {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ -+ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ -+}; -+ - static void - R600DoneTexturedVideo(ScrnInfoPtr pScrn) - { -@@ -114,19 +123,92 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - tex_resource_t tex_res; - tex_sampler_t tex_samp; - shader_config_t vs_conf, ps_conf; -- int uv_offset; -- static float ps_alu_consts[] = { -- 1.0, 0.0, 1.4020, 0, /* r - c[0] */ -- 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ -- 1.0, 1.7720, 0.0, 0, /* b - c[2] */ -- /* Constants for undoing Y'CbCr scaling -- * - Y' is scaled from 16:235 -- * - Cb/Cr are scaled from 16:240 -- * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) -- * Vector is [Y_mul, Y_shfit, C_mul, C_shift] -- */ -- 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, -- }; -+ /* -+ * y' = y - .0625 -+ * u' = u - .5 -+ * v' = v - .5; -+ * -+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' -+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' -+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' -+ * -+ * DP3 might look like the straightforward solution -+ * but we'd need to move the texture yuv values in -+ * the same reg for this to work. Therefore use MADs. -+ * Brightness just adds to the off constant. -+ * Contrast is multiplication of luminance. -+ * Saturation and hue change the u and v coeffs. -+ * Default values (before adjustments - depend on colorspace): -+ * yco = 1.1643 -+ * uco = 0, -0.39173, 2.017 -+ * vco = 1.5958, -0.8129, 0 -+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], -+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], -+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], -+ * -+ * temp = MAD(yco, yuv.yyyy, off) -+ * temp = MAD(uco, yuv.uuuu, temp) -+ * result = MAD(vco, yuv.vvvv, temp) -+ */ -+ /* TODO: calc consts in the shader */ -+ const float Loff = -0.0627; -+ const float Coff = -0.502; -+ float uvcosf, uvsinf; -+ float yco; -+ float uco[3], vco[3], off[3]; -+ float bright, cont, gamma; -+ int ref = pPriv->transform_index; -+ Bool needgamma = FALSE; -+ float ps_alu_consts[12]; -+ float vs_alu_consts[4]; -+ -+ cont = RTFContrast(pPriv->contrast); -+ bright = RTFBrightness(pPriv->brightness); -+ gamma = (float)pPriv->gamma / 1000.0; -+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); -+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); -+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */ -+ -+ yco = trans[ref].RefLuma * cont; -+ uco[0] = -trans[ref].RefRCr * uvsinf; -+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; -+ uco[2] = trans[ref].RefBCb * uvcosf; -+ vco[0] = trans[ref].RefRCr * uvcosf; -+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; -+ vco[2] = trans[ref].RefBCb * uvsinf; -+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; -+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; -+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; -+ -+ // XXX -+ gamma = 1.0; -+ -+ if (gamma != 1.0) { -+ needgamma = TRUE; -+ /* note: gamma correction is out = in ^ gamma; -+ gpu can only do LG2/EX2 therefore we transform into -+ in ^ gamma = 2 ^ (log2(in) * gamma). -+ Lots of scalar ops, unfortunately (better solution?) - -+ without gamma that's 3 inst, with gamma it's 10... -+ could use different gamma factors per channel, -+ if that's of any use. */ -+ } -+ -+ /* setup the ps consts */ -+ ps_alu_consts[0] = off[0]; -+ ps_alu_consts[1] = off[1]; -+ ps_alu_consts[2] = off[2]; -+ ps_alu_consts[3] = yco; -+ -+ ps_alu_consts[4] = uco[0]; -+ ps_alu_consts[5] = uco[1]; -+ ps_alu_consts[6] = uco[2]; -+ ps_alu_consts[7] = gamma; -+ -+ ps_alu_consts[8] = vco[0]; -+ ps_alu_consts[9] = vco[1]; -+ ps_alu_consts[10] = vco[2]; -+ ps_alu_consts[11] = 0.0; - - CLEAR (cb_conf); - CLEAR (tex_res); -@@ -162,6 +244,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - -+ set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); -+ set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); -+ set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); -+ - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_vs_offset; - -@@ -215,7 +301,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - switch(pPriv->id) { - case FOURCC_YV12: - case FOURCC_I420: -- accel_state->src_mc_addr[0] = pPriv->src_offset; -+ accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; - accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; - - /* flush texture cache */ -@@ -260,26 +346,23 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); - - /* U or V texture */ -- uv_offset = accel_state->src_pitch[0] * pPriv->h; -- uv_offset = (uv_offset + 255) & ~255; -- - cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0] / 4, -- accel_state->src_mc_addr[0] + uv_offset); -+ accel_state->src_mc_addr[0] + pPriv->planev_offset); - - tex_res.id = 1; - tex_res.format = FMT_8; - tex_res.w = pPriv->w >> 1; - tex_res.h = pPriv->h >> 1; -- tex_res.pitch = accel_state->src_pitch[0] >> 1; -+ tex_res.pitch = ((accel_state->src_pitch[0] >> 1) + 255) & ~255; - tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ - tex_res.dst_sel_y = SQ_SEL_1; - tex_res.dst_sel_z = SQ_SEL_1; - tex_res.dst_sel_w = SQ_SEL_1; - tex_res.interlaced = 0; - -- tex_res.base = accel_state->src_mc_addr[0] + uv_offset; -- tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; -+ tex_res.base = accel_state->src_mc_addr[0] + pPriv->planev_offset; -+ tex_res.mip_base = accel_state->src_mc_addr[0] + pPriv->planev_offset; - set_tex_resource (pScrn, accel_state->ib, &tex_res); - - /* U or V sampler */ -@@ -287,26 +370,23 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); - - /* U or V texture */ -- uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1)); -- uv_offset = (uv_offset + 255) & ~255; -- - cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0] / 4, -- accel_state->src_mc_addr[0] + uv_offset); -+ accel_state->src_mc_addr[0] + pPriv->planeu_offset); - - tex_res.id = 2; - tex_res.format = FMT_8; - tex_res.w = pPriv->w >> 1; - tex_res.h = pPriv->h >> 1; -- tex_res.pitch = accel_state->src_pitch[0] >> 1; -+ tex_res.pitch = ((accel_state->src_pitch[0] >> 1) + 255) & ~255; - tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ - tex_res.dst_sel_y = SQ_SEL_1; - tex_res.dst_sel_z = SQ_SEL_1; - tex_res.dst_sel_w = SQ_SEL_1; - tex_res.interlaced = 0; - -- tex_res.base = accel_state->src_mc_addr[0] + uv_offset; -- tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; -+ tex_res.base = accel_state->src_mc_addr[0] + pPriv->planeu_offset; -+ tex_res.mip_base = accel_state->src_mc_addr[0] + pPriv->planeu_offset; - set_tex_resource (pScrn, accel_state->ib, &tex_res); - - /* UV sampler */ -@@ -316,7 +396,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - case FOURCC_UYVY: - case FOURCC_YUY2: - default: -- accel_state->src_mc_addr[0] = pPriv->src_offset; -+ accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; - accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; - - /* flush texture cache */ -@@ -446,6 +526,15 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); - - -+ vs_alu_consts[0] = 1.0 / pPriv->w; -+ vs_alu_consts[1] = 1.0 / pPriv->h; -+ vs_alu_consts[2] = 0.0; -+ vs_alu_consts[3] = 0.0; -+ -+ /* VS alu constants */ -+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs, -+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); -+ - if (pPriv->vsync) { - xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, - pPriv->drw_x, -@@ -484,28 +573,30 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) - dstw = pBox->x2 - pBox->x1; - dsth = pBox->y2 - pBox->y1; - -- srcX = ((pBox->x1 - pPriv->drw_x) * -- pPriv->src_w) / pPriv->dst_w; -- srcY = ((pBox->y1 - pPriv->drw_y) * -- pPriv->src_h) / pPriv->dst_h; -+ srcX = pPriv->src_x; -+ srcX += ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = pPriv->src_y; -+ srcY += ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; - - srcw = (pPriv->src_w * dstw) / pPriv->dst_w; - srch = (pPriv->src_h * dsth) / pPriv->dst_h; - - vb[0] = (float)dstX; - vb[1] = (float)dstY; -- vb[2] = (float)srcX / pPriv->w; -- vb[3] = (float)srcY / pPriv->h; -+ vb[2] = (float)srcX; -+ vb[3] = (float)srcY; - - vb[4] = (float)dstX; - vb[5] = (float)(dstY + dsth); -- vb[6] = (float)srcX / pPriv->w; -- vb[7] = (float)(srcY + srch) / pPriv->h; -+ vb[6] = (float)srcX; -+ vb[7] = (float)(srcY + srch); - - vb[8] = (float)(dstX + dstw); - vb[9] = (float)(dstY + dsth); -- vb[10] = (float)(srcX + srcw) / pPriv->w; -- vb[11] = (float)(srcY + srch) / pPriv->h; -+ vb[10] = (float)(srcX + srcw); -+ vb[11] = (float)(srcY + srch); - - accel_state->vb_index += 3; - -diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c -index bce597b..6423e58 100644 ---- a/src/r6xx_accel.c -+++ b/src/r6xx_accel.c -@@ -96,75 +96,6 @@ wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) - - } - --static void --reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib) --{ -- int i; -- -- PACK0(ib, CB_COLOR0_INFO, 8); -- for (i = 0; i < 8; i++) -- E32(ib, 0); --} -- --static void --reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib) --{ -- int i; -- -- wait_3d_idle(pScrn, ib); -- -- PACK0(ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num); -- for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++) -- E32(ib, 0); -- PACK0(ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num); -- for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++) -- E32(ib, 0); -- -- wait_3d_idle(pScrn, ib); --} -- --static void --reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib) --{ -- int i; -- -- for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) { -- PACK0(ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3); -- E32(ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift); -- E32(ib, MAX_LOD_mask); -- E32(ib, 0); -- } --} -- --static void --reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib) --{ -- int i; -- -- const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2); -- -- PACK0(ib, SQ_ALU_CONSTANT, count); -- for (i = 0; i < count; i++) -- EFLOAT(ib, 0.0); --} -- --static void --reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib) --{ -- int i; -- -- -- PACK0(ib, SQ_BOOL_CONST, SQ_BOOL_CONST_all_num); -- for (i = 0; i < SQ_BOOL_CONST_all_num; i++) -- E32(ib, 0); -- -- PACK0(ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num); -- -- for (i = 0; i < SQ_LOOP_CONST_all_num; i++) -- E32(ib, 0); -- --} -- - void - start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) - { -@@ -654,10 +585,8 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - memset(&tex_res, 0, sizeof(tex_resource_t)); - memset(&fs_conf, 0, sizeof(shader_config_t)); - --#if 1 - if (accel_state->XInited3D) - return; --#endif - - accel_state->XInited3D = TRUE; - -@@ -690,11 +619,6 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - 0)); - } - -- reset_td_samplers(pScrn, ib); -- reset_dx9_alu_consts(pScrn, ib); -- reset_bool_loop_const (pScrn, ib); -- reset_sampler_const (pScrn, ib); -- - // SQ - sq_conf.ps_prio = 0; - sq_conf.vs_prio = 1; -@@ -784,6 +708,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - sq_conf.num_es_stack_entries = 0; - break; - case CHIP_FAMILY_RV730: -+ case CHIP_FAMILY_RV740: - sq_conf.num_ps_gprs = 84; - sq_conf.num_vs_gprs = 36; - sq_conf.num_temp_gprs = 4; -@@ -853,24 +778,12 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EREG(ib, SX_ALPHA_REF, 0); - - // CB -- reset_cb(pScrn, ib); -- - PACK0(ib, CB_BLEND_RED, 4); - E32(ib, 0x00000000); - E32(ib, 0x00000000); - E32(ib, 0x00000000); - E32(ib, 0x00000000); - -- /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */ -- // RV6xx+ have per-MRT blend -- if (info->ChipFamily > CHIP_FAMILY_R600) { -- PACK0(ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num); -- for (i = 0; i < CB_BLEND0_CONTROL_num; i++) -- E32(ib, 0); -- } -- -- EREG(ib, CB_BLEND_CONTROL, 0); -- - if (info->ChipFamily < CHIP_FAMILY_RV770) { - PACK0(ib, CB_FOG_RED, 3); - E32(ib, 0x00000000); -@@ -878,7 +791,6 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - E32(ib, 0x00000000); - } - -- EREG(ib, CB_COLOR_CONTROL, 0); - PACK0(ib, CB_CLRCMP_CONTROL, 4); - E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC - E32(ib, 0); // CB_CLRCMP_SRC -@@ -896,11 +808,8 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); - - // SC -- set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192); -- set_screen_scissor(pScrn, ib, 0, 0, 8192, 8192); - EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | - (0 << WINDOW_Y_OFFSET_shift))); -- set_window_scissor(pScrn, ib, 0, 0, 8192, 8192); - - EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); - -@@ -947,7 +856,6 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EFLOAT(ib, 0.0f); // PA_CL_VPORT_YOFFSET - EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZSCALE - EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZOFFSET -- EREG(ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit)); - EREG(ib, PA_CL_VTE_CNTL, 0); - EREG(ib, PA_CL_VS_OUT_CNTL, 0); - EREG(ib, PA_CL_NANINF_CNTL, 0); -@@ -957,11 +865,6 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ - EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ - -- /* user clipping planes are disabled by default */ -- PACK0(ib, PA_CL_UCP_0_X, 24); -- for (i = 0; i < 24; i++) -- EFLOAT(ib, 0.0); -- - // SU - EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); - EREG(ib, PA_SU_POINT_SIZE, 0); -@@ -973,7 +876,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); - - EREG(ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ -- EREG(ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | -+ EREG(ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | - (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ - EREG(ib, PA_SU_POLY_OFFSET_CLAMP, 0); - -@@ -983,20 +886,11 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - else - EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); - -- EREG(ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) | -- (3 << PNT_SPRITE_OVRD_Y_shift) | -- (0 << PNT_SPRITE_OVRD_Z_shift) | -- (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */ - EREG(ib, SPI_INPUT_Z, 0); - EREG(ib, SPI_FOG_CNTL, 0); - EREG(ib, SPI_FOG_FUNC_SCALE, 0); - EREG(ib, SPI_FOG_FUNC_BIAS, 0); - -- PACK0(ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num); -- for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */ -- E32(ib, 0x03020100 + i*0x04040404); -- EREG(ib, SPI_VS_OUT_CONFIG, 0); -- - // clear FS - fs_setup(pScrn, ib, &fs_conf); - -@@ -1028,24 +922,6 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) - EREG(ib, VGT_VTX_CNT_EN, 0); - EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); - -- // clear tex resources - PS -- for (i = 0; i < 16; i++) { -- tex_res.id = i; -- set_tex_resource(pScrn, ib, &tex_res); -- } -- -- // clear tex resources - VS -- for (i = 160; i < 164; i++) { -- tex_res.id = i; -- set_tex_resource(pScrn, ib, &tex_res); -- } -- -- // clear tex resources - FS -- for (i = 320; i < 335; i++) { -- tex_res.id = i; -- set_tex_resource(pScrn, ib, &tex_res); -- } -- - } - - -diff --git a/src/radeon.h b/src/radeon.h -index d488429..d5402b2 100644 ---- a/src/radeon.h -+++ b/src/radeon.h -@@ -75,6 +75,7 @@ - #include "dri.h" - #include "GL/glxint.h" - #include "xf86drm.h" -+#include "radeon_drm.h" - - #ifdef DAMAGE - #include "damage.h" -@@ -85,6 +86,15 @@ - #include "xf86Crtc.h" - #include "X11/Xatom.h" - -+#ifdef XF86DRM_MODE -+#include "radeon_bo.h" -+#include "radeon_cs.h" -+#include "radeon_dri2.h" -+#include "drmmode_display.h" -+#else -+#include "radeon_dummy_bufmgr.h" -+#endif -+ - /* Render support */ - #ifdef RENDER - #include "picturestr.h" -@@ -186,7 +196,7 @@ typedef enum { - OPTION_SUBPIXEL_ORDER, - #endif - OPTION_SHOWCACHE, -- OPTION_DYNAMIC_CLOCKS, -+ OPTION_CLOCK_GATING, - OPTION_BIOS_HOTKEYS, - OPTION_VGA_ACCESS, - OPTION_REVERSE_DDC, -@@ -207,7 +217,9 @@ typedef enum { - OPTION_INT10, - OPTION_EXA_VSYNC, - OPTION_ATOM_TVOUT, -- OPTION_R4XX_ATOM -+ OPTION_R4XX_ATOM, -+ OPTION_FORCE_LOW_POWER, -+ OPTION_DYNAMIC_PM - } RADEONOpts; - - -@@ -259,6 +271,8 @@ typedef struct { - #define RADEON_PLL_PREFER_HIGH_FB_DIV (1 << 7) - #define RADEON_PLL_PREFER_LOW_POST_DIV (1 << 8) - #define RADEON_PLL_PREFER_HIGH_POST_DIV (1 << 9) -+#define RADEON_PLL_USE_FRAC_FB_DIV (1 << 10) -+#define RADEON_PLL_PREFER_CLOSEST_LOWER (1 << 11) - - typedef struct { - uint16_t reference_freq; -@@ -275,6 +289,8 @@ typedef struct { - uint32_t max_post_div; - uint32_t min_feedback_div; - uint32_t max_feedback_div; -+ uint32_t min_frac_feedback_div; -+ uint32_t max_frac_feedback_div; - uint32_t best_vco; - } RADEONPLLRec, *RADEONPLLPtr; - -@@ -318,7 +334,6 @@ typedef enum { - CHIP_FAMILY_RS690, - CHIP_FAMILY_RS740, - CHIP_FAMILY_R600, /* r600 */ -- CHIP_FAMILY_R630, - CHIP_FAMILY_RV610, - CHIP_FAMILY_RV630, - CHIP_FAMILY_RV670, -@@ -329,6 +344,7 @@ typedef enum { - CHIP_FAMILY_RV770, - CHIP_FAMILY_RV730, - CHIP_FAMILY_RV710, -+ CHIP_FAMILY_RV740, - CHIP_FAMILY_LAST - } RADEONChipFamily; - -@@ -377,6 +393,11 @@ typedef enum { - (info->ChipFamily == CHIP_FAMILY_RS400) || \ - (info->ChipFamily == CHIP_FAMILY_RS480)) - -+#define IS_R200_3D ((info->ChipFamily == CHIP_FAMILY_RV250) || \ -+ (info->ChipFamily == CHIP_FAMILY_RV280) || \ -+ (info->ChipFamily == CHIP_FAMILY_RS300) || \ -+ (info->ChipFamily == CHIP_FAMILY_R200)) -+ - /* - * Errata workarounds - */ -@@ -412,8 +433,38 @@ typedef enum { - CARD_PCIE - } RADEONCardType; - -+typedef enum { -+ POWER_DEFAULT, -+ POWER_LOW, -+ POWER_HIGH -+} RADEONPMType; -+ -+typedef struct { -+ RADEONPMType type; -+ uint32_t sclk; -+ uint32_t mclk; -+ uint32_t pcie_lanes; -+ uint32_t flags; -+} RADEONPowerMode; -+ -+typedef struct { -+ /* power modes */ -+ int num_modes; -+ int current_mode; -+ RADEONPowerMode mode[3]; -+ -+ Bool clock_gating_enabled; -+ Bool dynamic_mode_enabled; -+ Bool force_low_power_enabled; -+} RADEONPowerManagement; -+ - typedef struct _atomBiosHandle *atomBiosHandlePtr; - -+struct radeon_exa_pixmap_priv { -+ struct radeon_bo *bo; -+ int flags; -+}; -+ - typedef struct { - uint32_t pci_device_id; - RADEONChipFamily chip_family; -@@ -424,6 +475,25 @@ typedef struct { - int singledac; - } RADEONCardInfo; - -+#define RADEON_2D_EXA_COPY 1 -+#define RADEON_2D_EXA_SOLID 2 -+ -+struct radeon_2d_state { -+ int op; // -+ uint32_t dst_pitch_offset; -+ uint32_t src_pitch_offset; -+ uint32_t dp_gui_master_cntl; -+ uint32_t dp_cntl; -+ uint32_t dp_write_mask; -+ uint32_t dp_brush_frgd_clr; -+ uint32_t dp_brush_bkgd_clr; -+ uint32_t dp_src_frgd_clr; -+ uint32_t dp_src_bkgd_clr; -+ uint32_t default_sc_bottom_right; -+ struct radeon_bo *dst_bo; -+ struct radeon_bo *src_bo; -+}; -+ - #ifdef XF86DRI - struct radeon_cp { - Bool CPRuns; /* CP is running */ -@@ -602,9 +672,15 @@ struct radeon_accel_state { - #define EXA_ENGINEMODE_2D 1 - #define EXA_ENGINEMODE_3D 2 - -+ int composite_op; -+ PicturePtr dst_pic; -+ PicturePtr msk_pic; -+ PicturePtr src_pic; -+ PixmapPtr dst_pix; -+ PixmapPtr msk_pix; -+ PixmapPtr src_pix; - Bool is_transform[2]; - PictTransform *transform[2]; -- Bool has_mask; - /* Whether we are tiling horizontally and vertically */ - Bool need_src_tile_x; - Bool need_src_tile_y; -@@ -789,6 +865,9 @@ typedef struct { - RADEONCardType cardType; /* Current card is a PCI card */ - struct radeon_cp *cp; - struct radeon_dri *dri; -+#ifdef XF86DRM_MODE -+ struct radeon_dri2 dri2; -+#endif - #ifdef USE_EXA - Bool accelDFS; - #endif -@@ -892,6 +971,38 @@ typedef struct { - - Bool r4xx_atom; - -+ /* pm */ -+ RADEONPowerManagement pm; -+ -+ /* igp info */ -+ float igp_sideport_mclk; -+ float igp_system_mclk; -+ float igp_ht_link_clk; -+ float igp_ht_link_width; -+ -+ int can_resize; -+ void (*reemit_current2d)(ScrnInfoPtr pScrn, int op); // emit the current 2D state into the IB -+ struct radeon_2d_state state_2d; -+ Bool kms_enabled; -+ struct radeon_bo *front_bo; -+#ifdef XF86DRM_MODE -+ struct radeon_bo_manager *bufmgr; -+ struct radeon_cs_manager *csm; -+ struct radeon_cs *cs; -+ -+ struct radeon_bo *cursor_bo[2]; -+ uint64_t vram_size; -+ uint64_t gart_size; -+ drmmode_rec drmmode; -+#else -+ /* fake bool */ -+ Bool cs; -+#endif -+ -+ /* Xv bicubic filtering */ -+ struct radeon_bo *bicubic_bo; -+ void *bicubic_memory; -+ int bicubic_offset; - } RADEONInfoRec, *RADEONInfoPtr; - - #define RADEONWaitForFifo(pScrn, entries) \ -@@ -973,6 +1084,7 @@ extern int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info); - # ifdef USE_XAA - extern Bool RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen); - # endif -+uint32_t radeonGetPixmapOffset(PixmapPtr pPix); - #endif - - #ifdef USE_XAA -@@ -1015,6 +1127,7 @@ extern void RADEONBlank(ScrnInfoPtr pScrn); - extern void RADEONComputePLL(RADEONPLLPtr pll, unsigned long freq, - uint32_t *chosen_dot_clock_freq, - uint32_t *chosen_feedback_div, -+ uint32_t *chosen_frac_feedback_div, - uint32_t *chosen_reference_div, - uint32_t *chosen_post_div, int flags); - extern DisplayModePtr RADEONCrtcFindClosestMode(xf86CrtcPtr crtc, -@@ -1057,9 +1170,11 @@ extern int RADEONMinBits(int val); - extern unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr); - extern unsigned RADEONINPLL(ScrnInfoPtr pScrn, int addr); - extern unsigned RADEONINPCIE(ScrnInfoPtr pScrn, int addr); -+extern unsigned R600INPCIE_PORT(ScrnInfoPtr pScrn, int addr); - extern void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data); - extern void RADEONOUTPLL(ScrnInfoPtr pScrn, int addr, uint32_t data); - extern void RADEONOUTPCIE(ScrnInfoPtr pScrn, int addr, uint32_t data); -+extern void R600OUTPCIE_PORT(ScrnInfoPtr pScrn, int addr, uint32_t data); - extern void RADEONPllErrataAfterData(RADEONInfoPtr info); - extern void RADEONPllErrataAfterIndex(RADEONInfoPtr info); - extern void RADEONWaitForVerticalSync(ScrnInfoPtr pScrn); -@@ -1069,6 +1184,19 @@ extern void RADEONInitMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save, - extern void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn, - RADEONSavePtr restore); - -+Bool RADEONGetRec(ScrnInfoPtr pScrn); -+void RADEONFreeRec(ScrnInfoPtr pScrn); -+Bool RADEONPreInitVisual(ScrnInfoPtr pScrn); -+Bool RADEONPreInitWeight(ScrnInfoPtr pScrn); -+ -+ -+/* radeon_pm.c */ -+extern void RADEONPMInit(ScrnInfoPtr pScrn); -+extern void RADEONPMBlockHandler(ScrnInfoPtr pScrn); -+extern void RADEONPMEnterVT(ScrnInfoPtr pScrn); -+extern void RADEONPMLeaveVT(ScrnInfoPtr pScrn); -+extern void RADEONPMFini(ScrnInfoPtr pScrn); -+ - #ifdef USE_EXA - /* radeon_exa.c */ - extern Bool RADEONSetupMemEXA(ScreenPtr pScreen); -@@ -1115,7 +1243,7 @@ extern void RADEONPrintPortMap(ScrnInfoPtr pScrn); - extern void RADEONSetOutputType(ScrnInfoPtr pScrn, - RADEONOutputPrivatePtr radeon_output); - extern Bool RADEONSetupConnectors(ScrnInfoPtr pScrn); --extern Bool RADEONI2CDoLock(xf86OutputPtr output, Bool lock_state); -+extern Bool RADEONI2CDoLock(xf86OutputPtr output, I2CBusPtr b, Bool lock_state); - - - /* radeon_tv.c */ -@@ -1136,6 +1264,7 @@ extern void RADEONUpdateHVPosition(xf86OutputPtr output, DisplayModePtr mode); - /* radeon_video.c */ - extern void RADEONInitVideo(ScreenPtr pScreen); - extern void RADEONResetVideo(ScrnInfoPtr pScrn); -+extern Bool radeon_load_bicubic_texture(ScrnInfoPtr pScrn); - - /* radeon_legacy_memory.c */ - extern uint32_t -@@ -1147,6 +1276,15 @@ extern void - radeon_legacy_free_memory(ScrnInfoPtr pScrn, - void *mem_struct); - -+#ifdef XF86DRM_MODE -+extern void radeon_cs_flush_indirect(ScrnInfoPtr pScrn); -+extern void radeon_ddx_cs_start(ScrnInfoPtr pScrn, -+ int num, const char *file, -+ const char *func, int line); -+#endif -+struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix); -+void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo); -+ - #ifdef XF86DRI - # ifdef USE_XAA - /* radeon_accelfuncs.c */ -@@ -1165,7 +1303,9 @@ do { \ - - #define RADEONCP_RELEASE(pScrn, info) \ - do { \ -- if (info->cp->CPInUse) { \ -+ if (info->cs) { \ -+ radeon_cs_flush_indirect(pScrn); \ -+ } else if (info->cp->CPInUse) { \ - RADEON_PURGE_CACHE(); \ - RADEON_WAIT_UNTIL_IDLE(); \ - RADEONCPReleaseIndirect(pScrn); \ -@@ -1200,7 +1340,7 @@ do { \ - - #define RADEONCP_REFRESH(pScrn, info) \ - do { \ -- if (!info->cp->CPInUse) { \ -+ if (!info->cp->CPInUse && !info->cs) { \ - if (info->cp->needCacheFlush) { \ - RADEON_PURGE_CACHE(); \ - RADEON_PURGE_ZCACHE(); \ -@@ -1231,54 +1371,59 @@ do { \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - "BEGIN_RING(%d) in %s\n", (unsigned int)n, __FUNCTION__);\ - } \ -- if (++info->cp->dma_begin_count != 1) { \ -+ if (info->cs) { radeon_ddx_cs_start(pScrn, n, __FILE__, __func__, __LINE__); } else { \ -+ if (++info->cp->dma_begin_count != 1) { \ - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ - "BEGIN_RING without end at %s:%d\n", \ -- info->cp->dma_debug_func, info->cp->dma_debug_lineno); \ -+ info->cp->dma_debug_func, info->cp->dma_debug_lineno); \ - info->cp->dma_begin_count = 1; \ -- } \ -- info->cp->dma_debug_func = __FILE__; \ -- info->cp->dma_debug_lineno = __LINE__; \ -- if (!info->cp->indirectBuffer) { \ -+ } \ -+ info->cp->dma_debug_func = __FILE__; \ -+ info->cp->dma_debug_lineno = __LINE__; \ -+ if (!info->cp->indirectBuffer) { \ - info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn); \ - info->cp->indirectStart = 0; \ -- } else if (info->cp->indirectBuffer->used + (n) * (int)sizeof(uint32_t) > \ -- info->cp->indirectBuffer->total) { \ -+ } else if (info->cp->indirectBuffer->used + (n) * (int)sizeof(uint32_t) > \ -+ info->cp->indirectBuffer->total) { \ - RADEONCPFlushIndirect(pScrn, 1); \ -+ } \ -+ __expected = n; \ -+ __head = (pointer)((char *)info->cp->indirectBuffer->address + \ -+ info->cp->indirectBuffer->used); \ -+ __count = 0; \ - } \ -- __expected = n; \ -- __head = (pointer)((char *)info->cp->indirectBuffer->address + \ -- info->cp->indirectBuffer->used); \ -- __count = 0; \ - } while (0) - - #define ADVANCE_RING() do { \ -- if (info->cp->dma_begin_count-- != 1) { \ -+ if (info->cs) radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); else { \ -+ if (info->cp->dma_begin_count-- != 1) { \ - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ - "ADVANCE_RING without begin at %s:%d\n", \ - __FILE__, __LINE__); \ - info->cp->dma_begin_count = 0; \ -- } \ -- if (__count != __expected) { \ -+ } \ -+ if (__count != __expected) { \ - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ - "ADVANCE_RING count != expected (%d vs %d) at %s:%d\n", \ - __count, __expected, __FILE__, __LINE__); \ -- } \ -- if (RADEON_VERBOSE) { \ -+ } \ -+ if (RADEON_VERBOSE) { \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - "ADVANCE_RING() start: %d used: %d count: %d\n", \ - info->cp->indirectStart, \ - info->cp->indirectBuffer->used, \ - __count * (int)sizeof(uint32_t)); \ -+ } \ -+ info->cp->indirectBuffer->used += __count * (int)sizeof(uint32_t); \ - } \ -- info->cp->indirectBuffer->used += __count * (int)sizeof(uint32_t); \ --} while (0) -+ } while (0) - - #define OUT_RING(x) do { \ - if (RADEON_VERBOSE) { \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - " OUT_RING(0x%08x)\n", (unsigned int)(x)); \ - } \ -+ if (info->cs) radeon_cs_write_dword(info->cs, (x)); else \ - __head[__count++] = (x); \ - } while (0) - -@@ -1288,12 +1433,22 @@ do { \ - OUT_RING(val); \ - } while (0) - -+#define OUT_RING_RELOC(x, read_domains, write_domain) \ -+ do { \ -+ int _ret; \ -+ _ret = radeon_cs_write_reloc(info->cs, x, read_domains, write_domain, 0); \ -+ if (_ret) ErrorF("reloc emit failure %d\n", _ret); \ -+ } while(0) -+ -+ - #define FLUSH_RING() \ - do { \ - if (RADEON_VERBOSE) \ - xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ - "FLUSH_RING in %s\n", __FUNCTION__); \ -- if (info->cp->indirectBuffer) \ -+ if (info->cs) \ -+ radeon_cs_flush_indirect(pScrn); \ -+ else if (info->cp->indirectBuffer) \ - RADEONCPFlushIndirect(pScrn, 0); \ - } while (0) - -@@ -1369,6 +1524,13 @@ do { \ - #endif /* XF86DRI */ - - #if defined(XF86DRI) && defined(USE_EXA) -+ -+#ifdef XF86DRM_MODE -+#define CS_FULL(cs) ((cs)->cdw > 15 * 1024) -+#else -+#define CS_FULL(cs) FALSE -+#endif -+ - #define RADEON_SWITCH_TO_2D() \ - do { \ - uint32_t flush = 0; \ -@@ -1379,8 +1541,12 @@ do { \ - case EXA_ENGINEMODE_2D: \ - break; \ - } \ -- if (flush && info->directRenderingEnabled) \ -- RADEONCPFlushIndirect(pScrn, 1); \ -+ if (flush) { \ -+ if (info->cs) \ -+ radeon_cs_flush_indirect(pScrn); \ -+ else if (info->directRenderingEnabled) \ -+ RADEONCPFlushIndirect(pScrn, 1); \ -+ } \ - info->accel_state->engineMode = EXA_ENGINEMODE_2D; \ - } while (0); - -@@ -1389,16 +1555,21 @@ do { \ - uint32_t flush = 0; \ - switch (info->accel_state->engineMode) { \ - case EXA_ENGINEMODE_UNKNOWN: \ -- case EXA_ENGINEMODE_2D: \ - flush = 1; \ -+ break; \ -+ case EXA_ENGINEMODE_2D: \ -+ flush = !info->cs || CS_FULL(info->cs); \ - case EXA_ENGINEMODE_3D: \ - break; \ - } \ - if (flush) { \ -- if (info->directRenderingEnabled) \ -+ if (info->cs) \ -+ radeon_cs_flush_indirect(pScrn); \ -+ else if (info->directRenderingEnabled) \ - RADEONCPFlushIndirect(pScrn, 1); \ -- RADEONInit3DEngine(pScrn); \ - } \ -+ if (!info->accel_state->XInited3D) \ -+ RADEONInit3DEngine(pScrn); \ - info->accel_state->engineMode = EXA_ENGINEMODE_3D; \ - } while (0); - #else -diff --git a/src/radeon_accel.c b/src/radeon_accel.c -index a9a4848..02905dd 100644 ---- a/src/radeon_accel.c -+++ b/src/radeon_accel.c -@@ -87,6 +87,8 @@ - #include "radeon_drm.h" - #endif - -+#include "ati_pciids_gen.h" -+ - /* Line support */ - #include "miline.h" - -@@ -373,6 +375,9 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn) - RADEONInfoPtr info = RADEONPTR(pScrn); - unsigned char *RADEONMMIO = info->MMIO; - -+ if (info->cs) -+ return; -+ - xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, - "EngineRestore (%d/%d)\n", - info->CurrentLayout.pixel_code, -@@ -419,6 +424,24 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn) - info->accel_state->XInited3D = FALSE; - } - -+static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ if (info->dri->pKernelDRMVersion->version_major < 2) { -+ drm_radeon_getparam_t np; -+ -+ memset(&np, 0, sizeof(np)); -+ np.param = RADEON_PARAM_NUM_GB_PIPES; -+ np.value = num_pipes; -+ return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np)); -+ } else { -+ struct drm_radeon_info np2; -+ np2.value = (unsigned long)num_pipes; -+ np2.request = RADEON_INFO_NUM_GB_PIPES; -+ return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2)); -+ } -+} -+ - /* Initialize the acceleration hardware */ - void RADEONEngineInit(ScrnInfoPtr pScrn) - { -@@ -434,15 +457,9 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) - - #ifdef XF86DRI - if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) { -- drm_radeon_getparam_t np; - int num_pipes; - -- memset(&np, 0, sizeof(np)); -- np.param = RADEON_PARAM_NUM_GB_PIPES; -- np.value = &num_pipes; -- -- if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, -- sizeof(np)) < 0) { -+ if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) { - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, - "Failed to determine num pipes from DRM, falling back to " - "manual look-up!\n"); -@@ -453,59 +470,67 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) - } - #endif - -- if ((info->ChipFamily == CHIP_FAMILY_RV410) || -- (info->ChipFamily == CHIP_FAMILY_R420) || -- (info->ChipFamily == CHIP_FAMILY_RS600) || -- (info->ChipFamily == CHIP_FAMILY_RS690) || -- (info->ChipFamily == CHIP_FAMILY_RS740) || -- (info->ChipFamily == CHIP_FAMILY_RS400) || -- (info->ChipFamily == CHIP_FAMILY_RS480) || -- IS_R500_3D) { -- if (info->accel_state->num_gb_pipes == 0) { -- uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT); -- -- info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; -- if (IS_R500_3D) -- OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); -- } -- } else { -- if (info->accel_state->num_gb_pipes == 0) { -- if ((info->ChipFamily == CHIP_FAMILY_R300) || -- (info->ChipFamily == CHIP_FAMILY_R350)) { -- /* R3xx chips */ -- info->accel_state->num_gb_pipes = 2; -- } else { -- /* RV3xx chips */ -- info->accel_state->num_gb_pipes = 1; -+ if (!info->cs) { -+ if ((info->ChipFamily == CHIP_FAMILY_RV410) || -+ (info->ChipFamily == CHIP_FAMILY_R420) || -+ (info->ChipFamily == CHIP_FAMILY_RS600) || -+ (info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740) || -+ (info->ChipFamily == CHIP_FAMILY_RS400) || -+ (info->ChipFamily == CHIP_FAMILY_RS480) || -+ IS_R500_3D) { -+ if (info->accel_state->num_gb_pipes == 0) { -+ uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT); -+ -+ info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1; -+ if (IS_R500_3D) -+ OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4)); -+ } -+ } else { -+ if (info->accel_state->num_gb_pipes == 0) { -+ if ((info->ChipFamily == CHIP_FAMILY_R300) || -+ (info->ChipFamily == CHIP_FAMILY_R350)) { -+ /* R3xx chips */ -+ info->accel_state->num_gb_pipes = 2; -+ } else { -+ /* RV3xx chips */ -+ info->accel_state->num_gb_pipes = 1; -+ } - } -- } -- } -- -- if (IS_R300_3D || IS_R500_3D) -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, -- "num quad-pipes is %d\n", info->accel_state->num_gb_pipes); -- -- if (IS_R300_3D || IS_R500_3D) { -- uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16); -- -- switch(info->accel_state->num_gb_pipes) { -- case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; -- case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; -- case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; -- default: -- case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; - } - -- OUTREG(R300_GB_TILE_CONFIG, gb_tile_config); -- OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -- OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); -- OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) | -- R300_DC_AUTOFLUSH_ENABLE | -- R300_DC_DC_DISABLE_IGNORE_PE)); -- } else -- OUTREG(RADEON_RB3D_CNTL, 0); -+ /* RV410 SE cards only have 1 quadpipe */ -+ if ((info->Chipset == PCI_CHIP_RV410_5E4C) || -+ (info->Chipset == PCI_CHIP_RV410_5E4F)) -+ info->accel_state->num_gb_pipes = 1; -+ -+ if (IS_R300_3D || IS_R500_3D) -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, -+ "num quad-pipes is %d\n", info->accel_state->num_gb_pipes); -+ -+ if (IS_R300_3D || IS_R500_3D) { -+ uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); -+ -+ switch(info->accel_state->num_gb_pipes) { -+ case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; -+ case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break; -+ case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break; -+ default: -+ case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; -+ } - -- RADEONEngineReset(pScrn); -+ OUTREG(R300_GB_TILE_CONFIG, gb_tile_config); -+ OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -+ if (info->ChipFamily >= CHIP_FAMILY_R420) -+ OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG); -+ OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) | -+ R300_DC_AUTOFLUSH_ENABLE | -+ R300_DC_DC_DISABLE_IGNORE_PE)); -+ } else -+ OUTREG(RADEON_RB3D_CNTL, 0); -+ -+ RADEONEngineReset(pScrn); -+ } - - switch (info->CurrentLayout.pixel_code) { - case 8: datatype = 2; break; -@@ -529,6 +554,24 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) - RADEONEngineRestore(pScrn); - } - -+uint32_t radeonGetPixmapOffset(PixmapPtr pPix) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t offset = 0; -+ if (info->cs) -+ return 0; -+#ifdef USE_EXA -+ if (info->useEXA) { -+ offset = exaGetPixmapOffset(pPix); -+ } else -+#endif -+ { -+ offset = pPix->devPrivate.ptr - info->FB; -+ } -+ offset += info->fbLocation + pScrn->fbOffset; -+ return offset; -+} - - #define ACCEL_MMIO - #define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO -@@ -613,6 +656,8 @@ int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info) - } - } - -+#define RADEON_IB_RESERVE (16 * sizeof(uint32_t)) -+ - /* Get an indirect buffer for the CP 2D acceleration commands */ - drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) - { -@@ -689,6 +734,7 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) - int start = info->cp->indirectStart; - drm_radeon_indirect_t indirect; - -+ assert(!info->cs); - if (!buffer) return; - if (start == buffer->used && !discard) return; - -@@ -738,6 +784,7 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) - int start = info->cp->indirectStart; - drm_radeon_indirect_t indirect; - -+ assert(!info->cs); - if (info->ChipFamily >= CHIP_FAMILY_R600) { - if (buffer && (buffer->used & 0x3c)) { - RING_LOCALS; -@@ -1072,8 +1119,10 @@ void RADEONInit3DEngine(ScrnInfoPtr pScrn) - if (info->directRenderingEnabled) { - drm_radeon_sarea_t *pSAREAPriv; - -- pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); -- pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen); -+ if (!info->kms_enabled) { -+ pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); -+ pSAREAPriv->ctx_owner = DRIGetContext(pScrn->pScreen); -+ } - RADEONInit3DEngineCP(pScrn); - } else - #endif -diff --git a/src/radeon_accelfuncs.c b/src/radeon_accelfuncs.c -index 45eb6d5..2d6fe01 100644 ---- a/src/radeon_accelfuncs.c -+++ b/src/radeon_accelfuncs.c -@@ -1345,10 +1345,7 @@ FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a) - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "XAA Render acceleration " - "unsupported on Radeon 9500/9700 and newer. " - "Please use EXA instead.\n"); -- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280) || -- (info->ChipFamily == CHIP_FAMILY_RS300) || -- (info->ChipFamily == CHIP_FAMILY_R200)) { -+ } else if (IS_R200_3D) { - a->SetupForCPUToScreenAlphaTexture2 = - FUNC_NAME(R200SetupForCPUToScreenAlphaTexture); - a->SubsequentCPUToScreenAlphaTexture = -diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c -index a657fac..bf3bfa0 100644 ---- a/src/radeon_atombios.c -+++ b/src/radeon_atombios.c -@@ -524,7 +524,7 @@ rhdAtomASICInit(atomBiosHandlePtr handle) - } - - int --atombios_dyn_clk_setup(ScrnInfoPtr pScrn, int enable) -+atombios_clk_gating_setup(ScrnInfoPtr pScrn, Bool enable) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - DYNAMIC_CLOCK_GATING_PS_ALLOCATION dynclk_data; -@@ -548,13 +548,19 @@ atombios_dyn_clk_setup(ScrnInfoPtr pScrn, int enable) - } - - int --atombios_static_pwrmgt_setup(ScrnInfoPtr pScrn, int enable) -+atombios_static_pwrmgt_setup(ScrnInfoPtr pScrn, Bool enable) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - ENABLE_ASIC_STATIC_PWR_MGT_PS_ALLOCATION pwrmgt_data; - AtomBiosArgRec data; - unsigned char *space; - -+ /* disabling static power management causes hangs on some r4xx chips */ -+ if (((info->ChipFamily == CHIP_FAMILY_R420) || -+ (info->ChipFamily == CHIP_FAMILY_RV410)) && -+ !enable) -+ return ATOM_NOT_IMPLEMENTED; -+ - pwrmgt_data.ucEnable = enable; - - data.exec.index = GetIndexIntoMasterTable(COMMAND, EnableASIC_StaticPwrMgt); -@@ -571,6 +577,59 @@ atombios_static_pwrmgt_setup(ScrnInfoPtr pScrn, int enable) - - } - -+int -+atombios_set_engine_clock(ScrnInfoPtr pScrn, uint32_t engclock) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ SET_ENGINE_CLOCK_PS_ALLOCATION eng_clock_ps; -+ AtomBiosArgRec data; -+ unsigned char *space; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ eng_clock_ps.ulTargetEngineClock = engclock; /* 10 khz */ -+ -+ /*ErrorF("Attempting to set engine clock to: %d\n", engclock);*/ -+ data.exec.index = GetIndexIntoMasterTable(COMMAND, SetEngineClock); -+ data.exec.dataSpace = (void *)&space; -+ data.exec.pspace = &eng_clock_ps; -+ -+ if (RHDAtomBiosFunc(info->atomBIOS->scrnIndex, info->atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) { -+ /* ErrorF("Set engine clock success\n"); */ -+ return ATOM_SUCCESS; -+ } -+ /* ErrorF("Set engine clock failed\n"); */ -+ return ATOM_NOT_IMPLEMENTED; -+} -+ -+int -+atombios_set_memory_clock(ScrnInfoPtr pScrn, uint32_t memclock) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ SET_MEMORY_CLOCK_PS_ALLOCATION mem_clock_ps; -+ AtomBiosArgRec data; -+ unsigned char *space; -+ -+ if (info->IsIGP) -+ return ATOM_SUCCESS; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ mem_clock_ps.ulTargetMemoryClock = memclock; /* 10 khz */ -+ -+ /* ErrorF("Attempting to set mem clock to: %d\n", memclock); */ -+ data.exec.index = GetIndexIntoMasterTable(COMMAND, SetMemoryClock); -+ data.exec.dataSpace = (void *)&space; -+ data.exec.pspace = &mem_clock_ps; -+ -+ if (RHDAtomBiosFunc(info->atomBIOS->scrnIndex, info->atomBIOS, ATOMBIOS_EXEC, &data) == ATOM_SUCCESS) { -+ /* ErrorF("Set memory clock success\n"); */ -+ return ATOM_SUCCESS; -+ } -+ /* ErrorF("Set memory clock failed\n"); */ -+ return ATOM_NOT_IMPLEMENTED; -+} -+ - # endif - - static AtomBiosResult -@@ -1377,11 +1436,11 @@ const int object_connector_convert[] = - CONNECTOR_CTV, - CONNECTOR_STV, - CONNECTOR_NONE, -+ CONNECTOR_NONE, - CONNECTOR_DIN, - CONNECTOR_SCART, - CONNECTOR_HDMI_TYPE_A, - CONNECTOR_HDMI_TYPE_B, -- CONNECTOR_HDMI_TYPE_B, - CONNECTOR_LVDS, - CONNECTOR_DIN, - CONNECTOR_NONE, -@@ -1534,6 +1593,7 @@ static void RADEONApplyATOMQuirks(ScrnInfoPtr pScrn, int index) - info->BiosConnector[index].ConnectorType = CONNECTOR_DVI_D; - } - } -+ - /* a-bit f-i90hd - ciaranm on #radeonhd - this board has no DVI */ - if ((info->Chipset == PCI_CHIP_RS600_7941) && - (PCI_SUB_VENDOR_ID(info->PciInfo) == 0x147b) && -@@ -1565,21 +1625,17 @@ static void RADEONApplyATOMQuirks(ScrnInfoPtr pScrn, int index) - } - } - -- /* some BIOSes seem to report DAC on HDMI - they hurt me with their lies */ -- if ((info->BiosConnector[index].ConnectorType == CONNECTOR_HDMI_TYPE_A) || -- (info->BiosConnector[index].ConnectorType == CONNECTOR_HDMI_TYPE_B)) { -- info->BiosConnector[index].devices &= ~(ATOM_DEVICE_CRT_SUPPORT); -- } -- -- /* ASUS HD 3600 XT board lists the DVI port as HDMI */ -- if ((info->Chipset == PCI_CHIP_RV635_9598) && -- (PCI_SUB_VENDOR_ID(info->PciInfo) == 0x1043) && -- (PCI_SUB_DEVICE_ID(info->PciInfo) == 0x01da)) { -- if (info->BiosConnector[index].ConnectorType == CONNECTOR_HDMI_TYPE_B) -- info->BiosConnector[index].ConnectorType = CONNECTOR_DVI_D; -+ /* some BIOSes seem to report DAC on HDMI - usually this is a board with -+ * HDMI + VGA reporting as HDMI -+ */ -+ if (info->BiosConnector[index].ConnectorType == CONNECTOR_HDMI_TYPE_A) { -+ if (info->BiosConnector[index].devices & (ATOM_DEVICE_CRT_SUPPORT)) { -+ info->BiosConnector[index].devices &= ~(ATOM_DEVICE_DFP_SUPPORT); -+ info->BiosConnector[index].ConnectorType = CONNECTOR_VGA; -+ info->BiosConnector[index].connector_object = 0; -+ } - } - -- - } - - uint32_t -@@ -1761,17 +1817,17 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - ATOM_DISPLAY_OBJECT_PATH *path; - addr += path_size; - path = (ATOM_DISPLAY_OBJECT_PATH *)addr; -- path_size += path->usSize; -+ path_size += le16_to_cpu(path->usSize); - -- if (device_support & path->usDeviceTag) { -+ if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - -- con_obj_id = (path->usConnObjectId & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; -- con_obj_num = (path->usConnObjectId & ENUM_ID_MASK) >> ENUM_ID_SHIFT; -- con_obj_type = (path->usConnObjectId & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; -+ con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; -+ con_obj_num = (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) >> ENUM_ID_SHIFT; -+ con_obj_type = (le16_to_cpu(path->usConnObjectId) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; - -- if ((path->usDeviceTag == ATOM_DEVICE_TV1_SUPPORT) || -- (path->usDeviceTag == ATOM_DEVICE_TV2_SUPPORT)) { -+ if ((le16_to_cpu(path->usDeviceTag) == ATOM_DEVICE_TV1_SUPPORT) || -+ (le16_to_cpu(path->usDeviceTag) == ATOM_DEVICE_TV2_SUPPORT)) { - if (!enable_tv) { - info->BiosConnector[i].valid = FALSE; - continue; -@@ -1779,7 +1835,7 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - } - - /* don't support CV yet */ -- if (path->usDeviceTag == ATOM_DEVICE_CV_SUPPORT) { -+ if (le16_to_cpu(path->usDeviceTag) == ATOM_DEVICE_CV_SUPPORT) { - info->BiosConnector[i].valid = FALSE; - continue; - } -@@ -1810,15 +1866,15 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - continue; - } else - info->BiosConnector[i].valid = TRUE; -- info->BiosConnector[i].devices = path->usDeviceTag; -- info->BiosConnector[i].connector_object = path->usConnObjectId; -+ info->BiosConnector[i].devices = le16_to_cpu(path->usDeviceTag); -+ info->BiosConnector[i].connector_object = le16_to_cpu(path->usConnObjectId); - -- for (j = 0; j < ((path->usSize - 8) / 2); j++) { -+ for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t enc_obj_id, enc_obj_num, enc_obj_type; - -- enc_obj_id = (path->usGraphicObjIds[j] & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; -- enc_obj_num = (path->usGraphicObjIds[j] & ENUM_ID_MASK) >> ENUM_ID_SHIFT; -- enc_obj_type = (path->usGraphicObjIds[j] & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; -+ enc_obj_id = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; -+ enc_obj_num = (le16_to_cpu(path->usGraphicObjIds[j]) & ENUM_ID_MASK) >> ENUM_ID_SHIFT; -+ enc_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; - - if (enc_obj_type == GRAPH_OBJECT_TYPE_ENCODER) { - if (enc_obj_num == 2) -@@ -1826,15 +1882,15 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - else - info->BiosConnector[i].linkb = FALSE; - -- if (!radeon_add_encoder(pScrn, enc_obj_id, path->usDeviceTag)) -+ if (!radeon_add_encoder(pScrn, enc_obj_id, le16_to_cpu(path->usDeviceTag))) - return FALSE; - } - } - - /* look up gpio for ddc */ -- if ((path->usDeviceTag & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) == 0) { -+ if ((le16_to_cpu(path->usDeviceTag) & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) == 0) { - for (j = 0; j < con_obj->ucNumberOfObjects; j++) { -- if (path->usConnObjectId == le16_to_cpu(con_obj->asObjects[j].usObjectID)) { -+ if (le16_to_cpu(path->usConnObjectId) == le16_to_cpu(con_obj->asObjects[j].usObjectID)) { - ATOM_COMMON_RECORD_HEADER *Record = (ATOM_COMMON_RECORD_HEADER *) - ((char *)&atomDataPtr->Object_Header->sHeader - + le16_to_cpu(con_obj->asObjects[j].usRecordOffset)); -@@ -1881,7 +1937,6 @@ RADEONGetATOMConnectorInfoFromBIOSObject (ScrnInfoPtr pScrn) - for (j = 0; j < ATOM_MAX_SUPPORTED_DEVICE; j++) { - if (info->BiosConnector[j].valid && (i != j) ) { - if (info->BiosConnector[i].i2c_line_mux == info->BiosConnector[j].i2c_line_mux) { -- ErrorF("Shared DDC line: %d %d\n", i, j); - info->BiosConnector[i].shared_ddc = TRUE; - info->BiosConnector[j].shared_ddc = TRUE; - } -@@ -1954,6 +2009,35 @@ RADEONGetATOMLVDSInfo(ScrnInfoPtr pScrn, radeon_lvds_ptr lvds) - native_mode->VBlank, native_mode->VOverPlus, native_mode->VSyncWidth); - } - -+void -+RADEONATOMGetIGPInfo(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ atomDataTablesPtr atomDataPtr; -+ unsigned short size; -+ uint8_t crev, frev; -+ -+ atomDataPtr = info->atomBIOS->atomDataPtr; -+ -+ if (!rhdAtomGetTableRevisionAndSize((ATOM_COMMON_TABLE_HEADER *)(atomDataPtr->IntegratedSystemInfo.base), &frev, &crev, &size)) -+ return; -+ -+ switch (crev) { -+ case 1: -+ info->igp_sideport_mclk = atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo->ulBootUpMemoryClock / 100.0; -+ info->igp_system_mclk = le16_to_cpu(atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo->usK8MemoryClock); -+ info->igp_ht_link_clk = le16_to_cpu(atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo->usFSBClock); -+ info->igp_ht_link_width = atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo->ucHTLinkWidth; -+ break; -+ case 2: -+ info->igp_sideport_mclk = atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo_v2->ulBootUpSidePortClock / 100.0; -+ info->igp_system_mclk = atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo_v2->ulBootUpUMAClock / 100.0; -+ info->igp_ht_link_clk = atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo_v2->ulHTLinkFreq / 100.0; -+ info->igp_ht_link_width = le16_to_cpu(atomDataPtr->IntegratedSystemInfo.IntegratedSystemInfo_v2->usMinHTLinkWidth); -+ break; -+ } -+} -+ - Bool - RADEONGetATOMTVInfo(xf86OutputPtr output) - { -@@ -2240,8 +2324,11 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn) - info->BiosConnector[i].ddc_i2c.valid = FALSE; - else if ((info->ChipFamily == CHIP_FAMILY_RS690) || - (info->ChipFamily == CHIP_FAMILY_RS740)) { -- /* IGP DFP ports use non-standard gpio entries */ -- if ((i == ATOM_DEVICE_DFP2_INDEX) || (i == ATOM_DEVICE_DFP3_INDEX)) -+ /* IGP DFP ports sometimes use non-standard gpio entries */ -+ if ((i == ATOM_DEVICE_DFP2_INDEX) && (ci.sucI2cId.sbfAccess.bfI2C_LineMux == 2)) -+ info->BiosConnector[i].ddc_i2c = -+ RADEONLookupGPIOLineForDDC(pScrn, ci.sucI2cId.sbfAccess.bfI2C_LineMux + 1); -+ else if ((i == ATOM_DEVICE_DFP3_INDEX) && (ci.sucI2cId.sbfAccess.bfI2C_LineMux == 1)) - info->BiosConnector[i].ddc_i2c = - RADEONLookupGPIOLineForDDC(pScrn, ci.sucI2cId.sbfAccess.bfI2C_LineMux + 1); - else -@@ -2303,6 +2390,8 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn) - ((j == ATOM_DEVICE_CRT1_INDEX) || - (j == ATOM_DEVICE_CRT2_INDEX))) { - info->BiosConnector[i].devices |= info->BiosConnector[j].devices; -+ if (info->BiosConnector[i].ConnectorType == CONNECTOR_DVI_D) -+ info->BiosConnector[i].ConnectorType = CONNECTOR_DVI_I; - info->BiosConnector[j].valid = FALSE; - } else if (((j == ATOM_DEVICE_DFP1_INDEX) || - (j == ATOM_DEVICE_DFP2_INDEX) || -@@ -2310,6 +2399,8 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn) - ((i == ATOM_DEVICE_CRT1_INDEX) || - (i == ATOM_DEVICE_CRT2_INDEX))) { - info->BiosConnector[j].devices |= info->BiosConnector[i].devices; -+ if (info->BiosConnector[j].ConnectorType == CONNECTOR_DVI_D) -+ info->BiosConnector[j].ConnectorType = CONNECTOR_DVI_I; - info->BiosConnector[i].valid = FALSE; - } else { - info->BiosConnector[i].shared_ddc = TRUE; -@@ -2322,12 +2413,6 @@ RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn) - } - } - -- for (i = 0; i < ATOM_MAX_SUPPORTED_DEVICE; i++) { -- if (info->encoders[i] != NULL) { -- ErrorF("encoder: 0x%x\n", info->encoders[i]->encoder_id); -- } -- } -- - return TRUE; - } - -diff --git a/src/radeon_atombios.h b/src/radeon_atombios.h -index b9a5398..81e5a33 100644 ---- a/src/radeon_atombios.h -+++ b/src/radeon_atombios.h -@@ -117,10 +117,16 @@ extern Bool - RADEONGetATOMConnectorInfoFromBIOSConnectorTable (ScrnInfoPtr pScrn); - - extern int --atombios_dyn_clk_setup(ScrnInfoPtr pScrn, int enable); -+atombios_clk_gating_setup(ScrnInfoPtr pScrn, Bool enable); - - extern int --atombios_static_pwrmgt_setup(ScrnInfoPtr pScrn, int enable); -+atombios_static_pwrmgt_setup(ScrnInfoPtr pScrn, Bool enable); -+ -+extern int -+atombios_set_engine_clock(ScrnInfoPtr pScrn, uint32_t engclock); -+ -+extern int -+atombios_set_memory_clock(ScrnInfoPtr pScrn, uint32_t memclock); - - extern Bool - RADEONGetATOMTVInfo(xf86OutputPtr output); -@@ -266,6 +272,9 @@ typedef struct _atomBiosHandle { - extern Bool - RADEONATOMGetTVTimings(ScrnInfoPtr pScrn, int index, SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION *crtc_timing, int32_t *pixel_clock); - -+extern void -+RADEONATOMGetIGPInfo(ScrnInfoPtr pScrn); -+ - extern uint32_t - radeon_get_device_index(uint32_t device_support); - extern radeon_encoder_ptr -diff --git a/src/radeon_bios.c b/src/radeon_bios.c -index 9b5cb88..ecf5403 100644 ---- a/src/radeon_bios.c -+++ b/src/radeon_bios.c -@@ -107,7 +107,62 @@ radeon_read_disabled_bios(ScrnInfoPtr pScrn) - - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Attempting to read un-POSTed bios\n"); - -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ if (info->ChipFamily >= CHIP_FAMILY_RV770) { -+ uint32_t viph_control = INREG(RADEON_VIPH_CONTROL); -+ uint32_t bus_cntl = INREG(RADEON_BUS_CNTL); -+ uint32_t d1vga_control = INREG(AVIVO_D1VGA_CONTROL); -+ uint32_t d2vga_control = INREG(AVIVO_D2VGA_CONTROL); -+ uint32_t vga_render_control = INREG(AVIVO_VGA_RENDER_CONTROL); -+ uint32_t rom_cntl = INREG(R600_ROM_CNTL); -+ uint32_t cg_spll_func_cntl = 0; -+ uint32_t cg_spll_status; -+ -+ /* disable VIP */ -+ OUTREG(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN)); -+ -+ /* enable the rom */ -+ OUTREG(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM)); -+ -+ /* Disable VGA mode */ -+ OUTREG(AVIVO_D1VGA_CONTROL, (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | -+ AVIVO_DVGA_CONTROL_TIMING_SELECT))); -+ OUTREG(AVIVO_D2VGA_CONTROL, (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | -+ AVIVO_DVGA_CONTROL_TIMING_SELECT))); -+ OUTREG(AVIVO_VGA_RENDER_CONTROL, (vga_render_control & ~AVIVO_VGA_VSTATUS_CNTL_MASK)); -+ -+ if (info->ChipFamily == CHIP_FAMILY_RV730) { -+ cg_spll_func_cntl = INREG(R600_CG_SPLL_FUNC_CNTL); -+ -+ /* enable bypass mode */ -+ OUTREG(R600_CG_SPLL_FUNC_CNTL, (cg_spll_func_cntl | R600_SPLL_BYPASS_EN)); -+ -+ /* wait for SPLL_CHG_STATUS to change to 1 */ -+ cg_spll_status = 0; -+ while (!(cg_spll_status & R600_SPLL_CHG_STATUS)) -+ cg_spll_status = INREG(R600_CG_SPLL_STATUS); -+ -+ OUTREG(R600_ROM_CNTL, (rom_cntl & ~R600_SCK_OVERWRITE)); -+ } else -+ OUTREG(R600_ROM_CNTL, (rom_cntl | R600_SCK_OVERWRITE)); -+ -+ ret = radeon_read_bios(pScrn); -+ -+ /* restore regs */ -+ if (info->ChipFamily == CHIP_FAMILY_RV730) { -+ OUTREG(R600_CG_SPLL_FUNC_CNTL, cg_spll_func_cntl); -+ -+ /* wait for SPLL_CHG_STATUS to change to 1 */ -+ cg_spll_status = 0; -+ while (!(cg_spll_status & R600_SPLL_CHG_STATUS)) -+ cg_spll_status = INREG(R600_CG_SPLL_STATUS); -+ } -+ OUTREG(RADEON_VIPH_CONTROL, viph_control); -+ OUTREG(RADEON_BUS_CNTL, bus_cntl); -+ OUTREG(AVIVO_D1VGA_CONTROL, d1vga_control); -+ OUTREG(AVIVO_D2VGA_CONTROL, d2vga_control); -+ OUTREG(AVIVO_VGA_RENDER_CONTROL, vga_render_control); -+ OUTREG(R600_ROM_CNTL, rom_cntl); -+ } else if (info->ChipFamily >= CHIP_FAMILY_R600) { - uint32_t viph_control = INREG(RADEON_VIPH_CONTROL); - uint32_t bus_cntl = INREG(RADEON_BUS_CNTL); - uint32_t d1vga_control = INREG(AVIVO_D1VGA_CONTROL); -@@ -127,7 +182,7 @@ radeon_read_disabled_bios(ScrnInfoPtr pScrn) - /* enable the rom */ - OUTREG(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM)); - -- /* Disable VGA mode */ -+ /* Disable VGA mode */ - OUTREG(AVIVO_D1VGA_CONTROL, (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - OUTREG(AVIVO_D2VGA_CONTROL, (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | -@@ -273,6 +328,7 @@ radeon_card_posted(ScrnInfoPtr pScrn) - unsigned char *RADEONMMIO = info->MMIO; - uint32_t reg; - -+ /* first check CRTCs */ - if (IS_AVIVO_VARIANT) { - reg = INREG(AVIVO_D1CRTC_CONTROL) | INREG(AVIVO_D2CRTC_CONTROL); - if (reg & AVIVO_CRTC_EN) -@@ -283,6 +339,15 @@ radeon_card_posted(ScrnInfoPtr pScrn) - return TRUE; - } - -+ /* then check MEM_SIZE, in case something turned the crtcs off */ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ reg = INREG(R600_CONFIG_MEMSIZE); -+ else -+ reg = INREG(RADEON_CONFIG_MEMSIZE); -+ -+ if (reg) -+ return TRUE; -+ - return FALSE; - } - -diff --git a/src/radeon_chipinfo_gen.h b/src/radeon_chipinfo_gen.h -index 6321246..41144c7 100644 ---- a/src/radeon_chipinfo_gen.h -+++ b/src/radeon_chipinfo_gen.h -@@ -1,5 +1,5 @@ - /* This file is autogenerated please do not edit */ --RADEONCardInfo RADEONCards[] = { -+static RADEONCardInfo RADEONCards[] = { - { 0x3150, CHIP_FAMILY_RV380, 1, 0, 0, 0, 0 }, - { 0x3151, CHIP_FAMILY_RV380, 0, 0, 0, 0, 0 }, - { 0x3152, CHIP_FAMILY_RV380, 1, 0, 0, 0, 0 }, -@@ -40,6 +40,8 @@ RADEONCardInfo RADEONCards[] = { - { 0x4A4E, CHIP_FAMILY_R420, 1, 0, 0, 0, 0 }, - { 0x4A4F, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, - { 0x4A50, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, -+ { 0x4A54, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, -+ { 0x4B48, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, - { 0x4B49, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, - { 0x4B4A, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, - { 0x4B4B, CHIP_FAMILY_R420, 0, 0, 0, 0, 0 }, -@@ -254,6 +256,7 @@ RADEONCardInfo RADEONCards[] = { - { 0x9440, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, - { 0x9441, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, - { 0x9442, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, -+ { 0x9443, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, - { 0x9444, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, - { 0x9446, CHIP_FAMILY_RV770, 0, 0, 0, 0, 0 }, - { 0x944A, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 }, -@@ -271,15 +274,26 @@ RADEONCardInfo RADEONCards[] = { - { 0x946B, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 }, - { 0x947A, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 }, - { 0x947B, CHIP_FAMILY_RV770, 1, 0, 0, 0, 0 }, -+ { 0x9480, CHIP_FAMILY_RV730, 1, 0, 0, 0, 0 }, - { 0x9487, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, -+ { 0x9488, CHIP_FAMILY_RV730, 1, 0, 0, 0, 0 }, - { 0x9489, CHIP_FAMILY_RV730, 1, 0, 0, 0, 0 }, - { 0x948F, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x9490, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x9491, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, -+ { 0x9495, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x9498, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x949C, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x949E, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, - { 0x949F, CHIP_FAMILY_RV730, 0, 0, 0, 0, 0 }, -+ { 0x94A0, CHIP_FAMILY_RV740, 1, 0, 0, 0, 0 }, -+ { 0x94A1, CHIP_FAMILY_RV740, 1, 0, 0, 0, 0 }, -+ { 0x94A3, CHIP_FAMILY_RV740, 1, 0, 0, 0, 0 }, -+ { 0x94B1, CHIP_FAMILY_RV740, 0, 0, 0, 0, 0 }, -+ { 0x94B3, CHIP_FAMILY_RV740, 0, 0, 0, 0, 0 }, -+ { 0x94B4, CHIP_FAMILY_RV740, 0, 0, 0, 0, 0 }, -+ { 0x94B5, CHIP_FAMILY_RV740, 0, 0, 0, 0, 0 }, -+ { 0x94B9, CHIP_FAMILY_RV740, 1, 0, 0, 0, 0 }, - { 0x94C0, CHIP_FAMILY_RV610, 0, 0, 0, 0, 0 }, - { 0x94C1, CHIP_FAMILY_RV610, 0, 0, 0, 0, 0 }, - { 0x94C3, CHIP_FAMILY_RV610, 0, 0, 0, 0, 0 }, -@@ -312,6 +326,7 @@ RADEONCardInfo RADEONCards[] = { - { 0x9552, CHIP_FAMILY_RV710, 1, 0, 0, 0, 0 }, - { 0x9553, CHIP_FAMILY_RV710, 1, 0, 0, 0, 0 }, - { 0x9555, CHIP_FAMILY_RV710, 1, 0, 0, 0, 0 }, -+ { 0x9557, CHIP_FAMILY_RV710, 1, 0, 0, 0, 0 }, - { 0x9580, CHIP_FAMILY_RV630, 0, 0, 0, 0, 0 }, - { 0x9581, CHIP_FAMILY_RV630, 1, 0, 0, 0, 0 }, - { 0x9583, CHIP_FAMILY_RV630, 1, 0, 0, 0, 0 }, -diff --git a/src/radeon_chipset_gen.h b/src/radeon_chipset_gen.h -index 631eda8..12dc322 100644 ---- a/src/radeon_chipset_gen.h -+++ b/src/radeon_chipset_gen.h -@@ -40,6 +40,8 @@ static SymTabRec RADEONChipsets[] = { - { PCI_CHIP_R420_JN, "ATI Radeon Mobility 9800 (M18) JN (AGP)" }, - { PCI_CHIP_R420_4A4F, "ATI Radeon X800 SE (R420) (AGP)" }, - { PCI_CHIP_R420_JP, "ATI Radeon X800XT (R420) JP (AGP)" }, -+ { PCI_CHIP_R420_JT, "ATI Radeon X800 VE (R420) JT (AGP)" }, -+ { PCI_CHIP_R481_4B48, "ATI Radeon X850 (R480) (AGP)" }, - { PCI_CHIP_R481_4B49, "ATI Radeon X850 XT (R480) (AGP)" }, - { PCI_CHIP_R481_4B4A, "ATI Radeon X850 SE (R480) (AGP)" }, - { PCI_CHIP_R481_4B4B, "ATI Radeon X850 PRO (R480) (AGP)" }, -@@ -254,6 +256,7 @@ static SymTabRec RADEONChipsets[] = { - { PCI_CHIP_RV770_9440, "ATI Radeon 4800 Series" }, - { PCI_CHIP_RV770_9441, "ATI Radeon HD 4870 x2" }, - { PCI_CHIP_RV770_9442, "ATI Radeon 4800 Series" }, -+ { PCI_CHIP_RV770_9443, "ATI Radeon HD 4850 x2" }, - { PCI_CHIP_RV770_9444, "ATI FirePro V8750 (FireGL)" }, - { PCI_CHIP_RV770_9446, "ATI FirePro V7760 (FireGL)" }, - { PCI_CHIP_RV770_944A, "ATI Mobility RADEON HD 4850" }, -@@ -271,15 +274,26 @@ static SymTabRec RADEONChipsets[] = { - { PCI_CHIP_RV770_946B, "ATI M98" }, - { PCI_CHIP_RV770_947A, "ATI M98" }, - { PCI_CHIP_RV770_947B, "ATI M98" }, -+ { PCI_CHIP_RV730_9480, "ATI Mobility Radeon HD 4650" }, - { PCI_CHIP_RV730_9487, "ATI Radeon RV730 (AGP)" }, -+ { PCI_CHIP_RV730_9488, "ATI Mobility Radeon HD 4670" }, - { PCI_CHIP_RV730_9489, "ATI FirePro M5750" }, - { PCI_CHIP_RV730_948F, "ATI Radeon RV730 (AGP)" }, - { PCI_CHIP_RV730_9490, "ATI RV730XT [Radeon HD 4670]" }, - { PCI_CHIP_RV730_9491, "ATI RADEON E4600" }, -+ { PCI_CHIP_RV730_9495, "ATI Radeon HD 4600 Series" }, - { PCI_CHIP_RV730_9498, "ATI RV730 PRO [Radeon HD 4650]" }, - { PCI_CHIP_RV730_949C, "ATI FirePro V7750 (FireGL)" }, - { PCI_CHIP_RV730_949E, "ATI FirePro V5700 (FireGL)" }, - { PCI_CHIP_RV730_949F, "ATI FirePro V3750 (FireGL)" }, -+ { PCI_CHIP_RV740_94A0, "ATI Mobility Radeon HD 4830" }, -+ { PCI_CHIP_RV740_94A1, "ATI Mobility Radeon HD 4850" }, -+ { PCI_CHIP_RV740_94A3, "ATI FirePro M7740" }, -+ { PCI_CHIP_RV740_94B1, "ATI RV740" }, -+ { PCI_CHIP_RV740_94B3, "ATI Radeon HD 4770" }, -+ { PCI_CHIP_RV740_94B4, "ATI Radeon HD 4700 Series" }, -+ { PCI_CHIP_RV740_94B5, "ATI Radeon HD 4770" }, -+ { PCI_CHIP_RV740_94B9, "ATI FirePro M5750" }, - { PCI_CHIP_RV610_94C0, "ATI RV610" }, - { PCI_CHIP_RV610_94C1, "ATI Radeon HD 2400 XT" }, - { PCI_CHIP_RV610_94C3, "ATI Radeon HD 2400 Pro" }, -@@ -312,6 +326,7 @@ static SymTabRec RADEONChipsets[] = { - { PCI_CHIP_RV710_9552, "ATI Mobility Radeon 4300 Series" }, - { PCI_CHIP_RV710_9553, "ATI Mobility Radeon 4500 Series" }, - { PCI_CHIP_RV710_9555, "ATI Mobility Radeon 4500 Series" }, -+ { PCI_CHIP_RV710_9557, "ATI FirePro RG220" }, - { PCI_CHIP_RV630_9580, "ATI RV630" }, - { PCI_CHIP_RV630_9581, "ATI Mobility Radeon HD 2600" }, - { PCI_CHIP_RV630_9583, "ATI Mobility Radeon HD 2600 XT" }, -diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c -index a9bc7d2..6f501a6 100644 ---- a/src/radeon_commonfuncs.c -+++ b/src/radeon_commonfuncs.c -@@ -56,6 +56,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - uint32_t gb_tile_config, su_reg_dest, vap_cntl; -+ int size; - ACCEL_PREAMBLE(); - - info->accel_state->texW[0] = info->accel_state->texH[0] = -@@ -63,13 +64,15 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - - if (IS_R300_3D || IS_R500_3D) { - -- BEGIN_ACCEL(3); -- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); -- OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -- FINISH_ACCEL(); -+ if (!info->cs) { -+ BEGIN_ACCEL(3); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); -+ OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -+ FINISH_ACCEL(); -+ } - -- gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16); -+ gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); - - switch(info->accel_state->num_gb_pipes) { - case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break; -@@ -79,20 +82,26 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break; - } - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(R300_GB_TILE_CONFIG, gb_tile_config); -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -- OUT_ACCEL_REG(R300_DST_PIPE_CONFIG, R300_PIPE_AUTO_CONFIG); -- OUT_ACCEL_REG(R300_GB_SELECT, 0); -- OUT_ACCEL_REG(R300_GB_ENABLE, 0); -- FINISH_ACCEL(); -+ if (info->dri->pKernelDRMVersion->version_major < 2) { -+ size = (info->ChipFamily >= CHIP_FAMILY_R420) ? 5 : 4; -+ BEGIN_ACCEL(size); -+ OUT_ACCEL_REG(R300_GB_TILE_CONFIG, gb_tile_config); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); -+ if (info->ChipFamily >= CHIP_FAMILY_R420) -+ OUT_ACCEL_REG(R300_DST_PIPE_CONFIG, R300_PIPE_AUTO_CONFIG); -+ OUT_ACCEL_REG(R300_GB_SELECT, 0); -+ OUT_ACCEL_REG(R300_GB_ENABLE, 0); -+ FINISH_ACCEL(); -+ } - - if (IS_R500_3D) { - su_reg_dest = ((1 << info->accel_state->num_gb_pipes) - 1); -- BEGIN_ACCEL(2); -- OUT_ACCEL_REG(R500_SU_REG_DEST, su_reg_dest); -- OUT_ACCEL_REG(R500_VAP_INDEX_OFFSET, 0); -- FINISH_ACCEL(); -+ if (info->dri->pKernelDRMVersion->version_major < 2) { -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(R500_SU_REG_DEST, su_reg_dest); -+ OUT_ACCEL_REG(R500_VAP_INDEX_OFFSET, 0); -+ FINISH_ACCEL(); -+ } - } - - BEGIN_ACCEL(3); -@@ -101,29 +110,34 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - FINISH_ACCEL(); - -- BEGIN_ACCEL(5); -+ BEGIN_ACCEL(3); - OUT_ACCEL_REG(R300_GB_AA_CONFIG, 0); - OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); - OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE); -- OUT_ACCEL_REG(R300_GB_MSPOS0, ((8 << R300_MS_X0_SHIFT) | -- (8 << R300_MS_Y0_SHIFT) | -- (8 << R300_MS_X1_SHIFT) | -- (8 << R300_MS_Y1_SHIFT) | -- (8 << R300_MS_X2_SHIFT) | -- (8 << R300_MS_Y2_SHIFT) | -- (8 << R300_MSBD0_Y_SHIFT) | -- (7 << R300_MSBD0_X_SHIFT))); -- OUT_ACCEL_REG(R300_GB_MSPOS1, ((8 << R300_MS_X3_SHIFT) | -- (8 << R300_MS_Y3_SHIFT) | -- (8 << R300_MS_X4_SHIFT) | -- (8 << R300_MS_Y4_SHIFT) | -- (8 << R300_MS_X5_SHIFT) | -- (8 << R300_MS_Y5_SHIFT) | -- (8 << R300_MSBD1_SHIFT))); - FINISH_ACCEL(); - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(R300_GA_ENHANCE, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL); -+ if (info->dri->pKernelDRMVersion->version_major < 2) { -+ BEGIN_ACCEL(3); -+ OUT_ACCEL_REG(R300_GB_MSPOS0, ((6 << R300_MS_X0_SHIFT) | -+ (6 << R300_MS_Y0_SHIFT) | -+ (6 << R300_MS_X1_SHIFT) | -+ (6 << R300_MS_Y1_SHIFT) | -+ (6 << R300_MS_X2_SHIFT) | -+ (6 << R300_MS_Y2_SHIFT) | -+ (6 << R300_MSBD0_Y_SHIFT) | -+ (6 << R300_MSBD0_X_SHIFT))); -+ OUT_ACCEL_REG(R300_GB_MSPOS1, ((6 << R300_MS_X3_SHIFT) | -+ (6 << R300_MS_Y3_SHIFT) | -+ (6 << R300_MS_X4_SHIFT) | -+ (6 << R300_MS_Y4_SHIFT) | -+ (6 << R300_MS_X5_SHIFT) | -+ (6 << R300_MS_Y5_SHIFT) | -+ (6 << R300_MSBD1_SHIFT))); -+ OUT_ACCEL_REG(R300_GA_ENHANCE, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL); -+ FINISH_ACCEL(); -+ } -+ -+ BEGIN_ACCEL(4); - OUT_ACCEL_REG(R300_GA_POLY_MODE, R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE); - OUT_ACCEL_REG(R300_GA_ROUND_MODE, (R300_GEOMETRY_ROUND_NEAREST | - R300_COLOR_ROUND_NEAREST)); -@@ -220,10 +234,10 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - - /* pre-load the vertex shaders */ - if (info->accel_state->has_tcl) { -- /* exa mask/Xv bicubic shader program */ -- BEGIN_ACCEL(13); -- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); -- /* PVS inst 0 */ -+ BEGIN_ACCEL(37); -+ /* exa composite shader program */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(0)); -+ /* PVS inst 0 - dst X,Y */ - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -@@ -235,8 +249,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_OFFSET(0) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | -@@ -252,20 +266,26 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - -- /* PVS inst 1 */ -+ /* PVS inst 1 - src X */ - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_DST_OPCODE(R300_VE_ADD) | -- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -- R300_PVS_DST_OFFSET(1) | -- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) | -+ R300_PVS_DST_OFFSET(0) | -+ R300_PVS_DST_WE_X)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | -@@ -273,6 +293,27 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 2 - src Y */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) | -+ R300_PVS_DST_OFFSET(0) | -+ R300_PVS_DST_WE_Y)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(6) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(1) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | -@@ -281,82 +322,138 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - -- /* PVS inst 2 */ -+ /* PVS inst 3 - src X / w */ - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_DST_OPCODE(R300_VE_ADD) | -+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -- R300_PVS_DST_OFFSET(2) | -- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ R300_PVS_DST_OFFSET(1) | -+ R300_PVS_DST_WE_X)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(7) | -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) | -+ R300_PVS_SRC_OFFSET(0) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -- R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_W) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_OFFSET(6) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 4 - src y / h */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ R300_PVS_DST_OFFSET(1) | -+ R300_PVS_DST_WE_Y)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(1) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_OFFSET(6) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -- FINISH_ACCEL(); - -- BEGIN_ACCEL(9); -- /* exa no mask instruction */ -- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 3); -- /* PVS inst 0 */ -+ /* PVS inst 5 - mask X */ - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_DST_OPCODE(R300_VE_ADD) | -- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) | - R300_PVS_DST_OFFSET(0) | -- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ R300_PVS_DST_WE_Z)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(2) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_OFFSET(7) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 6 - mask Y */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) | -+ R300_PVS_DST_OFFSET(0) | -+ R300_PVS_DST_WE_W)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(3) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(7) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - -- /* PVS inst 1 */ -+ /* PVS inst 7 - mask X / w */ - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_DST_OPCODE(R300_VE_ADD) | -+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -- R300_PVS_DST_OFFSET(1) | -- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ R300_PVS_DST_OFFSET(2) | -+ R300_PVS_DST_WE_X)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -- (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -- R300_PVS_SRC_OFFSET(6) | -- R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -- R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_Z) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(2) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_W) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | -@@ -364,6 +461,27 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 8 - mask y / h */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ R300_PVS_DST_OFFSET(2) | -+ R300_PVS_DST_WE_Y)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) | -+ R300_PVS_SRC_OFFSET(3) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | -@@ -375,7 +493,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - - /* Xv shader program */ - BEGIN_ACCEL(9); -- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 5); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(9)); - - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_DST_OPCODE(R300_VE_ADD) | -@@ -388,8 +506,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_OFFSET(0) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | -@@ -409,15 +527,14 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - (R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | -- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y)); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | - R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | - R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) | -- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W))); -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); - OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, - (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(6) | -@@ -433,6 +550,97 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | - R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); - FINISH_ACCEL(); -+ -+ /* Xv bicubic shader program */ -+ BEGIN_ACCEL(13); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(11)); -+ /* PVS inst 0 */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_ADD) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ R300_PVS_DST_OFFSET(0) | -+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(0) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 1 */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_ADD) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ R300_PVS_DST_OFFSET(1) | -+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(6) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(6) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(6) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ -+ /* PVS inst 2 */ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_DST_OPCODE(R300_VE_ADD) | -+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | -+ R300_PVS_DST_OFFSET(2) | -+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y | -+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, -+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | -+ R300_PVS_SRC_OFFSET(7) | -+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) | -+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0))); -+ FINISH_ACCEL(); - } - - /* pre-load the RS instructions */ -@@ -552,10 +760,10 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - OUT_ACCEL_REG(R300_SC_EDGERULE, 0xA5294A5); - if (IS_R300_3D) { - /* clip has offset 1440 */ -- OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((1088 << R300_CLIP_X_SHIFT) | -- (1088 << R300_CLIP_Y_SHIFT))); -- OUT_ACCEL_REG(R300_SC_CLIP_0_B, (((1080 + 2920) << R300_CLIP_X_SHIFT) | -- ((1080 + 2920) << R300_CLIP_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((1440 << R300_CLIP_X_SHIFT) | -+ (1440 << R300_CLIP_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_CLIP_0_B, ((4080 << R300_CLIP_X_SHIFT) | -+ (4080 << R300_CLIP_Y_SHIFT))); - } else { - OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((0 << R300_CLIP_X_SHIFT) | - (0 << R300_CLIP_Y_SHIFT))); -@@ -565,10 +773,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); - OUT_ACCEL_REG(R300_SC_SCREENDOOR, 0xffffff); - FINISH_ACCEL(); -- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280) || -- (info->ChipFamily == CHIP_FAMILY_RS300) || -- (info->ChipFamily == CHIP_FAMILY_R200)) { -+ } else if (IS_R200_3D) { - - BEGIN_ACCEL(6); - if (info->ChipFamily == CHIP_FAMILY_RS300) { -@@ -584,9 +789,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - R200_VAP_VF_MAX_VTX_NUM); - FINISH_ACCEL(); - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -- OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff); -+ BEGIN_ACCEL(3); - OUT_ACCEL_REG(RADEON_AUX_SC_CNTL, 0); - OUT_ACCEL_REG(RADEON_RB3D_PLANEMASK, 0xffffffff); - OUT_ACCEL_REG(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | -@@ -604,15 +807,11 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - else - OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); - OUT_ACCEL_REG(RADEON_SE_COORD_FMT, -- RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | -- RADEON_VTX_ST0_NONPARAMETRIC | -- RADEON_VTX_ST1_NONPARAMETRIC | -- RADEON_TEX1_W_ROUTING_USE_W0); -+ RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | -+ RADEON_TEX1_W_ROUTING_USE_W0); - FINISH_ACCEL(); - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -- OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff); -+ BEGIN_ACCEL(3); - OUT_ACCEL_REG(RADEON_AUX_SC_CNTL, 0); - OUT_ACCEL_REG(RADEON_RB3D_PLANEMASK, 0xffffffff); - OUT_ACCEL_REG(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | -@@ -626,6 +825,39 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) - - } - -+#if defined(ACCEL_CP) && defined(XF86DRM_MODE) -+void drmmode_wait_for_vline(ScrnInfoPtr pScrn, PixmapPtr pPix, -+ int crtc, int start, int stop) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ drmmode_crtc_private_ptr drmmode_crtc = xf86_config->crtc[crtc]->driver_private; -+ ACCEL_PREAMBLE(); -+ -+ BEGIN_ACCEL(3); -+ -+ if (IS_AVIVO_VARIANT) { -+ uint32_t reg = AVIVO_D1MODE_VLINE_START_END; /* this is just a marker */ -+ OUT_ACCEL_REG(reg, -+ ((start << AVIVO_D1MODE_VLINE_START_SHIFT) | -+ (stop << AVIVO_D1MODE_VLINE_END_SHIFT) | -+ AVIVO_D1MODE_VLINE_INV)); -+ } else { -+ OUT_ACCEL_REG(RADEON_CRTC_GUI_TRIG_VLINE, /* another placeholder */ -+ ((start << RADEON_CRTC_GUI_TRIG_VLINE_START_SHIFT) | -+ (stop << RADEON_CRTC_GUI_TRIG_VLINE_END_SHIFT) | -+ RADEON_CRTC_GUI_TRIG_VLINE_INV | -+ RADEON_CRTC_GUI_TRIG_VLINE_STALL)); -+ } -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, (RADEON_WAIT_CRTC_VLINE | -+ RADEON_ENG_DISPLAY_SELECT_CRTC0)); -+ -+ OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_NOP, 0)); -+ OUT_RING(drmmode_crtc->mode_crtc->crtc_id); -+ FINISH_ACCEL(); -+} -+#endif -+ - /* inserts a wait for vline in the command stream */ - void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, - int crtc, int start, int stop) -@@ -644,16 +876,21 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, - if (!xf86_config->crtc[crtc]->enabled) - return; - -+ if (info->cs) { -+ if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) -+ return; -+ } else { - #ifdef USE_EXA -- if (info->useEXA) -- offset = exaGetPixmapOffset(pPix); -- else -+ if (info->useEXA) -+ offset = exaGetPixmapOffset(pPix); -+ else - #endif -- offset = pPix->devPrivate.ptr - info->FB; -+ offset = pPix->devPrivate.ptr - info->FB; - -- /* if drawing to front buffer */ -- if (offset != 0) -- return; -+ /* if drawing to front buffer */ -+ if (offset != 0) -+ return; -+ } - - start = max(start, 0); - stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); -@@ -661,6 +898,13 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, - if (start > xf86_config->crtc[crtc]->mode.VDisplay) - return; - -+#if defined(ACCEL_CP) && defined(XF86DRM_MODE) -+ if (info->kms_enabled) { -+ drmmode_wait_for_vline(pScrn, pPix, crtc, start, stop); -+ return; -+ } -+#endif -+ - BEGIN_ACCEL(2); - - if (IS_AVIVO_VARIANT) { -@@ -675,12 +919,14 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, - OUT_ACCEL_REG(RADEON_CRTC_GUI_TRIG_VLINE, - ((start << RADEON_CRTC_GUI_TRIG_VLINE_START_SHIFT) | - (stop << RADEON_CRTC_GUI_TRIG_VLINE_END_SHIFT) | -- RADEON_CRTC_GUI_TRIG_VLINE_INV)); -+ RADEON_CRTC_GUI_TRIG_VLINE_INV | -+ RADEON_CRTC_GUI_TRIG_VLINE_STALL)); - else - OUT_ACCEL_REG(RADEON_CRTC2_GUI_TRIG_VLINE, - ((start << RADEON_CRTC_GUI_TRIG_VLINE_START_SHIFT) | - (stop << RADEON_CRTC_GUI_TRIG_VLINE_END_SHIFT) | -- RADEON_CRTC_GUI_TRIG_VLINE_INV)); -+ RADEON_CRTC_GUI_TRIG_VLINE_INV | -+ RADEON_CRTC_GUI_TRIG_VLINE_STALL)); - } - - if (crtc == 0) -diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c -index 4b508ce..3899064 100644 ---- a/src/radeon_crtc.c -+++ b/src/radeon_crtc.c -@@ -77,6 +77,9 @@ radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) - if ((mode == DPMSModeOn) && radeon_crtc->enabled) - return; - -+ if (mode == DPMSModeOff) -+ radeon_crtc_modeset_ioctl(crtc, FALSE); -+ - if (IS_AVIVO_VARIANT || info->r4xx_atom) { - atombios_crtc_dpms(crtc, mode); - } else { -@@ -97,6 +100,11 @@ radeon_crtc_dpms(xf86CrtcPtr crtc, int mode) - } - } - -+ if (mode != DPMSModeOff) { -+ radeon_crtc_modeset_ioctl(crtc, TRUE); -+ radeon_crtc_load_lut(crtc); -+ } -+ - if (mode == DPMSModeOn) - radeon_crtc->enabled = TRUE; - else -@@ -115,9 +123,6 @@ radeon_crtc_mode_prepare(xf86CrtcPtr crtc) - { - RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - -- if (radeon_crtc->initialized) -- radeon_crtc_dpms(crtc, DPMSModeOff); -- - if (radeon_crtc->enabled) - crtc->funcs->hide_cursor(crtc); - } -@@ -132,16 +137,20 @@ RADEONComputePLL(RADEONPLLPtr pll, - unsigned long freq, - uint32_t *chosen_dot_clock_freq, - uint32_t *chosen_feedback_div, -+ uint32_t *chosen_frac_feedback_div, - uint32_t *chosen_reference_div, - uint32_t *chosen_post_div, - int flags) - { - uint32_t min_ref_div = pll->min_ref_div; - uint32_t max_ref_div = pll->max_ref_div; -+ uint32_t min_fractional_feed_div = 0; -+ uint32_t max_fractional_feed_div = 0; - uint32_t best_vco = pll->best_vco; - uint32_t best_post_div = 1; - uint32_t best_ref_div = 1; - uint32_t best_feedback_div = 1; -+ uint32_t best_frac_feedback_div = 0; - uint32_t best_freq = -1; - uint32_t best_error = 0xffffffff; - uint32_t best_vco_diff = 1; -@@ -165,6 +174,11 @@ RADEONComputePLL(RADEONPLLPtr pll, - } - } - -+ if (flags & RADEON_PLL_USE_FRAC_FB_DIV) { -+ min_fractional_feed_div = pll->min_frac_feedback_div; -+ max_fractional_feed_div = pll->max_frac_feedback_div; -+ } -+ - for (post_div = pll->min_post_div; post_div <= pll->max_post_div; ++post_div) { - uint32_t ref_div; - -@@ -182,7 +196,7 @@ RADEONComputePLL(RADEONPLLPtr pll, - } - - for (ref_div = min_ref_div; ref_div <= max_ref_div; ++ref_div) { -- uint32_t feedback_div, current_freq, error, vco_diff; -+ uint32_t feedback_div, current_freq = 0, error, vco_diff; - uint32_t pll_in = pll->reference_freq / ref_div; - uint32_t min_feed_div = pll->min_feedback_div; - uint32_t max_feed_div = pll->max_feedback_div+1; -@@ -192,11 +206,15 @@ RADEONComputePLL(RADEONPLLPtr pll, - - while (min_feed_div < max_feed_div) { - uint32_t vco; -+ uint32_t min_frac_feed_div = min_fractional_feed_div; -+ uint32_t max_frac_feed_div = max_fractional_feed_div+1; -+ uint32_t frac_feedback_div; -+ CARD64 tmp; - - feedback_div = (min_feed_div+max_feed_div)/2; - -- vco = RADEONDiv((CARD64)pll->reference_freq * feedback_div, -- ref_div); -+ tmp = (CARD64)pll->reference_freq * feedback_div; -+ vco = RADEONDiv(tmp, ref_div); - - if (vco < pll->pll_out_min) { - min_feed_div = feedback_div+1; -@@ -206,45 +224,59 @@ RADEONComputePLL(RADEONPLLPtr pll, - continue; - } - -- current_freq = RADEONDiv((CARD64)pll->reference_freq * 10000 * feedback_div, -- ref_div * post_div); -- -- error = abs(current_freq - freq); -- vco_diff = abs(vco - best_vco); -- -- if ((best_vco == 0 && error < best_error) || -- (best_vco != 0 && -- (error < best_error - 100 || -- (abs(error - best_error) < 100 && vco_diff < best_vco_diff )))) { -- best_post_div = post_div; -- best_ref_div = ref_div; -- best_feedback_div = feedback_div; -- best_freq = current_freq; -- best_error = error; -- best_vco_diff = vco_diff; -- } else if (current_freq == freq) { -- if (best_freq == -1) { -- best_post_div = post_div; -- best_ref_div = ref_div; -- best_feedback_div = feedback_div; -- best_freq = current_freq; -- best_error = error; -- best_vco_diff = vco_diff; -- } else if (((flags & RADEON_PLL_PREFER_LOW_REF_DIV) && (ref_div < best_ref_div)) || -- ((flags & RADEON_PLL_PREFER_HIGH_REF_DIV) && (ref_div > best_ref_div)) || -- ((flags & RADEON_PLL_PREFER_LOW_FB_DIV) && (feedback_div < best_feedback_div)) || -- ((flags & RADEON_PLL_PREFER_HIGH_FB_DIV) && (feedback_div > best_feedback_div)) || -- ((flags & RADEON_PLL_PREFER_LOW_POST_DIV) && (post_div < best_post_div)) || -- ((flags & RADEON_PLL_PREFER_HIGH_POST_DIV) && (post_div > best_post_div))) { -+ while (min_frac_feed_div < max_frac_feed_div) { -+ frac_feedback_div = (min_frac_feed_div+max_frac_feed_div)/2; -+ tmp = (CARD64)pll->reference_freq * 10000 * feedback_div; -+ tmp += (CARD64)pll->reference_freq * 1000 * frac_feedback_div; -+ current_freq = RADEONDiv(tmp, ref_div * post_div); -+ -+ if (flags & RADEON_PLL_PREFER_CLOSEST_LOWER) { -+ error = freq - current_freq; -+ error = error < 0 ? 0xffffffff : error; -+ } else -+ error = abs(current_freq - freq); -+ vco_diff = abs(vco - best_vco); -+ -+ if ((best_vco == 0 && error < best_error) || -+ (best_vco != 0 && -+ (error < best_error - 100 || -+ (abs(error - best_error) < 100 && vco_diff < best_vco_diff )))) { - best_post_div = post_div; - best_ref_div = ref_div; - best_feedback_div = feedback_div; -+ best_frac_feedback_div = frac_feedback_div; - best_freq = current_freq; - best_error = error; - best_vco_diff = vco_diff; -+ } else if (current_freq == freq) { -+ if (best_freq == -1) { -+ best_post_div = post_div; -+ best_ref_div = ref_div; -+ best_feedback_div = feedback_div; -+ best_frac_feedback_div = frac_feedback_div; -+ best_freq = current_freq; -+ best_error = error; -+ best_vco_diff = vco_diff; -+ } else if (((flags & RADEON_PLL_PREFER_LOW_REF_DIV) && (ref_div < best_ref_div)) || -+ ((flags & RADEON_PLL_PREFER_HIGH_REF_DIV) && (ref_div > best_ref_div)) || -+ ((flags & RADEON_PLL_PREFER_LOW_FB_DIV) && (feedback_div < best_feedback_div)) || -+ ((flags & RADEON_PLL_PREFER_HIGH_FB_DIV) && (feedback_div > best_feedback_div)) || -+ ((flags & RADEON_PLL_PREFER_LOW_POST_DIV) && (post_div < best_post_div)) || -+ ((flags & RADEON_PLL_PREFER_HIGH_POST_DIV) && (post_div > best_post_div))) { -+ best_post_div = post_div; -+ best_ref_div = ref_div; -+ best_feedback_div = feedback_div; -+ best_frac_feedback_div = frac_feedback_div; -+ best_freq = current_freq; -+ best_error = error; -+ best_vco_diff = vco_diff; -+ } - } -+ if (current_freq < freq) -+ min_frac_feed_div = frac_feedback_div+1; -+ else -+ max_frac_feed_div = frac_feedback_div; - } -- - if (current_freq < freq) - min_feed_div = feedback_div+1; - else -@@ -255,6 +287,7 @@ RADEONComputePLL(RADEONPLLPtr pll, - - ErrorF("best_freq: %u\n", (unsigned int)best_freq); - ErrorF("best_feedback_div: %u\n", (unsigned int)best_feedback_div); -+ ErrorF("best_frac_feedback_div: %u\n", (unsigned int)best_frac_feedback_div); - ErrorF("best_ref_div: %u\n", (unsigned int)best_ref_div); - ErrorF("best_post_div: %u\n", (unsigned int)best_post_div); - -@@ -262,6 +295,7 @@ RADEONComputePLL(RADEONPLLPtr pll, - FatalError("Couldn't find valid PLL dividers\n"); - *chosen_dot_clock_freq = best_freq / 10000; - *chosen_feedback_div = best_feedback_div; -+ *chosen_frac_feedback_div = best_frac_feedback_div; - *chosen_reference_div = best_ref_div; - *chosen_post_div = best_post_div; - -@@ -286,8 +320,6 @@ radeon_crtc_mode_commit(xf86CrtcPtr crtc) - { - if (crtc->scrn->pScreen != NULL) - xf86_reload_cursors(crtc->scrn->pScreen); -- -- radeon_crtc_dpms(crtc, DPMSModeOn); - } - - void -diff --git a/src/radeon_cursor.c b/src/radeon_cursor.c -index 0fcdcf0..5ecdfad 100644 ---- a/src/radeon_cursor.c -+++ b/src/radeon_cursor.c -@@ -73,14 +73,17 @@ - #define CURSOR_SWAPPING_DECL_MMIO unsigned char *RADEONMMIO = info->MMIO; - #define CURSOR_SWAPPING_START() \ - do { \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) \ - OUTREG(RADEON_SURFACE_CNTL, \ - (info->ModeReg->surface_cntl | \ - RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP) & \ - ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP)); \ - } while (0) --#define CURSOR_SWAPPING_END() (OUTREG(RADEON_SURFACE_CNTL, \ -- info->ModeReg->surface_cntl)) -- -+#define CURSOR_SWAPPING_END() \ -+ do { \ -+ if (info->ChipFamily < CHIP_FAMILY_R600) \ -+ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); \ -+ } while (0) - #else - - #define CURSOR_SWAPPING_DECL_MMIO -@@ -97,13 +100,14 @@ avivo_setup_cursor(xf86CrtcPtr crtc, Bool enable) - RADEONInfoPtr info = RADEONPTR(crtc->scrn); - unsigned char *RADEONMMIO = info->MMIO; - -- OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, 0); -+ /* always use the same cursor mode even if the cursor is disabled, -+ * otherwise you may end up with cursor curruption bands -+ */ -+ OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); - - if (enable) { - OUTREG(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - info->fbLocation + radeon_crtc->cursor_offset + pScrn->fbOffset); -- OUTREG(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, -- ((CURSOR_WIDTH - 1) << 16) | (CURSOR_HEIGHT - 1)); - OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, - AVIVO_D1CURSOR_EN | (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); - } -@@ -138,9 +142,6 @@ radeon_crtc_show_cursor (xf86CrtcPtr crtc) - - if (IS_AVIVO_VARIANT) { - avivo_lock_cursor(crtc, TRUE); -- OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, -- INREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset) -- | AVIVO_D1CURSOR_EN); - avivo_setup_cursor(crtc, TRUE); - avivo_lock_cursor(crtc, FALSE); - } else { -@@ -171,9 +172,6 @@ radeon_crtc_hide_cursor (xf86CrtcPtr crtc) - - if (IS_AVIVO_VARIANT) { - avivo_lock_cursor(crtc, TRUE); -- OUTREG(AVIVO_D1CUR_CONTROL+ radeon_crtc->crtc_offset, -- INREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset) -- & ~(AVIVO_D1CURSOR_EN)); - avivo_setup_cursor(crtc, FALSE); - avivo_lock_cursor(crtc, FALSE); - } else { -@@ -196,6 +194,7 @@ void - radeon_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y) - { - ScrnInfoPtr pScrn = crtc->scrn; -+ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); - RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - int crtc_id = radeon_crtc->crtc_id; - RADEONInfoPtr info = RADEONPTR(pScrn); -@@ -210,15 +209,38 @@ radeon_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y) - if (yorigin >= CURSOR_HEIGHT) yorigin = CURSOR_HEIGHT - 1; - - if (IS_AVIVO_VARIANT) { -+ int w = CURSOR_WIDTH; -+ - /* avivo cursor spans the full fb width */ - if (crtc->rotatedData == NULL) { - x += crtc->x; - y += crtc->y; - } -+ -+ if (pRADEONEnt->Controller[0]->enabled && -+ pRADEONEnt->Controller[1]->enabled) { -+ int cursor_end, frame_end; -+ -+ cursor_end = x - xorigin + w; -+ frame_end = crtc->x + mode->CrtcHDisplay; -+ -+ if (cursor_end >= frame_end) { -+ w = w - (cursor_end - frame_end); -+ if (!(frame_end & 0x7f)) -+ w--; -+ } else { -+ if (!(cursor_end & 0x7f)) -+ w--; -+ } -+ if (w <= 0) -+ w = 1; -+ } -+ - avivo_lock_cursor(crtc, TRUE); - OUTREG(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, ((xorigin ? 0 : x) << 16) - | (yorigin ? 0 : y)); - OUTREG(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); -+ OUTREG(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (CURSOR_HEIGHT - 1)); - avivo_lock_cursor(crtc, FALSE); - } else { - if (mode->Flags & V_DBLSCAN) -@@ -320,23 +342,17 @@ Bool RADEONCursorInit(ScreenPtr pScreen) - { - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; - xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -- int width; -- int width_bytes; -- int height; -- int size_bytes; - int c; - -- size_bytes = CURSOR_WIDTH * 4 * CURSOR_HEIGHT; -- width = pScrn->displayWidth; -- width_bytes = width * (pScrn->bitsPerPixel / 8); -- height = ((size_bytes * xf86_config->num_crtc) + width_bytes - 1) / width_bytes; -- int align = IS_AVIVO_VARIANT ? 4096 : 256; -+ for (c = 0; c < xf86_config->num_crtc; c++) { -+ xf86CrtcPtr crtc = xf86_config->crtc[c]; -+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - -- if (!info->useEXA) { -- for (c = 0; c < xf86_config->num_crtc; c++) { -- xf86CrtcPtr crtc = xf86_config->crtc[c]; -- RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; -+ if (!info->useEXA) { -+ int size_bytes = CURSOR_WIDTH * 4 * CURSOR_HEIGHT; -+ int align = IS_AVIVO_VARIANT ? 4096 : 256; - - radeon_crtc->cursor_offset = - radeon_legacy_allocate_memory(pScrn, &radeon_crtc->cursor_mem, size_bytes, align); -@@ -350,6 +366,10 @@ Bool RADEONCursorInit(ScreenPtr pScreen) - c, - (unsigned int)radeon_crtc->cursor_offset); - } -+ /* set the cursor mode the same on both crtcs to avoid corruption */ -+ if (IS_AVIVO_VARIANT) -+ OUTREG(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, -+ (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); - } - - return xf86_cursors_init (pScreen, CURSOR_WIDTH, CURSOR_HEIGHT, -diff --git a/src/radeon_dri.c b/src/radeon_dri.c -index f6c6261..a359bc4 100644 ---- a/src/radeon_dri.c -+++ b/src/radeon_dri.c -@@ -745,6 +745,8 @@ static radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { - { PCI_VENDOR_INTEL,0x2570, PCI_VENDOR_ATI,0x4a4e, PCI_VENDOR_DELL,0x5106, 4 }, - /* Intel 82865G/PE/P DRAM Controller/Host-Hub / RV280 [Radeon 9200 SE] Needs AGPMode 4 (lp #300304) */ - { PCI_VENDOR_INTEL,0x2570, PCI_VENDOR_ATI,0x5964, 0x148c,0x2073, 4 }, -+ /* Intel 82855PM host bridge / Mobility M7 LW Needs AGPMode 4 (lp: #353996) */ -+ { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x4c57, PCI_VENDOR_IBM,0x0530, 4 }, - /* Intel 82855PM Processor to I/O Controller / Mobility M6 LY Needs AGPMode 1 (deb #467235) */ - { PCI_VENDOR_INTEL,0x3340, PCI_VENDOR_ATI,0x4c59, PCI_VENDOR_IBM,0x052f, 1 }, - /* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */ -@@ -767,6 +769,14 @@ static radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { - { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, PCI_VENDOR_ASUS,0x1942, 1 }, - /* Intel 82852/82855 host bridge / Mobility 9600/9700 Needs AGPMode 1 (deb #510208) */ - { PCI_VENDOR_INTEL,0x3580, PCI_VENDOR_ATI,0x4e50, 0x10cf,0x127f, 1 }, -+ /* Intel 82443BX/ZX/DX Host bridge / RV280 [Radeon 9200] Needs AGPMode 1 (lp #370205) */ -+ { PCI_VENDOR_INTEL,0x7190, PCI_VENDOR_ATI,0x5961, 0x174b,0x7c13, 1 }, -+ -+ /* Ali Corp M1671 Super P4 Northbridge / Mobility M6 LY Needs AGPMode 1 (lp #146303)*/ -+ { 0x10b9,0x1671, PCI_VENDOR_ATI,0x4c59, 0x103c,0x0027, 1 }, -+ -+ /* SiS Host Bridge 655 / R420 [Radeon X800] Needs AGPMode 4 (lp #371296) */ -+ { 0x1039,0x0655, PCI_VENDOR_ATI,0x4a4b, PCI_VENDOR_ATI,0x4422, 4 }, - - /* ASRock K7VT4A+ AGP 8x / ATI Radeon 9250 AGP Needs AGPMode 4 (lp #133192) */ - { 0x1849,0x3189, PCI_VENDOR_ATI,0x5960, 0x1787,0x5960, 4 }, -@@ -787,6 +797,8 @@ static radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { - { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x5960, 0x1462,0x0380, 4 }, - /* VIA VT8377 Host Bridge / RV280 Needs AGPMode 4 (ati ML) */ - { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x5964, 0x148c,0x2073, 4 }, -+ /* VIA VT8377 Host Bridge / RV280 Needs AGPMode 4 (fdo #12544) */ -+ { PCI_VENDOR_VIA,0x3189, PCI_VENDOR_ATI,0x5964, 0x1043,0xc008, 4 }, - - /* ATI Host Bridge / RV280 [M9+] Needs AGPMode 1 (phoronix forum) */ - { PCI_VENDOR_ATI,0xcbb2, PCI_VENDOR_ATI,0x5c61, PCI_VENDOR_SONY,0x8175, 1 }, -@@ -794,6 +806,9 @@ static radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { - /* HP Host Bridge / R300 [FireGL X1] Needs AGPMode 2 (fdo #7770) */ - { PCI_VENDOR_HP,0x122e, PCI_VENDOR_ATI,0x4e47, PCI_VENDOR_ATI,0x0152, 2 }, - -+ /* nVidia Host Bridge / R420 [X800 Pro] Needs AGPMode 4 (fdo #22726) */ -+ { 0x10de,0x00e1, PCI_VENDOR_ATI,0x4a49, PCI_VENDOR_ATI,0x0002, 4 }, -+ - { 0, 0, 0, 0, 0, 0, 0 }, - }; - -@@ -1556,12 +1571,13 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) - info->dri->pDRIInfo = pDRIInfo; - pDRIInfo->drmDriverName = RADEON_DRIVER_NAME; - -- if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { -+ if ( (info->ChipFamily >= CHIP_FAMILY_R600) ) -+ pDRIInfo->clientDriverName = R600_DRIVER_NAME; -+ else if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) - pDRIInfo->clientDriverName = R300_DRIVER_NAME; -- } else -- if ( info->ChipFamily >= CHIP_FAMILY_R200 ) -+ else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) - pDRIInfo->clientDriverName = R200_DRIVER_NAME; -- else -+ else - pDRIInfo->clientDriverName = RADEON_DRIVER_NAME; - - if (xf86LoaderCheckSymbol("DRICreatePCIBusID")) { -diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c -new file mode 100644 -index 0000000..08da996 ---- /dev/null -+++ b/src/radeon_dri2.c -@@ -0,0 +1,334 @@ -+/* -+ * Copyright 2008 Kristian Høgsberg -+ * Copyright 2008 Jérôme Glisse -+ * -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation on the rights to use, copy, modify, merge, -+ * publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR -+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+ -+#include "radeon.h" -+#include "radeon_dri2.h" -+#include "radeon_version.h" -+ -+#ifdef RADEON_DRI2 -+ -+#include "radeon_bo_gem.h" -+ -+#if DRI2INFOREC_VERSION >= 1 -+#define USE_DRI2_1_1_0 -+#endif -+ -+struct dri2_buffer_priv { -+ PixmapPtr pixmap; -+ unsigned int attachment; -+}; -+ -+ -+#ifndef USE_DRI2_1_1_0 -+static DRI2BufferPtr -+radeon_dri2_create_buffers(DrawablePtr drawable, -+ unsigned int *attachments, -+ int count) -+{ -+ ScreenPtr pScreen = drawable->pScreen; -+ DRI2BufferPtr buffers; -+ struct dri2_buffer_priv *privates; -+ PixmapPtr pixmap, depth_pixmap; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ int i, r; -+ -+ buffers = xcalloc(count, sizeof *buffers); -+ if (buffers == NULL) { -+ return NULL; -+ } -+ privates = xcalloc(count, sizeof(struct dri2_buffer_priv)); -+ if (privates == NULL) { -+ xfree(buffers); -+ return NULL; -+ } -+ -+ depth_pixmap = NULL; -+ for (i = 0; i < count; i++) { -+ if (attachments[i] == DRI2BufferFrontLeft) { -+ if (drawable->type == DRAWABLE_PIXMAP) { -+ pixmap = (Pixmap*)drawable; -+ } else { -+ pixmap = (*pScreen->GetWindowPixmap)((WindowPtr)drawable); -+ } -+ pixmap->refcnt++; -+ } else if (attachments[i] == DRI2BufferStencil && depth_pixmap) { -+ pixmap = depth_pixmap; -+ pixmap->refcnt++; -+ } else { -+ pixmap = (*pScreen->CreatePixmap)(pScreen, -+ drawable->width, -+ drawable->height, -+ drawable->depth, -+ 0); -+ } -+ -+ if (attachments[i] == DRI2BufferDepth) { -+ depth_pixmap = pixmap; -+ } -+ driver_priv = exaGetPixmapDriverPrivate(pixmap); -+ r = radeon_gem_get_kernel_name(driver_priv->bo, &buffers[i].name); -+ if (r) -+ return r; -+ -+ buffers[i].attachment = attachments[i]; -+ buffers[i].pitch = pixmap->devKind; -+ buffers[i].cpp = pixmap->drawable.bitsPerPixel / 8; -+ buffers[i].driverPrivate = &privates[i]; -+ buffers[i].flags = 0; -+ privates[i].pixmap = pixmap; -+ privates[i].attachment = attachments[i]; -+ } -+ return buffers; -+} -+#else -+static DRI2BufferPtr -+radeon_dri2_create_buffer(DrawablePtr drawable, -+ unsigned int attachment, -+ unsigned int format) -+{ -+ ScreenPtr pScreen = drawable->pScreen; -+ DRI2BufferPtr buffers; -+ struct dri2_buffer_priv *privates; -+ PixmapPtr pixmap, depth_pixmap; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ int r; -+ -+ buffers = xcalloc(1, sizeof *buffers); -+ if (buffers == NULL) { -+ return NULL; -+ } -+ privates = xcalloc(1, sizeof(struct dri2_buffer_priv)); -+ if (privates == NULL) { -+ xfree(buffers); -+ return NULL; -+ } -+ -+ depth_pixmap = NULL; -+ -+ if (attachment == DRI2BufferFrontLeft) { -+ if (drawable->type == DRAWABLE_PIXMAP) { -+ pixmap = (PixmapPtr)drawable; -+ } else { -+ pixmap = (*pScreen->GetWindowPixmap)((WindowPtr)drawable); -+ } -+ pixmap->refcnt++; -+ } else if (attachment == DRI2BufferStencil && depth_pixmap) { -+ pixmap = depth_pixmap; -+ pixmap->refcnt++; -+ } else { -+ pixmap = (*pScreen->CreatePixmap)(pScreen, -+ drawable->width, -+ drawable->height, -+ (format != 0)?format:drawable->depth, -+ 0); -+ } -+ -+ if (attachment == DRI2BufferDepth) { -+ depth_pixmap = pixmap; -+ } -+ driver_priv = exaGetPixmapDriverPrivate(pixmap); -+ r = radeon_gem_get_kernel_name(driver_priv->bo, &buffers->name); -+ if (r) -+ return NULL; -+ -+ buffers->attachment = attachment; -+ buffers->pitch = pixmap->devKind; -+ buffers->cpp = pixmap->drawable.bitsPerPixel / 8; -+ buffers->driverPrivate = privates; -+ buffers->format = format; -+ buffers->flags = 0; /* not tiled */ -+ privates->pixmap = pixmap; -+ privates->attachment = attachment; -+ -+ return buffers; -+} -+#endif -+ -+#ifndef USE_DRI2_1_1_0 -+static void -+radeon_dri2_destroy_buffers(DrawablePtr drawable, -+ DRI2BufferPtr buffers, -+ int count) -+{ -+ ScreenPtr pScreen = drawable->pScreen; -+ struct dri2_buffer_priv *private; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ private = buffers[i].driverPrivate; -+ (*pScreen->DestroyPixmap)(private->pixmap); -+ } -+ if (buffers) { -+ xfree(buffers[0].driverPrivate); -+ xfree(buffers); -+ } -+} -+#else -+static void -+radeon_dri2_destroy_buffer(DrawablePtr drawable, DRI2BufferPtr buffers) -+{ -+ if(buffers) -+ { -+ ScreenPtr pScreen = drawable->pScreen; -+ struct dri2_buffer_priv *private; -+ -+ private = buffers->driverPrivate; -+ (*pScreen->DestroyPixmap)(private->pixmap); -+ -+ xfree(buffers->driverPrivate); -+ xfree(buffers); -+ } -+} -+#endif -+ -+static void -+radeon_dri2_copy_region(DrawablePtr drawable, -+ RegionPtr region, -+ DRI2BufferPtr dest_buffer, -+ DRI2BufferPtr src_buffer) -+{ -+ struct dri2_buffer_priv *src_private = src_buffer->driverPrivate; -+ struct dri2_buffer_priv *dst_private = dest_buffer->driverPrivate; -+ ScreenPtr pScreen = drawable->pScreen; -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ PixmapPtr src_pixmap; -+ PixmapPtr dst_pixmap; -+ RegionPtr copy_clip; -+ GCPtr gc; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ Bool vsync; -+ -+ src_pixmap = src_private->pixmap; -+ dst_pixmap = dst_private->pixmap; -+ if (src_private->attachment == DRI2BufferFrontLeft) { -+ src_pixmap = (PixmapPtr)drawable; -+ } -+ if (dst_private->attachment == DRI2BufferFrontLeft) { -+ dst_pixmap = (PixmapPtr)drawable; -+ } -+ gc = GetScratchGC(drawable->depth, pScreen); -+ copy_clip = REGION_CREATE(pScreen, NULL, 0); -+ REGION_COPY(pScreen, copy_clip, region); -+ (*gc->funcs->ChangeClip) (gc, CT_REGION, copy_clip, 0); -+ ValidateGC(&dst_pixmap->drawable, gc); -+ -+ vsync = info->accel_state->vsync; -+ info->accel_state->vsync = TRUE; -+ -+ (*gc->ops->CopyArea)(&src_pixmap->drawable, &dst_pixmap->drawable, gc, -+ 0, 0, drawable->width, drawable->height, 0, 0); -+ -+ info->accel_state->vsync = vsync; -+ -+ FreeScratchGC(gc); -+ radeon_cs_flush_indirect(pScrn); -+} -+ -+Bool -+radeon_dri2_screen_init(ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ DRI2InfoRec dri2_info; -+ int fd; -+ char *bus_id; -+ char *tmp_bus_id; -+ int cmp; -+ int i; -+ -+ if (!info->useEXA) { -+ xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 requires EXA\n"); -+ return FALSE; -+ } -+ -+ /* The whole drmOpen thing is a fiasco and we need to find a way -+ * back to just using open(2). For now, however, lets just make -+ * things worse with even more ad hoc directory walking code to -+ * discover the device file name. */ -+ bus_id = DRICreatePCIBusID(info->PciInfo); -+ for (i = 0; i < DRM_MAX_MINOR; i++) { -+ sprintf(info->dri2.device_name, DRM_DEV_NAME, DRM_DIR_NAME, i); -+ fd = open(info->dri2.device_name, O_RDWR); -+ if (fd < 0) -+ continue; -+ -+ tmp_bus_id = drmGetBusid(fd); -+ close(fd); -+ if (tmp_bus_id == NULL) -+ continue; -+ -+ cmp = strcmp(tmp_bus_id, bus_id); -+ drmFree(tmp_bus_id); -+ if (cmp == 0) -+ break; -+ } -+ xfree(bus_id); -+ -+ if (i == DRM_MAX_MINOR) { -+ xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -+ "DRI2: failed to open drm device\n"); -+ return FALSE; -+ } -+ -+ if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { -+ dri2_info.driverName = R300_DRIVER_NAME; -+ } else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) { -+ dri2_info.driverName = R200_DRIVER_NAME; -+ } else { -+ dri2_info.driverName = RADEON_DRIVER_NAME; -+ } -+ dri2_info.fd = info->dri2.drm_fd; -+ dri2_info.deviceName = info->dri2.device_name; -+#ifndef USE_DRI2_1_1_0 -+ dri2_info.version = 1; -+ dri2_info.CreateBuffers = radeon_dri2_create_buffers; -+ dri2_info.DestroyBuffers = radeon_dri2_destroy_buffers; -+#else -+ dri2_info.version = DRI2INFOREC_VERSION; -+ dri2_info.CreateBuffer = radeon_dri2_create_buffer; -+ dri2_info.DestroyBuffer = radeon_dri2_destroy_buffer; -+#endif -+ dri2_info.CopyRegion = radeon_dri2_copy_region; -+ info->dri2.enabled = DRI2ScreenInit(pScreen, &dri2_info); -+ return info->dri2.enabled; -+} -+ -+void radeon_dri2_close_screen(ScreenPtr pScreen) -+{ -+ DRI2CloseScreen(pScreen); -+} -+ -+#endif -diff --git a/src/radeon_dri2.h b/src/radeon_dri2.h -new file mode 100644 -index 0000000..899a626 ---- /dev/null -+++ b/src/radeon_dri2.h -@@ -0,0 +1,42 @@ -+/* -+ * Copyright 2008 Jerome Glisse -+ * -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation on the rights to use, copy, modify, merge, -+ * publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR -+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+#ifndef RADEON_DRI2_H -+#define RADEON_DRI2_H -+ -+struct radeon_dri2 { -+ int drm_fd; -+ Bool enabled; -+ char device_name[64]; -+}; -+ -+#ifdef RADEON_DRI2 -+#include "dri2.h" -+Bool radeon_dri2_screen_init(ScreenPtr pScreen); -+void radeon_dri2_close_screen(ScreenPtr pScreen); -+#endif -+ -+#endif -diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index 8673f5e..29c0b11 100644 ---- a/src/radeon_driver.c -+++ b/src/radeon_driver.c -@@ -92,9 +92,11 @@ - /* X and server generic header files */ - #include "xf86.h" - #include "xf86_OSproc.h" --#include "xf86RAC.h" - #include "xf86RandR12.h" -+#if GET_ABI_MAJOR(ABI_VIDEODRV_VERSION) < 6 -+#include "xf86RAC.h" - #include "xf86Resources.h" -+#endif - #include "xf86cmap.h" - #include "vbe.h" - -@@ -104,8 +106,13 @@ - #include "vgaHW.h" - #endif - -+#ifdef HAVE_XEXTPROTO_71 -+#include -+#else - #define DPMS_SERVER - #include -+#endif -+ - - #include "atipciids.h" - #include "radeon_chipset_gen.h" -@@ -118,8 +125,6 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen); - static Bool RADEONSaveScreen(ScreenPtr pScreen, int mode); - static void RADEONSave(ScrnInfoPtr pScrn); - --static void RADEONSetDynamicClock(ScrnInfoPtr pScrn, int mode); --static void RADEONForceSomeClocks(ScrnInfoPtr pScrn); - static void RADEONSaveMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save); - - static void -@@ -176,7 +181,7 @@ static const OptionInfoRec RADEONOptions[] = { - { OPTION_SUBPIXEL_ORDER, "SubPixelOrder", OPTV_ANYSTR, {0}, FALSE }, - #endif - { OPTION_SHOWCACHE, "ShowCache", OPTV_BOOLEAN, {0}, FALSE }, -- { OPTION_DYNAMIC_CLOCKS, "DynamicClocks", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_CLOCK_GATING, "ClockGating", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_VGA_ACCESS, "VGAAccess", OPTV_BOOLEAN, {0}, TRUE }, - { OPTION_REVERSE_DDC, "ReverseDDC", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_LVDS_PROBE_PLL, "LVDSProbePLL", OPTV_BOOLEAN, {0}, FALSE }, -@@ -197,6 +202,8 @@ static const OptionInfoRec RADEONOptions[] = { - { OPTION_EXA_VSYNC, "EXAVSync", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_ATOM_TVOUT, "ATOMTVOut", OPTV_BOOLEAN, {0}, FALSE }, - { OPTION_R4XX_ATOM, "R4xxATOM", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_FORCE_LOW_POWER, "ForceLowPowerMode", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_DYNAMIC_PM, "DynamicPM", OPTV_BOOLEAN, {0}, FALSE }, - { -1, NULL, OPTV_NONE, {0}, FALSE } - }; - -@@ -340,7 +347,7 @@ RADEONPostInt10Check(ScrnInfoPtr pScrn, void *ptr) - } - - /* Allocate our private RADEONInfoRec */ --static Bool RADEONGetRec(ScrnInfoPtr pScrn) -+Bool RADEONGetRec(ScrnInfoPtr pScrn) - { - if (pScrn->driverPrivate) return TRUE; - -@@ -349,7 +356,7 @@ static Bool RADEONGetRec(ScrnInfoPtr pScrn) - } - - /* Free our private RADEONInfoRec */ --static void RADEONFreeRec(ScrnInfoPtr pScrn) -+void RADEONFreeRec(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info; - int i; -@@ -616,6 +623,12 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) - } else if (info->ChipFamily == CHIP_FAMILY_RS600) { - OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) | RS600_MC_IND_CITF_ARB0)); - data = INREG(RS600_MC_DATA); -+ } else if ((info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RS880)) { -+ OUTREG(RS780_MC_INDEX, (addr & RS780_MC_INDEX_MASK)); -+ data = INREG(RS780_MC_DATA); -+ } else if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ data = 0; - } else if (IS_AVIVO_VARIANT) { - OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0x7f0000); - (void)INREG(AVIVO_MC_INDEX); -@@ -652,6 +665,13 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data) - RS600_MC_IND_CITF_ARB0 | - RS600_MC_IND_WR_EN)); - OUTREG(RS600_MC_DATA, data); -+ } else if ((info->ChipFamily == CHIP_FAMILY_RS780) || -+ (info->ChipFamily == CHIP_FAMILY_RS880)) { -+ OUTREG(RS780_MC_INDEX, ((addr & RS780_MC_INDEX_MASK) | -+ RS780_MC_INDEX_WR_EN)); -+ OUTREG(RS780_MC_DATA, data); -+ } else if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ // do nothing - } else if (IS_AVIVO_VARIANT) { - OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0xff0000); - (void)INREG(AVIVO_MC_INDEX); -@@ -691,6 +711,29 @@ void RADEONOUTPCIE(ScrnInfoPtr pScrn, int addr, uint32_t data) - OUTREG(RADEON_PCIE_DATA, data); - } - -+/* Read PCIE PORT register */ -+unsigned R600INPCIE_PORT(ScrnInfoPtr pScrn, int addr) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ CARD32 data; -+ -+ OUTREG(R600_PCIE_PORT_INDEX, addr & 0xff); -+ data = INREG(R600_PCIE_PORT_DATA); -+ -+ return data; -+} -+ -+/* Write PCIE PORT register */ -+void R600OUTPCIE_PORT(ScrnInfoPtr pScrn, int addr, uint32_t data) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ -+ OUTREG(R600_PCIE_PORT_INDEX, ((addr) & 0xff)); -+ OUTREG(R600_PCIE_PORT_DATA, data); -+} -+ - static Bool radeon_get_mc_idle(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -@@ -1183,9 +1226,13 @@ static void RADEONGetClockInfo(ScrnInfoPtr pScrn) - if (IS_AVIVO_VARIANT) { - pll->min_post_div = 2; - pll->max_post_div = 0x7f; -+ pll->min_frac_feedback_div = 0; -+ pll->max_frac_feedback_div = 9; - } else { - pll->min_post_div = 1; - pll->max_post_div = 12; //16 on crtc0 -+ pll->min_frac_feedback_div = 0; -+ pll->max_frac_feedback_div = 0; - } - pll->min_ref_div = 2; - pll->max_ref_div = 0x3ff; -@@ -1224,7 +1271,7 @@ static void RADEONGetClockInfo(ScrnInfoPtr pScrn) - - - /* This is called by RADEONPreInit to set up the default visual */ --static Bool RADEONPreInitVisual(ScrnInfoPtr pScrn) -+Bool RADEONPreInitVisual(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - -@@ -1281,7 +1328,7 @@ static Bool RADEONPreInitVisual(ScrnInfoPtr pScrn) - } - - /* This is called by RADEONPreInit to handle all color weight issues */ --static Bool RADEONPreInitWeight(ScrnInfoPtr pScrn) -+Bool RADEONPreInitWeight(ScrnInfoPtr pScrn) - { - RADEONInfoPtr info = RADEONPTR(pScrn); - -@@ -1814,16 +1861,6 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) - break; - } - -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -- "R600 support is mostly incomplete and very experimental\n"); -- } -- -- if ((info->ChipFamily >= CHIP_FAMILY_RV515) && (info->ChipFamily < CHIP_FAMILY_R600)) { -- xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -- "R500 support is under development. Please report any issues to xorg-driver-ati@lists.x.org\n"); -- } -- - from = X_PROBED; - info->LinearAddr = PCI_REGION_BASE(info->PciInfo, 0, REGION_MEM) & ~0x1ffffffULL; - pScrn->memPhysBase = info->LinearAddr; -@@ -1942,7 +1979,6 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) - } - } - -- - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "%s card detected\n", - (info->cardType==CARD_PCI) ? "PCI" : - (info->cardType==CARD_PCIE) ? "PCIE" : "AGP"); -@@ -1962,12 +1998,15 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) - if (strcmp(s, "AGP") == 0) { - info->cardType = CARD_AGP; - xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forced into AGP mode\n"); -- } else if (strcmp(s, "PCI") == 0) { -- info->cardType = CARD_PCI; -- xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forced into PCI mode\n"); -- } else if (strcmp(s, "PCIE") == 0) { -- info->cardType = CARD_PCIE; -- xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forced into PCI Express mode\n"); -+ } else if ((strcmp(s, "PCI") == 0) || -+ (strcmp(s, "PCIE") == 0)) { -+ if (info->ChipFamily >= CHIP_FAMILY_RV380) { -+ info->cardType = CARD_PCIE; -+ xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forced into PCI Express mode\n"); -+ } else { -+ info->cardType = CARD_PCI; -+ xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forced into PCI mode\n"); -+ } - } else { - xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, - "Invalid BusType option, using detected type\n"); -@@ -2044,6 +2083,7 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - #if defined(USE_EXA) && defined(USE_XAA) - char *optstr; - #endif -+ int maxy = info->FbMapSize / (pScrn->displayWidth * info->CurrentLayout.pixel_bytes); - - if (!(info->accel_state = xcalloc(1, sizeof(struct radeon_accel_state)))) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to allocate accel_state rec!\n"); -@@ -2064,13 +2104,22 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - info->accel_state->has_tcl = TRUE; - } - -- info->useEXA = FALSE; -+ /* if we have shadow fb bail */ -+ if (info->r600_shadow_fb) { -+ info->useEXA = FALSE; -+ return TRUE; -+ } - -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT, -- "Will attempt to use R6xx/R7xx EXA support if DRI is enabled.\n"); -+#ifdef XF86DRI -+ if ((!info->directRenderingEnabled) || -+ (maxy <= pScrn->virtualY * 3) || -+ (pScrn->videoRam <= 32768)) -+ info->useEXA = FALSE; -+ else - info->useEXA = TRUE; -- } -+#else -+ info->useEXA = FALSE; -+#endif - - if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { - int errmaj = 0, errmin = 0; -@@ -2085,6 +2134,8 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - info->useEXA = TRUE; - } else if (xf86NameCmp(optstr, "XAA") == 0) { - from = X_CONFIG; -+ if (info->ChipFamily < CHIP_FAMILY_R600) -+ info->useEXA = FALSE; - } - } - #else /* USE_XAA */ -@@ -2095,6 +2146,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - xf86DrvMsg(pScrn->scrnIndex, from, - "Using %s acceleration architecture\n", - info->useEXA ? "EXA" : "XAA"); -+ else -+ xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT, -+ "Will attempt to use R6xx/R7xx EXA support if DRI is enabled.\n"); - - #ifdef USE_EXA - if (info->useEXA) { -@@ -2130,6 +2184,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) - } - } - #endif /* USE_XAA */ -+ } else { -+ /* NoAccel */ -+ info->useEXA = FALSE; - } - - return TRUE; -@@ -2706,12 +2763,15 @@ static Bool RADEONPreInitControllers(ScrnInfoPtr pScrn) - mask = 1; - else - mask = 2; -- -+ - if (!RADEONAllocateControllers(pScrn, mask)) - return FALSE; - - RADEONGetClockInfo(pScrn); - -+ if (info->IsAtomBios && info->IsIGP) -+ RADEONATOMGetIGPInfo(pScrn); -+ - if (!RADEONSetupConnectors(pScrn)) { - return FALSE; - } -@@ -2720,7 +2780,7 @@ static Bool RADEONPreInitControllers(ScrnInfoPtr pScrn) - /* fixup outputs for zaphod */ - RADEONFixZaphodOutputs(pScrn); - } -- -+ - RADEONPrintPortMap(pScrn); - - info->first_load_no_devices = FALSE; -@@ -2778,7 +2838,6 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - xf86Int10InfoPtr pInt10 = NULL; - void *int10_save = NULL; - const char *s; -- int crtc_max_X, crtc_max_Y; - RADEONEntPtr pRADEONEnt; - DevUnion* pPriv; - -@@ -2793,6 +2852,7 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - - info->IsSecondary = FALSE; - info->IsPrimary = FALSE; -+ info->kms_enabled = FALSE; - - info->pEnt = xf86GetEntityInfo(pScrn->entityList[pScrn->numEntities - 1]); - if (info->pEnt->location.type != BUS_PCI) goto fail; -@@ -2882,12 +2942,14 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - PCI_DEV_DEV(info->PciInfo), - PCI_DEV_FUNC(info->PciInfo)); - -+#ifndef XSERVER_LIBPCIACCESS - if (xf86RegisterResources(info->pEnt->index, 0, ResExclusive)) - goto fail; - - xf86SetOperatingState(resVga, info->pEnt->index, ResUnusedOpr); - - pScrn->racMemFlags = RAC_FB | RAC_COLORMAP | RAC_VIEWPORT | RAC_CURSOR; -+#endif - pScrn->monitor = pScrn->confScreen->monitor; - - /* Allocate an xf86CrtcConfig */ -@@ -2984,51 +3046,10 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - - RADEONPreInitColorTiling(pScrn); - -- /* we really need an FB manager... */ -- if (pScrn->display->virtualX) { -- crtc_max_X = pScrn->display->virtualX; -- crtc_max_Y = pScrn->display->virtualY; -- if (info->allowColorTiling) { -- if (crtc_max_X > info->MaxSurfaceWidth || -- crtc_max_Y > info->MaxLines) { -- info->allowColorTiling = FALSE; -- xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -- "Requested desktop size exceeds surface limts for tiling, ColorTiling disabled\n"); -- } -- } -- if (crtc_max_X > 8192) -- crtc_max_X = 8192; -- if (crtc_max_Y > 8192) -- crtc_max_Y = 8192; -- } else { -- /* -- * note that these aren't really the CRTC limits, they're just -- * heuristics until we have a better memory manager. -- */ -- if (pScrn->videoRam <= 16384) { -- crtc_max_X = 1600; -- crtc_max_Y = 1200; -- } else if (IS_R300_VARIANT) { -- crtc_max_X = 2560; -- crtc_max_Y = 1200; -- } else if (IS_AVIVO_VARIANT) { -- crtc_max_X = 2560; -- crtc_max_Y = 1600; -- } else { -- crtc_max_X = 2048; -- crtc_max_Y = 1200; -- } -- } -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Max desktop size set to %dx%d\n", -- crtc_max_X, crtc_max_Y); -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, -- "For a larger or smaller max desktop size, add a Virtual line to your xorg.conf\n"); -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, -- "If you are having trouble with 3D, " -- "reduce the desktop size by adjusting the Virtual line to your xorg.conf\n"); -- -- /*xf86CrtcSetSizeRange (pScrn, 320, 200, info->MaxSurfaceWidth, info->MaxLines);*/ -- xf86CrtcSetSizeRange (pScrn, 320, 200, crtc_max_X, crtc_max_Y); -+ if (IS_AVIVO_VARIANT) -+ xf86CrtcSetSizeRange (pScrn, 320, 200, 8192, 8192); -+ else -+ xf86CrtcSetSizeRange (pScrn, 320, 200, 4096, 4096); - - RADEONPreInitDDC(pScrn); - -@@ -3062,8 +3083,6 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) - } - } - -- ErrorF("after xf86InitialConfiguration\n"); -- - RADEONSetPitch(pScrn); - - /* Set display resolution */ -@@ -3239,6 +3258,9 @@ static void RADEONBlockHandler(int i, pointer blockData, - #ifdef USE_EXA - info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; - #endif -+ -+ if (info->pm.dynamic_mode_enabled) -+ RADEONPMBlockHandler(pScrn); - } - - static void -@@ -3372,21 +3394,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, - /* blank the outputs/crtcs */ - RADEONBlank(pScrn); - -- if (info->IsMobility && !IS_AVIVO_VARIANT) { -- if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_CLOCKS, FALSE)) { -- RADEONSetDynamicClock(pScrn, 1); -- } else { -- RADEONSetDynamicClock(pScrn, 0); -- } -- } else if (IS_AVIVO_VARIANT) { -- if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_CLOCKS, FALSE)) { -- atombios_static_pwrmgt_setup(pScrn, 1); -- atombios_dyn_clk_setup(pScrn, 1); -- } -- } -- -- if (IS_R300_VARIANT || IS_RV100_VARIANT) -- RADEONForceSomeClocks(pScrn); -+ RADEONPMInit(pScrn); - - if (info->allowColorTiling && (pScrn->virtualX > info->MaxSurfaceWidth)) { - xf86DrvMsg(pScrn->scrnIndex, X_INFO, -@@ -3764,6 +3772,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, - } - } - -+ /* Clear the framebuffer */ -+ memset(info->FB + pScrn->fbOffset, 0, -+ pScrn->virtualY * pScrn->displayWidth * info->CurrentLayout.pixel_bytes); -+ - /* set the modes with desired rotation, etc. */ - if (!xf86SetDesiredModes (pScrn)) - return FALSE; -@@ -5639,27 +5651,17 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - /* Makes sure the engine is idle before doing anything */ - RADEONWaitForIdleMMIO(pScrn); - -- if (info->IsMobility && !IS_AVIVO_VARIANT) { -- if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_CLOCKS, FALSE)) { -- RADEONSetDynamicClock(pScrn, 1); -- } else { -- RADEONSetDynamicClock(pScrn, 0); -- } -- } else if (IS_AVIVO_VARIANT) { -- if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_CLOCKS, FALSE)) { -- atombios_static_pwrmgt_setup(pScrn, 1); -- atombios_dyn_clk_setup(pScrn, 1); -- } -- } -- -- if (IS_R300_VARIANT || IS_RV100_VARIANT) -- RADEONForceSomeClocks(pScrn); -+ RADEONPMEnterVT(pScrn); - - for (i = 0; i < config->num_crtc; i++) - radeon_crtc_modeset_ioctl(config->crtc[i], TRUE); - - pScrn->vtSema = TRUE; - -+ /* Clear the framebuffer */ -+ memset(info->FB + pScrn->fbOffset, 0, -+ pScrn->virtualY * pScrn->displayWidth * info->CurrentLayout.pixel_bytes); -+ - if (!xf86SetDesiredModes(pScrn)) - return FALSE; - -@@ -5667,7 +5669,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - RADEONRestoreSurfaces(pScrn, info->ModeReg); - #ifdef XF86DRI - if (info->directRenderingEnabled) { -- if (info->cardType == CARD_PCIE && -+ if (info->cardType == CARD_PCIE && - info->dri->pKernelDRMVersion->version_minor >= 19 && - info->FbSecureSize) { - #if X_BYTE_ORDER == X_BIG_ENDIAN -@@ -5681,7 +5683,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - #if X_BYTE_ORDER == X_BIG_ENDIAN - OUTREG(RADEON_SURFACE_CNTL, sctrl); - #endif -- } -+ } - - /* get the DRI back into shape after resume */ - RADEONDRISetVBlankInterrupt (pScrn, TRUE); -@@ -5709,6 +5711,8 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - DRIUnlock(pScrn->pScreen); - } - #endif -+ if (IS_R500_3D || IS_R300_3D) -+ radeon_load_bicubic_texture(pScrn); - - return TRUE; - } -@@ -5788,6 +5792,8 @@ void RADEONLeaveVT(int scrnIndex, int flags) - - xf86_hide_cursors (pScrn); - -+ RADEONPMLeaveVT(pScrn); -+ - RADEONRestore(pScrn); - - for (i = 0; i < config->num_crtc; i++) -@@ -5811,6 +5817,8 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) - xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, - "RADEONCloseScreen\n"); - -+ RADEONPMFini(pScrn); -+ - /* Mark acceleration as stopped or we might try to access the engine at - * wrong times, especially if we had DRI, after DRI has been stopped - */ -@@ -5908,362 +5916,3 @@ void RADEONFreeScreen(int scrnIndex, int flags) - #endif - RADEONFreeRec(pScrn); - } -- --static void RADEONForceSomeClocks(ScrnInfoPtr pScrn) --{ -- /* It appears from r300 and rv100 may need some clocks forced-on */ -- uint32_t tmp; -- -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp |= RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_VIP; -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); --} -- --static void RADEONSetDynamicClock(ScrnInfoPtr pScrn, int mode) --{ -- RADEONInfoPtr info = RADEONPTR(pScrn); -- RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); -- unsigned char *RADEONMMIO = info->MMIO; -- uint32_t tmp; -- switch(mode) { -- case 0: /* Turn everything OFF (ForceON to everything)*/ -- if ( !pRADEONEnt->HasCRTC2 ) { -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_HDP | -- RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP | -- RADEON_SCLK_FORCE_E2 | RADEON_SCLK_FORCE_SE | -- RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP | -- RADEON_SCLK_FORCE_RE | RADEON_SCLK_FORCE_PB | -- RADEON_SCLK_FORCE_TAM | RADEON_SCLK_FORCE_TDM | -- RADEON_SCLK_FORCE_RB); -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- } else if (info->ChipFamily == CHIP_FAMILY_RV350) { -- /* for RV350/M10, no delays are required. */ -- tmp = INPLL(pScrn, R300_SCLK_CNTL2); -- tmp |= (R300_SCLK_FORCE_TCL | -- R300_SCLK_FORCE_GA | -- R300_SCLK_FORCE_CBA); -- OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -- -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -- RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -- RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -- R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -- RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -- R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -- R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -- R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -- tmp |= RADEON_SCLK_MORE_FORCEON; -- OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -- tmp |= (RADEON_FORCEON_MCLKA | -- RADEON_FORCEON_MCLKB | -- RADEON_FORCEON_YCLKA | -- RADEON_FORCEON_YCLKB | -- RADEON_FORCEON_MC); -- OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -- tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb | -- RADEON_PIXCLK_DAC_ALWAYS_ONb | -- R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF); -- OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -- tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb | -- RADEON_PIX2CLK_DAC_ALWAYS_ONb | -- RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -- R300_DVOCLK_ALWAYS_ONb | -- RADEON_PIXCLK_BLEND_ALWAYS_ONb | -- RADEON_PIXCLK_GV_ALWAYS_ONb | -- R300_PIXCLK_DVO_ALWAYS_ONb | -- RADEON_PIXCLK_LVDS_ALWAYS_ONb | -- RADEON_PIXCLK_TMDS_ALWAYS_ONb | -- R300_PIXCLK_TRANS_ALWAYS_ONb | -- R300_PIXCLK_TVO_ALWAYS_ONb | -- R300_P2G2CLK_ALWAYS_ONb | -- R300_P2G2CLK_ALWAYS_ONb | -- R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF); -- OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -- } else { -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2); -- tmp |= RADEON_SCLK_FORCE_SE; -- -- if ( !pRADEONEnt->HasCRTC2 ) { -- tmp |= ( RADEON_SCLK_FORCE_RB | -- RADEON_SCLK_FORCE_TDM | -- RADEON_SCLK_FORCE_TAM | -- RADEON_SCLK_FORCE_PB | -- RADEON_SCLK_FORCE_RE | -- RADEON_SCLK_FORCE_VIP | -- RADEON_SCLK_FORCE_IDCT | -- RADEON_SCLK_FORCE_TOP | -- RADEON_SCLK_FORCE_DISP1 | -- RADEON_SCLK_FORCE_DISP2 | -- RADEON_SCLK_FORCE_HDP ); -- } else if ((info->ChipFamily == CHIP_FAMILY_R300) || -- (info->ChipFamily == CHIP_FAMILY_R350)) { -- tmp |= ( RADEON_SCLK_FORCE_HDP | -- RADEON_SCLK_FORCE_DISP1 | -- RADEON_SCLK_FORCE_DISP2 | -- RADEON_SCLK_FORCE_TOP | -- RADEON_SCLK_FORCE_IDCT | -- RADEON_SCLK_FORCE_VIP); -- } -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- -- usleep(16000); -- -- if ((info->ChipFamily == CHIP_FAMILY_R300) || -- (info->ChipFamily == CHIP_FAMILY_R350)) { -- tmp = INPLL(pScrn, R300_SCLK_CNTL2); -- tmp |= ( R300_SCLK_FORCE_TCL | -- R300_SCLK_FORCE_GA | -- R300_SCLK_FORCE_CBA); -- OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -- usleep(16000); -- } -- -- if (info->IsIGP) { -- tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -- tmp &= ~(RADEON_FORCEON_MCLKA | -- RADEON_FORCEON_YCLKA); -- OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -- usleep(16000); -- } -- -- if ((info->ChipFamily == CHIP_FAMILY_RV200) || -- (info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280)) { -- tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -- tmp |= RADEON_SCLK_MORE_FORCEON; -- OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -- usleep(16000); -- } -- -- tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -- tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb | -- RADEON_PIX2CLK_DAC_ALWAYS_ONb | -- RADEON_PIXCLK_BLEND_ALWAYS_ONb | -- RADEON_PIXCLK_GV_ALWAYS_ONb | -- RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb | -- RADEON_PIXCLK_LVDS_ALWAYS_ONb | -- RADEON_PIXCLK_TMDS_ALWAYS_ONb); -- -- OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -- usleep(16000); -- -- tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -- tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb | -- RADEON_PIXCLK_DAC_ALWAYS_ONb); -- OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -- } -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Dynamic Clock Scaling Disabled\n"); -- break; -- case 1: -- if (!pRADEONEnt->HasCRTC2) { -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- if ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > -- RADEON_CFG_ATI_REV_A13) { -- tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB); -- } -- tmp &= ~(RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -- RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_SE | -- RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE | -- RADEON_SCLK_FORCE_PB | RADEON_SCLK_FORCE_TAM | -- RADEON_SCLK_FORCE_TDM); -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- } else if ((info->ChipFamily == CHIP_FAMILY_R300) || -- (info->ChipFamily == CHIP_FAMILY_R350) || -- (info->ChipFamily == CHIP_FAMILY_RV350)) { -- if (info->ChipFamily == CHIP_FAMILY_RV350) { -- tmp = INPLL(pScrn, R300_SCLK_CNTL2); -- tmp &= ~(R300_SCLK_FORCE_TCL | -- R300_SCLK_FORCE_GA | -- R300_SCLK_FORCE_CBA); -- tmp |= (R300_SCLK_TCL_MAX_DYN_STOP_LAT | -- R300_SCLK_GA_MAX_DYN_STOP_LAT | -- R300_SCLK_CBA_MAX_DYN_STOP_LAT); -- OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -- -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -- RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -- RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -- R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -- RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -- R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -- R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -- R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -- tmp |= RADEON_DYN_STOP_LAT_MASK; -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -- tmp &= ~RADEON_SCLK_MORE_FORCEON; -- tmp |= RADEON_SCLK_MORE_MAX_DYN_STOP_LAT; -- OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -- tmp |= (RADEON_PIXCLK_ALWAYS_ONb | -- RADEON_PIXCLK_DAC_ALWAYS_ONb); -- OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -- tmp |= (RADEON_PIX2CLK_ALWAYS_ONb | -- RADEON_PIX2CLK_DAC_ALWAYS_ONb | -- RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -- R300_DVOCLK_ALWAYS_ONb | -- RADEON_PIXCLK_BLEND_ALWAYS_ONb | -- RADEON_PIXCLK_GV_ALWAYS_ONb | -- R300_PIXCLK_DVO_ALWAYS_ONb | -- RADEON_PIXCLK_LVDS_ALWAYS_ONb | -- RADEON_PIXCLK_TMDS_ALWAYS_ONb | -- R300_PIXCLK_TRANS_ALWAYS_ONb | -- R300_PIXCLK_TVO_ALWAYS_ONb | -- R300_P2G2CLK_ALWAYS_ONb | -- R300_P2G2CLK_ALWAYS_ONb); -- OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -- -- tmp = INPLL(pScrn, RADEON_MCLK_MISC); -- tmp |= (RADEON_MC_MCLK_DYN_ENABLE | -- RADEON_IO_MCLK_DYN_ENABLE); -- OUTPLL(pScrn, RADEON_MCLK_MISC, tmp); -- -- tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -- tmp |= (RADEON_FORCEON_MCLKA | -- RADEON_FORCEON_MCLKB); -- -- tmp &= ~(RADEON_FORCEON_YCLKA | -- RADEON_FORCEON_YCLKB | -- RADEON_FORCEON_MC); -- -- /* Some releases of vbios have set DISABLE_MC_MCLKA -- and DISABLE_MC_MCLKB bits in the vbios table. Setting these -- bits will cause H/W hang when reading video memory with dynamic clocking -- enabled. */ -- if ((tmp & R300_DISABLE_MC_MCLKA) && -- (tmp & R300_DISABLE_MC_MCLKB)) { -- /* If both bits are set, then check the active channels */ -- tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -- if (info->RamWidth == 64) { -- if (INREG(RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY) -- tmp &= ~R300_DISABLE_MC_MCLKB; -- else -- tmp &= ~R300_DISABLE_MC_MCLKA; -- } else { -- tmp &= ~(R300_DISABLE_MC_MCLKA | -- R300_DISABLE_MC_MCLKB); -- } -- } -- -- OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -- } else { -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- tmp &= ~(R300_SCLK_FORCE_VAP); -- tmp |= RADEON_SCLK_FORCE_CP; -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- usleep(15000); -- -- tmp = INPLL(pScrn, R300_SCLK_CNTL2); -- tmp &= ~(R300_SCLK_FORCE_TCL | -- R300_SCLK_FORCE_GA | -- R300_SCLK_FORCE_CBA); -- OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -- } -- } else { -- tmp = INPLL(pScrn, RADEON_CLK_PWRMGT_CNTL); -- -- tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK | -- RADEON_DISP_DYN_STOP_LAT_MASK | -- RADEON_DYN_STOP_MODE_MASK); -- -- tmp |= (RADEON_ENGIN_DYNCLK_MODE | -- (0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT)); -- OUTPLL(pScrn, RADEON_CLK_PWRMGT_CNTL, tmp); -- usleep(15000); -- -- tmp = INPLL(pScrn, RADEON_CLK_PIN_CNTL); -- tmp |= RADEON_SCLK_DYN_START_CNTL; -- OUTPLL(pScrn, RADEON_CLK_PIN_CNTL, tmp); -- usleep(15000); -- -- /* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200 -- to lockup randomly, leave them as set by BIOS. -- */ -- tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -- /*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/ -- tmp &= ~RADEON_SCLK_FORCEON_MASK; -- -- /*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/ -- if (((info->ChipFamily == CHIP_FAMILY_RV250) && -- ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -- RADEON_CFG_ATI_REV_A13)) || -- ((info->ChipFamily == CHIP_FAMILY_RV100) && -- ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <= -- RADEON_CFG_ATI_REV_A13))){ -- tmp |= RADEON_SCLK_FORCE_CP; -- tmp |= RADEON_SCLK_FORCE_VIP; -- } -- -- OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -- -- if ((info->ChipFamily == CHIP_FAMILY_RV200) || -- (info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280)) { -- tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -- tmp &= ~RADEON_SCLK_MORE_FORCEON; -- -- /* RV200::A11 A12 RV250::A11 A12 */ -- if (((info->ChipFamily == CHIP_FAMILY_RV200) || -- (info->ChipFamily == CHIP_FAMILY_RV250)) && -- ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -- RADEON_CFG_ATI_REV_A13)) { -- tmp |= RADEON_SCLK_MORE_FORCEON; -- } -- OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -- usleep(15000); -- } -- -- /* RV200::A11 A12, RV250::A11 A12 */ -- if (((info->ChipFamily == CHIP_FAMILY_RV200) || -- (info->ChipFamily == CHIP_FAMILY_RV250)) && -- ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -- RADEON_CFG_ATI_REV_A13)) { -- tmp = INPLL(pScrn, RADEON_PLL_PWRMGT_CNTL); -- tmp |= RADEON_TCL_BYPASS_DISABLE; -- OUTPLL(pScrn, RADEON_PLL_PWRMGT_CNTL, tmp); -- } -- usleep(15000); -- -- /*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/ -- tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -- tmp |= (RADEON_PIX2CLK_ALWAYS_ONb | -- RADEON_PIX2CLK_DAC_ALWAYS_ONb | -- RADEON_PIXCLK_BLEND_ALWAYS_ONb | -- RADEON_PIXCLK_GV_ALWAYS_ONb | -- RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb | -- RADEON_PIXCLK_LVDS_ALWAYS_ONb | -- RADEON_PIXCLK_TMDS_ALWAYS_ONb); -- -- OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -- usleep(15000); -- -- tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -- tmp |= (RADEON_PIXCLK_ALWAYS_ONb | -- RADEON_PIXCLK_DAC_ALWAYS_ONb); -- -- OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -- usleep(15000); -- } -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Dynamic Clock Scaling Enabled\n"); -- break; -- default: -- break; -- } --} -diff --git a/src/radeon_drm.h b/src/radeon_drm.h -index 54bc234..daa42d6 100644 ---- a/src/radeon_drm.h -+++ b/src/radeon_drm.h -@@ -493,6 +493,16 @@ typedef struct { - #define DRM_RADEON_SETPARAM 0x19 - #define DRM_RADEON_SURF_ALLOC 0x1a - #define DRM_RADEON_SURF_FREE 0x1b -+/* KMS ioctl */ -+#define DRM_RADEON_GEM_INFO 0x1c -+#define DRM_RADEON_GEM_CREATE 0x1d -+#define DRM_RADEON_GEM_MMAP 0x1e -+#define DRM_RADEON_GEM_PREAD 0x21 -+#define DRM_RADEON_GEM_PWRITE 0x22 -+#define DRM_RADEON_GEM_SET_DOMAIN 0x23 -+#define DRM_RADEON_GEM_WAIT_IDLE 0x24 -+#define DRM_RADEON_CS 0x26 -+#define DRM_RADEON_INFO 0x27 - - #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) - #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) -@@ -521,6 +531,17 @@ typedef struct { - #define DRM_IOCTL_RADEON_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t) - #define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t) - #define DRM_IOCTL_RADEON_SURF_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t) -+/* KMS */ -+#define DRM_IOCTL_RADEON_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, struct drm_radeon_gem_info) -+#define DRM_IOCTL_RADEON_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, struct drm_radeon_gem_create) -+#define DRM_IOCTL_RADEON_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, struct drm_radeon_gem_mmap) -+#define DRM_IOCTL_RADEON_GEM_PREAD DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, struct drm_radeon_gem_pread) -+#define DRM_IOCTL_RADEON_GEM_PWRITE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, struct drm_radeon_gem_pwrite) -+#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, struct drm_radeon_gem_set_domain) -+#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) -+#define DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs) -+#define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) -+ - - typedef struct drm_radeon_init { - enum { -@@ -528,7 +549,7 @@ typedef struct drm_radeon_init { - RADEON_CLEANUP_CP = 0x02, - RADEON_INIT_R200_CP = 0x03, - RADEON_INIT_R300_CP = 0x04, -- RADEON_INIT_R600_CP = 0x05, -+ RADEON_INIT_R600_CP = 0x05 - } func; - unsigned long sarea_priv_offset; - int is_pci; /* for overriding only */ -@@ -753,4 +774,112 @@ typedef struct drm_radeon_surface_free { - #define DRM_RADEON_VBLANK_CRTC1 1 - #define DRM_RADEON_VBLANK_CRTC2 2 - -+/* -+ * Kernel modesetting world below. -+ */ -+#define RADEON_GEM_DOMAIN_CPU 0x1 -+#define RADEON_GEM_DOMAIN_GTT 0x2 -+#define RADEON_GEM_DOMAIN_VRAM 0x4 -+ -+struct drm_radeon_gem_info { -+ uint64_t gart_size; -+ uint64_t vram_size; -+ uint64_t vram_visible; -+}; -+ -+#define RADEON_GEM_NO_BACKING_STORE 1 -+ -+struct drm_radeon_gem_create { -+ uint64_t size; -+ uint64_t alignment; -+ uint32_t handle; -+ uint32_t initial_domain; -+ uint32_t flags; -+}; -+ -+struct drm_radeon_gem_mmap { -+ uint32_t handle; -+ uint32_t pad; -+ uint64_t offset; -+ uint64_t size; -+ uint64_t addr_ptr; -+}; -+ -+struct drm_radeon_gem_set_domain { -+ uint32_t handle; -+ uint32_t read_domains; -+ uint32_t write_domain; -+}; -+ -+struct drm_radeon_gem_wait_idle { -+ uint32_t handle; -+ uint32_t pad; -+}; -+ -+struct drm_radeon_gem_busy { -+ uint32_t handle; -+ uint32_t busy; -+}; -+ -+struct drm_radeon_gem_pread { -+ /** Handle for the object being read. */ -+ uint32_t handle; -+ uint32_t pad; -+ /** Offset into the object to read from */ -+ uint64_t offset; -+ /** Length of data to read */ -+ uint64_t size; -+ /** Pointer to write the data into. */ -+ /* void *, but pointers are not 32/64 compatible */ -+ uint64_t data_ptr; -+}; -+ -+struct drm_radeon_gem_pwrite { -+ /** Handle for the object being written to. */ -+ uint32_t handle; -+ uint32_t pad; -+ /** Offset into the object to write to */ -+ uint64_t offset; -+ /** Length of data to write */ -+ uint64_t size; -+ /** Pointer to read the data from. */ -+ /* void *, but pointers are not 32/64 compatible */ -+ uint64_t data_ptr; -+}; -+ -+#define RADEON_CHUNK_ID_RELOCS 0x01 -+#define RADEON_CHUNK_ID_IB 0x02 -+ -+struct drm_radeon_cs_chunk { -+ uint32_t chunk_id; -+ uint32_t length_dw; -+ uint64_t chunk_data; -+}; -+ -+struct drm_radeon_cs_reloc { -+ uint32_t handle; -+ uint32_t read_domains; -+ uint32_t write_domain; -+ uint32_t flags; -+}; -+ -+struct drm_radeon_cs { -+ uint32_t num_chunks; -+ uint32_t cs_id; -+ /* this points to uint64_t * which point to cs chunks */ -+ uint64_t chunks; -+ /* updates to the limits after this CS ioctl */ -+ uint64_t gart_limit; -+ uint64_t vram_limit; -+}; -+ -+#define RADEON_INFO_DEVICE_ID 0x00 -+#define RADEON_INFO_NUM_GB_PIPES 0x01 -+ -+struct drm_radeon_info { -+ uint32_t request; -+ uint32_t pad; -+ uint64_t value; -+}; -+ - #endif -diff --git a/src/radeon_dummy_bufmgr.h b/src/radeon_dummy_bufmgr.h -new file mode 100644 -index 0000000..e08e656 ---- /dev/null -+++ b/src/radeon_dummy_bufmgr.h -@@ -0,0 +1,62 @@ -+ -+#ifndef RADEON_DUMMY_BUFMGR_H -+#define RADEON_DUMMY_BUFMGR_H -+/* when we don't have modesetting but we still need these functions */ -+ -+struct radeon_bo { -+ int dummy; -+ void *ptr; -+}; -+ -+static inline int radeon_cs_begin(Bool dummy, int d2, const char *file, -+ const char *func, int line) -+{ -+ return 0; -+} -+ -+static inline int radeon_cs_end(Bool dummy, const char *file, -+ const char *func, int line) -+{ -+ return 0; -+} -+ -+static inline void radeon_cs_write_dword(Bool cs, uint32_t dword) -+{ -+} -+ -+static inline int radeon_cs_write_reloc(Bool cs, -+ struct radeon_bo *bo, -+ uint32_t read_domain, -+ uint32_t write_domain, -+ uint32_t flags) -+{ -+ return 0; -+} -+ -+static inline int radeon_bo_map(struct radeon_bo *bo, int write) {return 0;} -+static inline void radeon_bo_ref(struct radeon_bo *bo) {return;} -+static inline struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo) {return NULL;} -+static inline void radeon_bo_unmap(struct radeon_bo *bo) {return;} -+static inline int radeon_bo_wait(struct radeon_bo *bo) {return 0;} -+ -+static inline int radeon_cs_space_add_persistent_bo(Bool cs, struct radeon_bo *bo, -+ uint32_t read_domains, uint32_t write_domain) -+{ -+ return 0; -+} -+ -+static inline int radeon_cs_space_check(Bool cs) -+{ -+ return 0; -+} -+ -+static inline void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) -+{ -+} -+ -+static inline void radeon_ddx_cs_start(ScrnInfoPtr pScrn, int n, -+ const char *file, const char *func, int line) -+{ -+} -+ -+#endif -diff --git a/src/radeon_exa.c b/src/radeon_exa.c -index ae68146..25e3311 100644 ---- a/src/radeon_exa.c -+++ b/src/radeon_exa.c -@@ -120,6 +120,19 @@ static __inline__ uint32_t F_TO_DW(float val) - return tmp.l; - } - -+ -+#ifdef XF86DRM_MODE -+ -+static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int read_domains, int write_domain) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv = exaGetPixmapDriverPrivate(pPix); -+ -+ radeon_cs_space_add_persistent_bo(cs, driver_priv->bo, read_domains, write_domain); -+} -+ -+#endif /* XF86DRM_MODE */ -+ -+ - /* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we - * require src and dest datatypes to be equal. - */ -@@ -179,7 +192,6 @@ static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset - - Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset) - { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); - uint32_t pitch, offset; - int bpp; - -@@ -187,7 +199,7 @@ Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset) - if (bpp == 24) - bpp = 8; - -- offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -+ offset = radeonGetPixmapOffset(pPix); - pitch = exaGetPixmapPitch(pPix); - - return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); -@@ -224,7 +236,7 @@ int RADEONBiggerCrtcArea(PixmapPtr pPix) - - static unsigned long swapper_surfaces[6]; - --static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) -+static Bool RADEONPrepareAccess_BE(PixmapPtr pPix, int index) - { - RINFO_FROM_SCREEN(pPix->drawable.pScreen); - unsigned char *RADEONMMIO = info->MMIO; -@@ -290,7 +302,7 @@ static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) - return TRUE; - } - --static void RADEONFinishAccess(PixmapPtr pPix, int index) -+static void RADEONFinishAccess_BE(PixmapPtr pPix, int index) - { - RINFO_FROM_SCREEN(pPix->drawable.pScreen); - unsigned char *RADEONMMIO = info->MMIO; -@@ -323,6 +335,121 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) - - #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ - -+#ifdef XF86DRM_MODE -+static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ int ret; -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ if (!driver_priv) -+ return FALSE; -+ -+ /* if we have more refs than just the BO then flush */ -+ if (driver_priv->bo->cref > 1) -+ radeon_cs_flush_indirect(pScrn); -+ -+ radeon_bo_wait(driver_priv->bo); -+ -+ /* flush IB */ -+ ret = radeon_bo_map(driver_priv->bo, 1); -+ if (ret) { -+ FatalError("failed to map pixmap %d\n", ret); -+ return FALSE; -+ } -+ -+ pPix->devPrivate.ptr = driver_priv->bo->ptr; -+ -+ return TRUE; -+} -+ -+static void RADEONFinishAccess_CS(PixmapPtr pPix, int index) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv; -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ if (!driver_priv) -+ return; -+ -+ radeon_bo_unmap(driver_priv->bo); -+ pPix->devPrivate.ptr = NULL; -+} -+ -+ -+void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ struct radeon_exa_pixmap_priv *new_priv; -+ -+ new_priv = xcalloc(1, sizeof(struct radeon_exa_pixmap_priv)); -+ if (!new_priv) -+ return NULL; -+ -+ if (size == 0) -+ return new_priv; -+ -+ new_priv->bo = radeon_bo_open(info->bufmgr, 0, size, -+ align, RADEON_GEM_DOMAIN_VRAM | -+ RADEON_GEM_DOMAIN_GTT, 0); -+ if (!new_priv->bo) { -+ xfree(new_priv); -+ ErrorF("Failed to alloc memory\n"); -+ return NULL; -+ } -+ -+ return new_priv; -+ -+} -+ -+static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv = driverPriv; -+ -+ if (!driverPriv) -+ return; -+ -+ if (driver_priv->bo) -+ radeon_bo_unref(driver_priv->bo); -+ xfree(driverPriv); -+} -+ -+struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv; -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ return driver_priv->bo; -+} -+ -+void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv; -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ if (driver_priv) { -+ if (driver_priv->bo) -+ radeon_bo_unref(driver_priv->bo); -+ -+ radeon_bo_ref(bo); -+ driver_priv->bo = bo; -+ } -+} -+ -+static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) -+{ -+ struct radeon_exa_pixmap_priv *driver_priv; -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ -+ if (!driver_priv) -+ return FALSE; -+ if (driver_priv->bo) -+ return TRUE; -+ return FALSE; -+} -+#endif -+ - #define ENTER_DRAW(x) TRACE - #define LEAVE_DRAW(x) TRACE - /***********************************************************************/ -@@ -332,6 +459,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) - #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) - #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) - #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val)) -+#define OUT_RELOC(x, read, write) do {} while(0) - #define FINISH_ACCEL() - - #ifdef RENDER -@@ -345,6 +473,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) - #undef OUT_ACCEL_REG - #undef OUT_ACCEL_REG_F - #undef FINISH_ACCEL -+#undef OUT_RELOC - - #ifdef XF86DRI - -@@ -355,6 +484,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) - #define BEGIN_ACCEL(n) BEGIN_RING(2*(n)) - #define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) - #define FINISH_ACCEL() ADVANCE_RING() -+#define OUT_RELOC(x, read, write) OUT_RING_RELOC(x, read, write) - - #define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) - -@@ -523,6 +653,10 @@ RADEONTexOffsetStart(PixmapPtr pPix) - { - RINFO_FROM_SCREEN(pPix->drawable.pScreen); - unsigned long long offset; -+ -+ if (exaGetPixmapDriverPrivate(pPix)) -+ return -1; -+ - exaMoveInPixmap(pPix); - ExaOffscreenMarkUsed(pPix); - -diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c -index 59cb46f..19adffb 100644 ---- a/src/radeon_exa_funcs.c -+++ b/src/radeon_exa_funcs.c -@@ -74,6 +74,9 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - RADEONInfoPtr info = RADEONPTR(pScrn); - -+ if (info->cs) -+ return; -+ - TRACE; - - if (info->accel_state->exaMarkerSynced != marker) { -@@ -84,12 +87,71 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) - RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; - } - -+static void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int has_src; -+ ACCEL_PREAMBLE(); -+ -+ /* don't emit if no operation in progress */ -+ if (info->state_2d.op == 0 && op == 0) -+ return; -+ -+ has_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo); -+ -+ if (has_src) { -+ BEGIN_ACCEL_RELOC(10, 2); -+ } else { -+ BEGIN_ACCEL_RELOC(9, 1); -+ } -+ OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right); -+ OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl); -+ OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr); -+ OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr); -+ OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR, info->state_2d.dp_src_frgd_clr); -+ OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR, info->state_2d.dp_src_bkgd_clr); -+ OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask); -+ OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl); -+ -+ OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset); -+ if (info->cs) -+ OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ if (has_src) { -+ OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset); -+ if (info->cs) -+ OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ } -+ FINISH_ACCEL(); -+ -+ if (op) -+ info->state_2d.op = op; -+ if (info->cs) -+ info->reemit_current2d = FUNC_NAME(Emit2DState); -+} -+ -+static void -+FUNC_NAME(RADEONDone2D)(PixmapPtr pPix) -+{ -+ RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+ ACCEL_PREAMBLE(); -+ -+ TRACE; -+ -+ info->state_2d.op = 0; -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -+ RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); -+ FINISH_ACCEL(); -+} -+ - static Bool - FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) - { - RINFO_FROM_SCREEN(pPix->drawable.pScreen); - uint32_t datatype, dst_pitch_offset; -- ACCEL_PREAMBLE(); - - TRACE; - -@@ -102,20 +164,47 @@ FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) - - RADEON_SWITCH_TO_2D(); - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, -- RADEON_GMC_DST_PITCH_OFFSET_CNTL | -- RADEON_GMC_BRUSH_SOLID_COLOR | -- (datatype << 8) | -- RADEON_GMC_SRC_DATATYPE_COLOR | -- RADEON_ROP[alu].pattern | -- RADEON_GMC_CLR_CMP_CNTL_DIS); -- OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg); -- OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm); -- OUT_ACCEL_REG(RADEON_DP_CNTL, -- (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM)); -- OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); -- FINISH_ACCEL(); -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ struct radeon_exa_pixmap_priv *driver_priv; -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) -+ RADEON_FALLBACK(("Not enough RAM to hw accel solid operation\n")); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ if (driver_priv) -+ info->state_2d.dst_bo = driver_priv->bo; -+ } -+#endif -+ -+ info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX | -+ RADEON_DEFAULT_SC_BOTTOM_MAX); -+ info->state_2d.dp_brush_bkgd_clr = 0x00000000; -+ info->state_2d.dp_src_frgd_clr = 0xffffffff; -+ info->state_2d.dp_src_bkgd_clr = 0x00000000; -+ info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | -+ RADEON_GMC_BRUSH_SOLID_COLOR | -+ (datatype << 8) | -+ RADEON_GMC_SRC_DATATYPE_COLOR | -+ RADEON_ROP[alu].pattern | -+ RADEON_GMC_CLR_CMP_CNTL_DIS); -+ info->state_2d.dp_brush_frgd_clr = fg; -+ info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM); -+ info->state_2d.dp_write_mask = pm; -+ info->state_2d.dst_pitch_offset = dst_pitch_offset; -+ info->state_2d.src_pitch_offset = 0; -+ info->state_2d.src_bo = NULL; -+ -+ info->accel_state->dst_pix = pPix; -+ -+ FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID); - - return TRUE; - } -@@ -129,6 +218,14 @@ FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) - - TRACE; - -+#if defined(ACCEL_CP) && defined(XF86DRM_MODE) -+ if (info->cs && CS_FULL(info->cs)) { -+ FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix); -+ radeon_cs_flush_indirect(pScrn); -+ FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID); -+ } -+#endif -+ - if (info->accel_state->vsync) - FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2); - -@@ -138,48 +235,35 @@ FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) - FINISH_ACCEL(); - } - --static void --FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) --{ -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -- ACCEL_PREAMBLE(); -- -- TRACE; -- -- BEGIN_ACCEL(2); -- OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -- RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); -- FINISH_ACCEL(); --} -- - void - FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset, - uint32_t dst_pitch_offset, uint32_t datatype, int rop, - Pixel planemask) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -- ACCEL_PREAMBLE(); -- -- RADEON_SWITCH_TO_2D(); - -- BEGIN_ACCEL(5); -- OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, -- RADEON_GMC_DST_PITCH_OFFSET_CNTL | -- RADEON_GMC_SRC_PITCH_OFFSET_CNTL | -- RADEON_GMC_BRUSH_NONE | -- (datatype << 8) | -- RADEON_GMC_SRC_DATATYPE_COLOR | -- RADEON_ROP[rop].rop | -- RADEON_DP_SRC_SOURCE_MEMORY | -- RADEON_GMC_CLR_CMP_CNTL_DIS); -- OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); -- OUT_ACCEL_REG(RADEON_DP_CNTL, -- ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | -- (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0))); -- OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); -- OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset); -- FINISH_ACCEL(); -+ /* setup 2D state */ -+ info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | -+ RADEON_GMC_SRC_PITCH_OFFSET_CNTL | -+ RADEON_GMC_BRUSH_NONE | -+ (datatype << 8) | -+ RADEON_GMC_SRC_DATATYPE_COLOR | -+ RADEON_ROP[rop].rop | -+ RADEON_DP_SRC_SOURCE_MEMORY | -+ RADEON_GMC_CLR_CMP_CNTL_DIS); -+ info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | -+ (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)); -+ info->state_2d.dp_brush_frgd_clr = 0xffffffff; -+ info->state_2d.dp_brush_bkgd_clr = 0x00000000; -+ info->state_2d.dp_src_frgd_clr = 0xffffffff; -+ info->state_2d.dp_src_bkgd_clr = 0x00000000; -+ info->state_2d.dp_write_mask = planemask; -+ info->state_2d.dst_pitch_offset = dst_pitch_offset; -+ info->state_2d.src_pitch_offset = src_pitch_offset; -+ info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX -+ | RADEON_DEFAULT_SC_BOTTOM_MAX); -+ -+ FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY); - } - - static Bool -@@ -190,11 +274,34 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, - { - RINFO_FROM_SCREEN(pDst->drawable.pScreen); - uint32_t datatype, src_pitch_offset, dst_pitch_offset; -- - TRACE; - -+ RADEON_SWITCH_TO_2D(); -+ -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ struct radeon_exa_pixmap_priv *driver_priv; -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pSrc); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ info->state_2d.src_bo = driver_priv->bo; -+ -+ driver_priv = exaGetPixmapDriverPrivate(pDst); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ info->state_2d.dst_bo = driver_priv->bo; -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) -+ RADEON_FALLBACK(("Not enough RAM to hw accel copy operation\n")); -+ } -+#endif -+ - info->accel_state->xdir = xdir; - info->accel_state->ydir = ydir; -+ info->accel_state->dst_pix = pDst; - - if (pDst->drawable.bitsPerPixel == 24) - RADEON_FALLBACK(("24bpp unsupported")); -@@ -222,6 +329,14 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst, - - TRACE; - -+#if defined(ACCEL_CP) && defined(XF86DRM_MODE) -+ if (info->cs && CS_FULL(info->cs)) { -+ FUNC_NAME(RADEONDone2D)(info->accel_state->dst_pix); -+ radeon_cs_flush_indirect(pScrn); -+ FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY); -+ } -+#endif -+ - if (info->accel_state->xdir < 0) { - srcX += w - 1; - dstX += w - 1; -@@ -231,7 +346,7 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst, - dstY += h - 1; - } - -- if (info->accel_state->vsync) -+ if (info->accel_state->vsync) - FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h); - - BEGIN_ACCEL(3); -@@ -243,22 +358,6 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst, - FINISH_ACCEL(); - } - --static void --FUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst) --{ -- RINFO_FROM_SCREEN(pDst->drawable.pScreen); -- ACCEL_PREAMBLE(); -- -- TRACE; -- -- BEGIN_ACCEL(2); -- OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -- RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); -- FINISH_ACCEL(); --} -- -- - #ifdef ACCEL_CP - - static Bool -@@ -459,38 +558,46 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) - - info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid); - info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid); -- info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDoneSolid); -+ info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDone2D); - - info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy); - info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy); -- info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDoneCopy); -+ info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDone2D); - - info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync); - info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync); - #ifdef ACCEL_CP -- info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP; -- if (info->accelDFS) -- info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP; -+ if (!info->kms_enabled) { -+ info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP; -+ if (info->accelDFS) -+ info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP; -+ } - #endif - - #if X_BYTE_ORDER == X_BIG_ENDIAN -- info->accel_state->exa->PrepareAccess = RADEONPrepareAccess; -- info->accel_state->exa->FinishAccess = RADEONFinishAccess; --#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ -+ info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE; -+ info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE; -+#endif - - info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; - #ifdef EXA_SUPPORTS_PREPARE_AUX - info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; - #endif -+#ifdef EXA_SUPPORTS_OFFSCREEN_OVERLAPS -+ /* The 2D engine supports overlapping memory areas */ -+ info->accel_state->exa->flags |= EXA_SUPPORTS_OFFSCREEN_OVERLAPS; -+#endif - info->accel_state->exa->pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1; - info->accel_state->exa->pixmapPitchAlign = 64; - -+#ifdef EXA_HANDLES_PIXMAPS -+ if (info->cs) -+ info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; -+#endif -+ - #ifdef RENDER - if (info->RenderAccel) { -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " -- "unsupported on R600 and newer cards.\n"); -- else if (IS_R300_3D || IS_R500_3D) { -+ if (IS_R300_3D || IS_R500_3D) { - if ((info->ChipFamily < CHIP_FAMILY_RS400) - #ifdef XF86DRI - || (info->directRenderingEnabled) -@@ -505,10 +612,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) - info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); - } else - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n"); -- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280) || -- (info->ChipFamily == CHIP_FAMILY_RS300) || -- (info->ChipFamily == CHIP_FAMILY_R200)) { -+ } else if (IS_R200_3D) { - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " - "enabled for R200 type cards.\n"); - info->accel_state->exa->CheckComposite = R200CheckComposite; -@@ -528,15 +632,28 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) - } - #endif - -+#ifdef XF86DRM_MODE -+#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) -+ if (info->cs) { -+ info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; -+ info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; -+ info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; -+ info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; -+ info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; -+ } -+#endif -+#endif -+ -+ - #if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); - - info->accel_state->exa->maxPitchBytes = 16320; -- info->accel_state->exa->maxX = 8192; -+ info->accel_state->exa->maxX = 8191; - #else - info->accel_state->exa->maxX = 16320 / 4; - #endif -- info->accel_state->exa->maxY = 8192; -+ info->accel_state->exa->maxY = 8191; - - if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); -diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c -index 571204a..9cd70cd 100644 ---- a/src/radeon_exa_render.c -+++ b/src/radeon_exa_render.c -@@ -365,13 +365,14 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad && - !(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y)); - int i; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); - - txpitch = exaGetPixmapPitch(pPix); -- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -+ txoffset = 0; -+ -+ CHECK_OFFSET(pPix, 0x1f, "texture"); - -- if ((txoffset & 0x1f) != 0) -- RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); - if ((txpitch & 0x1f) != 0) - RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); - -@@ -395,8 +396,8 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - txformat |= RADEON_TXFORMAT_NON_POWER2; - txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */ - -- info->accel_state->texW[unit] = 1; -- info->accel_state->texH[unit] = 1; -+ info->accel_state->texW[unit] = w; -+ info->accel_state->texH[unit] = h; - - switch (pPict->filter) { - case PictFilterNearest: -@@ -426,23 +427,27 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - } - } - -- BEGIN_ACCEL(5); -+ BEGIN_ACCEL_RELOC(5, 1); - if (unit == 0) { - OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter); - OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat); -- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset); - OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, - (pPix->drawable.width - 1) | - ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); - OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32); -+ -+ EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix); -+ /* emit a texture relocation */ - } else { - OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter); - OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat); -- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset); -+ - OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, - (pPix->drawable.width - 1) | - ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); - OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32); -+ EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix); -+ /* emit a texture relocation */ - } - FINISH_ACCEL(); - -@@ -537,6 +542,45 @@ static Bool R100CheckComposite(int op, PicturePtr pSrcPicture, - - return TRUE; - } -+ -+static Bool -+RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, -+ PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, -+ PixmapPtr pDst) -+{ -+ RINFO_FROM_SCREEN(pDst->drawable.pScreen); -+ -+ info->accel_state->composite_op = op; -+ info->accel_state->dst_pic = pDstPicture; -+ info->accel_state->msk_pic = pMaskPicture; -+ info->accel_state->src_pic = pSrcPicture; -+ info->accel_state->dst_pix = pDst; -+ info->accel_state->msk_pix = pMask; -+ info->accel_state->src_pix = pSrc; -+ -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ -+ radeon_add_pixmap(info->cs, pSrc, -+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ if (pMask) -+ radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) -+ RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n")); -+ } -+#endif -+ -+ return TRUE; -+} -+ - #endif /* ONLY_ONCE */ - - static Bool FUNC_NAME(R100PrepareComposite)(int op, -@@ -548,9 +592,10 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, - PixmapPtr pDst) - { - RINFO_FROM_SCREEN(pDst->drawable.pScreen); -- uint32_t dst_format, dst_offset, dst_pitch, colorpitch; -+ uint32_t dst_format, dst_pitch, colorpitch; - uint32_t pp_cntl, blendcntl, cblend, ablend; - int pixel_shift; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); - - TRACE; -@@ -561,29 +606,25 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, - if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha) - RADEON_FALLBACK(("Can't dst alpha blend A8\n")); - -- if (pMask) -- info->accel_state->has_mask = TRUE; -- else -- info->accel_state->has_mask = FALSE; -- - pixel_shift = pDst->drawable.bitsPerPixel >> 4; - -- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - dst_pitch = exaGetPixmapPitch(pDst); - colorpitch = dst_pitch >> pixel_shift; - if (RADEONPixmapIsColortiled(pDst)) - colorpitch |= RADEON_COLOR_TILE_ENABLE; - -- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; -- dst_pitch = exaGetPixmapPitch(pDst); -- if ((dst_offset & 0x0f) != 0) -- RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); -+ CHECK_OFFSET(pDst, 0x0f, "destination"); -+ - if (((dst_pitch >> pixel_shift) & 0x7) != 0) - RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); - - if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) - return FALSE; - -+ RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture, -+ pSrc, pMask, pDst); -+ -+ /* switch to 3D after doing buffer space checks as the latter may flush */ - RADEON_SWITCH_TO_3D(); - - if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0)) -@@ -598,11 +639,11 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, - info->accel_state->is_transform[1] = FALSE; - } - -- BEGIN_ACCEL(8); -+ BEGIN_ACCEL_RELOC(10, 2); - OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl); - OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE); -- OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); -- OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); -+ EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst); -+ EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst); - - /* IN operator: Multiply src by mask components or mask alpha. - * BLEND_CTL_ADD is A * B + C. -@@ -650,6 +691,10 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, - blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); - - OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl); -+ -+ OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -+ OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) | -+ ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT))); - FINISH_ACCEL(); - - return TRUE; -@@ -701,13 +746,14 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad && - !(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y)); - int i; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); - - txpitch = exaGetPixmapPitch(pPix); -- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; - -- if ((txoffset & 0x1f) != 0) -- RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); -+ txoffset = 0; -+ CHECK_OFFSET(pPix, 0x1f, "texture"); -+ - if ((txpitch & 0x1f) != 0) - RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); - -@@ -764,7 +810,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - } - } - -- BEGIN_ACCEL(6); -+ BEGIN_ACCEL_RELOC(6, 1); - if (unit == 0) { - OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); - OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -@@ -772,7 +818,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) | - ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); - OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset); -+ EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix); - } else { - OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); - OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat); -@@ -780,7 +826,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) | - ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); - OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset); -+ EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix); -+ /* emit a texture relocation */ - } - FINISH_ACCEL(); - -@@ -803,6 +850,10 @@ static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP - - TRACE; - -+ /* Check for unsupported compositing operations. */ -+ if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) -+ RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); -+ - if (!pSrcPicture->pDrawable) - return FALSE; - -@@ -870,9 +921,10 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, - PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) - { - RINFO_FROM_SCREEN(pDst->drawable.pScreen); -- uint32_t dst_format, dst_offset, dst_pitch; -+ uint32_t dst_format, dst_pitch; - uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch; - int pixel_shift; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); - - TRACE; -@@ -883,27 +935,25 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, - if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha) - RADEON_FALLBACK(("Can't dst alpha blend A8\n")); - -- if (pMask) -- info->accel_state->has_mask = TRUE; -- else -- info->accel_state->has_mask = FALSE; -- - pixel_shift = pDst->drawable.bitsPerPixel >> 4; - -- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - dst_pitch = exaGetPixmapPitch(pDst); - colorpitch = dst_pitch >> pixel_shift; - if (RADEONPixmapIsColortiled(pDst)) - colorpitch |= RADEON_COLOR_TILE_ENABLE; - -- if ((dst_offset & 0x0f) != 0) -- RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); -+ CHECK_OFFSET(pDst, 0xf, "destination"); -+ - if (((dst_pitch >> pixel_shift) & 0x7) != 0) - RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); - - if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) - return FALSE; - -+ RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture, -+ pSrc, pMask, pDst); -+ -+ /* switch to 3D after doing buffer space checks as it may flush */ - RADEON_SWITCH_TO_3D(); - - if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0)) -@@ -918,11 +968,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, - info->accel_state->is_transform[1] = FALSE; - } - -- BEGIN_ACCEL(11); -+ BEGIN_ACCEL_RELOC(13, 2); - - OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl); - OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE); -- OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); -+ -+ EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst); -+ EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst); - - OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); - if (pMask) -@@ -933,7 +985,7 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, - OUT_ACCEL_REG(R200_SE_VTX_FMT_1, - (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); - -- OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); -+ - - /* IN operator: Multiply src by mask components or mask alpha. - * BLEND_CTL_ADD is A * B + C. -@@ -978,6 +1030,11 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, - /* Op operator. */ - blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); - OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl); -+ -+ OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -+ OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) | -+ ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT))); -+ - FINISH_ACCEL(); - - return TRUE; -@@ -991,6 +1048,10 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, - int unit, - Bool is_r500) - { -+ ScreenPtr pScreen = pDstPict->pDrawable->pScreen; -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ - int w = pPict->pDrawable->width; - int h = pPict->pDrawable->height; - int i; -@@ -1016,8 +1077,17 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, - RADEON_FALLBACK(("Unsupported picture format 0x%x\n", - (int)pPict->format)); - -- if (!RADEONCheckTexturePOT(pPict, unit == 0)) -+ if (!RADEONCheckTexturePOT(pPict, unit == 0)) { -+ if (info->cs) { -+ struct radeon_exa_pixmap_priv *driver_priv; -+ PixmapPtr pPix; -+ -+ pPix = RADEONGetDrawablePixmap(pPict->pDrawable); -+ driver_priv = exaGetPixmapDriverPrivate(pPix); -+ //TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo); -+ } - return FALSE; -+ } - - if (pPict->filter != PictFilterNearest && - pPict->filter != PictFilterBilinear) -@@ -1049,15 +1119,16 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - int w = pPict->pDrawable->width; - int h = pPict->pDrawable->height; - int i, pixel_shift; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); - - TRACE; - - txpitch = exaGetPixmapPitch(pPix); -- txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -+ txoffset = 0; -+ -+ CHECK_OFFSET(pPix, 0x1f, "texture"); - -- if ((txoffset & 0x1f) != 0) -- RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); - if ((txpitch & 0x1f) != 0) - RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); - -@@ -1077,6 +1148,13 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - - txformat1 = R300TexFormats[i].card_fmt; - -+ if (IS_R300_3D) { -+ if ((unit == 0) && info->accel_state->msk_pic) -+ txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0; -+ else if (unit == 1) -+ txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1; -+ } -+ - txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | - (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT)); - -@@ -1092,9 +1170,6 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - */ - txformat0 |= R300_TXPITCH_EN; - -- info->accel_state->texW[unit] = w; -- info->accel_state->texH[unit] = h; -- - txfilter = (unit << R300_TX_ID_SHIFT); - - if (pPict->repeat) { -@@ -1139,13 +1214,15 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); - } - -- BEGIN_ACCEL(pPict->repeat ? 6 : 7); -+ BEGIN_ACCEL_RELOC(pPict->repeat ? 6 : 7, 1); - OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter); - OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0); - OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0); - OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1); - OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch); -- OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset); -+ -+ EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix); -+ - if (!pPict->repeat) - OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0); - FINISH_ACCEL(); -@@ -1153,8 +1230,61 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - if (pPict->transform != 0) { - info->accel_state->is_transform[unit] = TRUE; - info->accel_state->transform[unit] = pPict->transform; -+ -+ /* setup the PVS consts */ -+ if (info->accel_state->has_tcl) { -+ info->accel_state->texW[unit] = 1; -+ info->accel_state->texH[unit] = 1; -+ BEGIN_ACCEL(9); -+ if (IS_R300_3D) -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2)); -+ else -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2)); -+ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w)); -+ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2]))); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h)); -+ -+ FINISH_ACCEL(); -+ } else { -+ info->accel_state->texW[unit] = w; -+ info->accel_state->texH[unit] = h; -+ } - } else { - info->accel_state->is_transform[unit] = FALSE; -+ -+ /* setup the PVS consts */ -+ if (info->accel_state->has_tcl) { -+ info->accel_state->texW[unit] = 1; -+ info->accel_state->texH[unit] = 1; -+ -+ BEGIN_ACCEL(9); -+ if (IS_R300_3D) -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2)); -+ else -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2)); -+ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w)); -+ -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0)); -+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h)); -+ -+ FINISH_ACCEL(); -+ } else { -+ info->accel_state->texW[unit] = w; -+ info->accel_state->texH[unit] = h; -+ } - } - - return TRUE; -@@ -1188,8 +1318,8 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP - } else { - max_tex_w = 2048; - max_tex_h = 2048; -- max_dst_w = 2560; -- max_dst_h = 2560; -+ max_dst_w = 4021; -+ max_dst_h = 4021; - } - - if (pSrcPixmap->drawable.width > max_tex_w || -@@ -1251,25 +1381,21 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) - { - RINFO_FROM_SCREEN(pDst->drawable.pScreen); -- uint32_t dst_format, dst_offset, dst_pitch; -+ uint32_t dst_format, dst_pitch; - uint32_t txenable, colorpitch; -- uint32_t blendcntl; -+ uint32_t blendcntl, output_fmt; -+ uint32_t src_color, src_alpha; -+ uint32_t mask_color, mask_alpha; - int pixel_shift; -+ struct radeon_exa_pixmap_priv *driver_priv; - ACCEL_PREAMBLE(); -- - TRACE; - - if (!R300GetDestFormat(pDstPicture, &dst_format)) - return FALSE; - -- if (pMask) -- info->accel_state->has_mask = TRUE; -- else -- info->accel_state->has_mask = FALSE; -- - pixel_shift = pDst->drawable.bitsPerPixel >> 4; - -- dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - dst_pitch = exaGetPixmapPitch(pDst); - colorpitch = dst_pitch >> pixel_shift; - -@@ -1278,14 +1404,18 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - - colorpitch |= dst_format; - -- if ((dst_offset & 0x0f) != 0) -- RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); -+ CHECK_OFFSET(pDst, 0x0f, "destination"); -+ - if (((dst_pitch >> pixel_shift) & 0x7) != 0) - RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); - - if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE)) - return FALSE; - -+ RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture, -+ pSrc, pMask, pDst); -+ -+ /* have to execute switch after doing buffer sizing check as the latter flushes */ - RADEON_SWITCH_TO_3D(); - - if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0)) -@@ -1303,9 +1433,10 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - /* setup the VAP */ - if (info->accel_state->has_tcl) { - if (pMask) -- BEGIN_ACCEL(8); -+ BEGIN_ACCEL(10); - else -- BEGIN_ACCEL(7); -+ BEGIN_ACCEL(9); -+ OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); - } else { - if (pMask) - BEGIN_ACCEL(6); -@@ -1356,22 +1487,28 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - - /* load the vertex shader - * We pre-load vertex programs in RADEONInit3DEngine(): -- * - exa no mask -- * - exa mask -+ * - exa - * - Xv -+ * - Xv bicubic - * Here we select the offset of the vertex program we want to use - */ - if (info->accel_state->has_tcl) { - if (pMask) { -+ /* consts used by vertex shaders */ -+ OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) | -+ R300_PVS_MAX_CONST_ADDR(3))); - OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, - ((0 << R300_PVS_FIRST_INST_SHIFT) | -- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) | -- (2 << R300_PVS_LAST_INST_SHIFT))); -+ (8 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (8 << R300_PVS_LAST_INST_SHIFT))); - OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -+ (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); - } else { -+ /* consts used by vertex shaders */ -+ OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) | -+ R300_PVS_MAX_CONST_ADDR(3))); - OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -- ((3 << R300_PVS_FIRST_INST_SHIFT) | -+ ((0 << R300_PVS_FIRST_INST_SHIFT) | - (4 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (4 << R300_PVS_LAST_INST_SHIFT))); - OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -@@ -1393,12 +1530,33 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - OUT_ACCEL_REG(R300_TX_ENABLE, txenable); - FINISH_ACCEL(); - -+ /* shader output swizzling */ -+ switch (pDstPicture->format) { -+ case PICT_a8r8g8b8: -+ case PICT_x8r8g8b8: -+ default: -+ output_fmt = (R300_OUT_FMT_C4_8 | -+ R300_OUT_FMT_C0_SEL_BLUE | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_RED | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ break; -+ case PICT_a8b8g8r8: -+ case PICT_x8b8g8r8: -+ output_fmt = (R300_OUT_FMT_C4_8 | -+ R300_OUT_FMT_C0_SEL_RED | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_BLUE | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ break; -+ case PICT_a8: -+ output_fmt = (R300_OUT_FMT_C4_8 | -+ R300_OUT_FMT_C0_SEL_ALPHA); -+ break; -+ } -+ - /* setup pixel shader */ - if (IS_R300_3D) { -- uint32_t output_fmt; -- int src_color, src_alpha; -- int mask_color, mask_alpha; -- - if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) - src_color = R300_ALU_RGB_0_0; - else -@@ -1409,45 +1567,22 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - else - src_alpha = R300_ALU_ALPHA_SRC0_A; - -- if (pMask && pMaskPicture->componentAlpha) { -- if (RadeonBlendOp[op].src_alpha) { -- if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -- src_color = R300_ALU_RGB_1_0; -- src_alpha = R300_ALU_ALPHA_1_0; -- } else { -- src_color = R300_ALU_RGB_SRC0_AAA; -- src_alpha = R300_ALU_ALPHA_SRC0_A; -- } -- -+ if (pMask) { -+ if (pMaskPicture->componentAlpha) { -+ if (RadeonBlendOp[op].src_alpha) { -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) -+ src_color = R300_ALU_RGB_1_0; -+ else -+ src_color = R300_ALU_RGB_SRC0_AAA; -+ } else -+ src_color = R300_ALU_RGB_SRC0_RGB; - mask_color = R300_ALU_RGB_SRC1_RGB; -- -- if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_alpha = R300_ALU_ALPHA_1_0; -- else -- mask_alpha = R300_ALU_ALPHA_SRC1_A; -- - } else { -- src_color = R300_ALU_RGB_SRC0_RGB; -- -- if (PICT_FORMAT_A(pSrcPicture->format) == 0) -- src_alpha = R300_ALU_ALPHA_1_0; -- else -- src_alpha = R300_ALU_ALPHA_SRC0_A; -- -- mask_color = R300_ALU_RGB_SRC1_RGB; -- - if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_alpha = R300_ALU_ALPHA_1_0; -+ mask_color = R300_ALU_RGB_1_0; - else -- mask_alpha = R300_ALU_ALPHA_SRC1_A; -- -+ mask_color = R300_ALU_RGB_SRC1_AAA; - } -- } else if (pMask) { -- if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_color = R300_ALU_RGB_1_0; -- else -- mask_color = R300_ALU_RGB_SRC1_AAA; -- - if (PICT_FORMAT_A(pMaskPicture->format) == 0) - mask_alpha = R300_ALU_ALPHA_1_0; - else -@@ -1457,32 +1592,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - mask_alpha = R300_ALU_ALPHA_1_0; - } - -- /* shader output swizzling */ -- switch (pDstPicture->format) { -- case PICT_a8r8g8b8: -- case PICT_x8r8g8b8: -- default: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_BLUE | -- R300_OUT_FMT_C1_SEL_GREEN | -- R300_OUT_FMT_C2_SEL_RED | -- R300_OUT_FMT_C3_SEL_ALPHA); -- break; -- case PICT_a8b8g8r8: -- case PICT_x8b8g8r8: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_RED | -- R300_OUT_FMT_C1_SEL_GREEN | -- R300_OUT_FMT_C2_SEL_BLUE | -- R300_OUT_FMT_C3_SEL_ALPHA); -- break; -- case PICT_a8: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_ALPHA); -- break; -- } -- -- - /* setup the rasterizer, load FS */ - if (pMask) { - BEGIN_ACCEL(16); -@@ -1492,7 +1601,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - R300_RS_COUNT_HIRES_EN)); - - /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(0) | -@@ -1514,7 +1623,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(0) | -@@ -1626,10 +1735,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - R300_ALU_ALPHA_CLAMP)); - FINISH_ACCEL(); - } else { -- uint32_t output_fmt; -- uint32_t src_color, src_alpha; -- uint32_t mask_color, mask_alpha; -- - if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) - src_color = (R500_ALU_RGB_R_SWIZ_A_0 | - R500_ALU_RGB_G_SWIZ_A_0 | -@@ -1644,59 +1749,35 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - else - src_alpha = R500_ALPHA_SWIZ_A_A; - -- if (pMask && pMaskPicture->componentAlpha) { -- if (RadeonBlendOp[op].src_alpha) { -- if (PICT_FORMAT_A(pSrcPicture->format) == 0) { -- src_color = (R500_ALU_RGB_R_SWIZ_A_1 | -- R500_ALU_RGB_G_SWIZ_A_1 | -- R500_ALU_RGB_B_SWIZ_A_1); -- src_alpha = R500_ALPHA_SWIZ_A_1; -- } else { -- src_color = (R500_ALU_RGB_R_SWIZ_A_A | -- R500_ALU_RGB_G_SWIZ_A_A | -- R500_ALU_RGB_B_SWIZ_A_A); -- src_alpha = R500_ALPHA_SWIZ_A_A; -- } -+ if (pMask) { -+ if (pMaskPicture->componentAlpha) { -+ if (RadeonBlendOp[op].src_alpha) { -+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) -+ src_color = (R500_ALU_RGB_R_SWIZ_A_1 | -+ R500_ALU_RGB_G_SWIZ_A_1 | -+ R500_ALU_RGB_B_SWIZ_A_1); -+ else -+ src_color = (R500_ALU_RGB_R_SWIZ_A_A | -+ R500_ALU_RGB_G_SWIZ_A_A | -+ R500_ALU_RGB_B_SWIZ_A_A); -+ } else -+ src_color = (R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B); - - mask_color = (R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B); -- -- if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_alpha = R500_ALPHA_SWIZ_B_1; -- else -- mask_alpha = R500_ALPHA_SWIZ_B_A; -- - } else { -- src_color = (R500_ALU_RGB_R_SWIZ_A_R | -- R500_ALU_RGB_G_SWIZ_A_G | -- R500_ALU_RGB_B_SWIZ_A_B); -- -- if (PICT_FORMAT_A(pSrcPicture->format) == 0) -- src_alpha = R500_ALPHA_SWIZ_A_1; -- else -- src_alpha = R500_ALPHA_SWIZ_A_A; -- -- mask_color = (R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B); -- - if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_alpha = R500_ALPHA_SWIZ_B_1; -+ mask_color = (R500_ALU_RGB_R_SWIZ_B_1 | -+ R500_ALU_RGB_G_SWIZ_B_1 | -+ R500_ALU_RGB_B_SWIZ_B_1); - else -- mask_alpha = R500_ALPHA_SWIZ_B_A; -- -+ mask_color = (R500_ALU_RGB_R_SWIZ_B_A | -+ R500_ALU_RGB_G_SWIZ_B_A | -+ R500_ALU_RGB_B_SWIZ_B_A); - } -- } else if (pMask) { -- if (PICT_FORMAT_A(pMaskPicture->format) == 0) -- mask_color = (R500_ALU_RGB_R_SWIZ_B_1 | -- R500_ALU_RGB_G_SWIZ_B_1 | -- R500_ALU_RGB_B_SWIZ_B_1); -- else -- mask_color = (R500_ALU_RGB_R_SWIZ_B_A | -- R500_ALU_RGB_G_SWIZ_B_A | -- R500_ALU_RGB_B_SWIZ_B_A); -- - if (PICT_FORMAT_A(pMaskPicture->format) == 0) - mask_alpha = R500_ALPHA_SWIZ_B_1; - else -@@ -1708,31 +1789,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - mask_alpha = R500_ALPHA_SWIZ_B_1; - } - -- /* shader output swizzling */ -- switch (pDstPicture->format) { -- case PICT_a8r8g8b8: -- case PICT_x8r8g8b8: -- default: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_BLUE | -- R300_OUT_FMT_C1_SEL_GREEN | -- R300_OUT_FMT_C2_SEL_RED | -- R300_OUT_FMT_C3_SEL_ALPHA); -- break; -- case PICT_a8b8g8r8: -- case PICT_x8b8g8r8: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_RED | -- R300_OUT_FMT_C1_SEL_GREEN | -- R300_OUT_FMT_C2_SEL_BLUE | -- R300_OUT_FMT_C3_SEL_ALPHA); -- break; -- case PICT_a8: -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_ALPHA); -- break; -- } -- - BEGIN_ACCEL(7); - if (pMask) { - /* 4 components: 2 for tex0, 2 for tex1 */ -@@ -1741,7 +1797,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - R300_RS_COUNT_HIRES_EN)); - - /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); - - OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(2))); -@@ -1753,7 +1809,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); - - OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(1))); -@@ -1933,16 +1989,24 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - - /* Clear out scissoring */ - BEGIN_ACCEL(2); -- OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) | -- (0 << R300_SCISSOR_Y_SHIFT))); -- OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) | -- (8191 << R300_SCISSOR_Y_SHIFT))); -- FINISH_ACCEL(); -+ if (IS_R300_3D) { -+ OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) | -+ (1440 << R300_SCISSOR_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) | -+ ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); - -- BEGIN_ACCEL(3); -+ } else { -+ OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) | -+ (0 << R300_SCISSOR_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) | -+ ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT))); -+ } -+ FINISH_ACCEL(); - -- OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset); -- OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch); -+ -+ BEGIN_ACCEL_RELOC(3, 2); -+ EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst); -+ EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst); - - blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); - OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE); -@@ -1950,7 +2014,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - FINISH_ACCEL(); - - BEGIN_ACCEL(1); -- if (info->accel_state->has_mask) -+ if (pMask) - OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6); - else - OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4); -@@ -1959,6 +2023,25 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, - return TRUE; - } - -+static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst) -+{ -+ RINFO_FROM_SCREEN(pDst->drawable.pScreen); -+ ACCEL_PREAMBLE(); -+ -+ ENTER_DRAW(0); -+ -+ if (IS_R300_3D || IS_R500_3D) { -+ BEGIN_ACCEL(3); -+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); -+ } else -+ BEGIN_ACCEL(1); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); -+ FINISH_ACCEL(); -+ -+ LEAVE_DRAW(0); -+} -+ - - #ifdef ACCEL_CP - -@@ -2033,6 +2116,20 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, - /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", - srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ - -+#if defined(ACCEL_CP) && defined(XF86DRM_MODE) -+ if (info->cs && CS_FULL(info->cs)) { -+ FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix); -+ radeon_cs_flush_indirect(pScrn); -+ info->accel_state->exa->PrepareComposite(info->accel_state->composite_op, -+ info->accel_state->dst_pic, -+ info->accel_state->msk_pic, -+ info->accel_state->src_pic, -+ info->accel_state->dst_pix, -+ info->accel_state->msk_pix, -+ info->accel_state->src_pix); -+ } -+#endif -+ - srcTopLeft.x = IntToxFixed(srcX); - srcTopLeft.y = IntToxFixed(srcY); - srcTopRight.x = IntToxFixed(srcX + w); -@@ -2043,13 +2140,15 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, - srcBottomRight.y = IntToxFixed(srcY + h); - - if (info->accel_state->is_transform[0]) { -- transformPoint(info->accel_state->transform[0], &srcTopLeft); -- transformPoint(info->accel_state->transform[0], &srcTopRight); -- transformPoint(info->accel_state->transform[0], &srcBottomLeft); -- transformPoint(info->accel_state->transform[0], &srcBottomRight); -+ if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) { -+ transformPoint(info->accel_state->transform[0], &srcTopLeft); -+ transformPoint(info->accel_state->transform[0], &srcTopRight); -+ transformPoint(info->accel_state->transform[0], &srcBottomLeft); -+ transformPoint(info->accel_state->transform[0], &srcBottomRight); -+ } - } - -- if (info->accel_state->has_mask) { -+ if (info->accel_state->msk_pic) { - maskTopLeft.x = IntToxFixed(maskX); - maskTopLeft.y = IntToxFixed(maskY); - maskTopRight.x = IntToxFixed(maskX + w); -@@ -2060,10 +2159,12 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, - maskBottomRight.y = IntToxFixed(maskY + h); - - if (info->accel_state->is_transform[1]) { -- transformPoint(info->accel_state->transform[1], &maskTopLeft); -- transformPoint(info->accel_state->transform[1], &maskTopRight); -- transformPoint(info->accel_state->transform[1], &maskBottomLeft); -- transformPoint(info->accel_state->transform[1], &maskBottomRight); -+ if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) { -+ transformPoint(info->accel_state->transform[1], &maskTopLeft); -+ transformPoint(info->accel_state->transform[1], &maskTopRight); -+ transformPoint(info->accel_state->transform[1], &maskBottomLeft); -+ transformPoint(info->accel_state->transform[1], &maskBottomRight); -+ } - } - - vtx_count = 6; -@@ -2078,7 +2179,7 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, - BEGIN_RING(3 * vtx_count + 3); - OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, - 3 * vtx_count + 1)); -- if (info->accel_state->has_mask) -+ if (info->accel_state->msk_pic) - OUT_RING(RADEON_CP_VC_FRMT_XY | - RADEON_CP_VC_FRMT_ST0 | - RADEON_CP_VC_FRMT_ST1); -@@ -2128,7 +2229,7 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, - - #endif - -- if (info->accel_state->has_mask) { -+ if (info->accel_state->msk_pic) { - if (IS_R300_3D || IS_R500_3D) { - VTX_OUT_MASK((float)dstX, (float)dstY, - xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0], xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0], -@@ -2236,24 +2337,5 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, - } - } - --static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst) --{ -- RINFO_FROM_SCREEN(pDst->drawable.pScreen); -- ACCEL_PREAMBLE(); -- -- ENTER_DRAW(0); -- -- if (IS_R300_3D || IS_R500_3D) { -- BEGIN_ACCEL(3); -- OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); -- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); -- } else -- BEGIN_ACCEL(1); -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); -- FINISH_ACCEL(); -- -- LEAVE_DRAW(0); --} -- - #undef ONLY_ONCE - #undef FUNC_NAME -diff --git a/src/radeon_kms.c b/src/radeon_kms.c -new file mode 100644 -index 0000000..148386b ---- /dev/null -+++ b/src/radeon_kms.c -@@ -0,0 +1,874 @@ -+/* -+ * Copyright © 2009 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Authors: -+ * Dave Airlie -+ * -+ */ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+/* Driver data structures */ -+#include "radeon.h" -+#include "radeon_reg.h" -+#include "radeon_probe.h" -+#include "micmap.h" -+ -+#include "shadow.h" -+ -+#include "atipciids.h" -+ -+ -+ -+#ifdef XF86DRM_MODE -+ -+#include "radeon_chipset_gen.h" -+#include "radeon_chipinfo_gen.h" -+ -+#define CURSOR_WIDTH 64 -+#define CURSOR_HEIGHT 64 -+ -+#include "radeon_bo_gem.h" -+#include "radeon_cs_gem.h" -+static Bool radeon_setup_kernel_mem(ScreenPtr pScreen); -+ -+const OptionInfoRec RADEONOptions_KMS[] = { -+ { OPTION_NOACCEL, "NoAccel", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_SW_CURSOR, "SWcursor", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_PAGE_FLIP, "EnablePageFlip", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_ACCEL_DFS, "AccelDFS", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_IGNORE_EDID, "IgnoreEDID", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_COLOR_TILING, "ColorTiling", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_RENDER_ACCEL, "RenderAccel", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_SUBPIXEL_ORDER, "SubPixelOrder", OPTV_ANYSTR, {0}, FALSE }, -+ { OPTION_ACCELMETHOD, "AccelMethod", OPTV_STRING, {0}, FALSE }, -+ { OPTION_DRI, "DRI", OPTV_BOOLEAN, {0}, FALSE }, -+ { OPTION_TVSTD, "TVStandard", OPTV_STRING, {0}, FALSE }, -+ { OPTION_EXA_VSYNC, "EXAVSync", OPTV_BOOLEAN, {0}, FALSE }, -+ { -1, NULL, OPTV_NONE, {0}, FALSE } -+}; -+ -+void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int ret; -+ -+ if (!info->cs->cdw) -+ return; -+ radeon_cs_emit(info->cs); -+ radeon_cs_erase(info->cs); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) -+ ErrorF("space check failed in flush\n"); -+ -+ if (info->reemit_current2d) -+ info->reemit_current2d(pScrn, 0); -+ if (info->dri2.enabled) { -+ info->accel_state->XInited3D = FALSE; -+ info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; -+ } -+} -+ -+void radeon_ddx_cs_start(ScrnInfoPtr pScrn, -+ int n, const char *file, -+ const char *func, int line) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->cs->cdw + n > info->cs->ndw) { -+ radeon_cs_flush_indirect(pScrn); -+ -+ } -+ radeon_cs_begin(info->cs, n, file, func, line); -+} -+ -+ -+extern _X_EXPORT int gRADEONEntityIndex; -+ -+static int getRADEONEntityIndex(void) -+{ -+ return gRADEONEntityIndex; -+} -+ -+static void * -+radeonShadowWindow(ScreenPtr screen, CARD32 row, CARD32 offset, int mode, -+ CARD32 *size, void *closure) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[screen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int stride; -+ -+ stride = (pScrn->displayWidth * pScrn->bitsPerPixel) / 8; -+ *size = stride; -+ -+ return ((uint8_t *)info->front_bo->ptr + row * stride + offset); -+} -+ -+static Bool RADEONCreateScreenResources_KMS(ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ PixmapPtr pixmap; -+ -+ pScreen->CreateScreenResources = info->CreateScreenResources; -+ if (!(*pScreen->CreateScreenResources)(pScreen)) -+ return FALSE; -+ pScreen->CreateScreenResources = RADEONCreateScreenResources_KMS; -+ -+ if (info->r600_shadow_fb) { -+ pixmap = pScreen->GetScreenPixmap(pScreen); -+ -+ if (!shadowAdd(pScreen, pixmap, shadowUpdatePackedWeak(), -+ radeonShadowWindow, 0, NULL)) -+ return FALSE; -+ } -+ -+ if (info->dri2.enabled) { -+ if (info->front_bo) { -+ PixmapPtr pPix = pScreen->GetScreenPixmap(pScreen); -+ radeon_set_pixmap_bo(pPix, info->front_bo); -+ } -+ } -+ return TRUE; -+} -+ -+static void RADEONBlockHandler_KMS(int i, pointer blockData, -+ pointer pTimeout, pointer pReadmask) -+{ -+ ScreenPtr pScreen = screenInfo.screens[i]; -+ ScrnInfoPtr pScrn = xf86Screens[i]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ pScreen->BlockHandler = info->BlockHandler; -+ (*pScreen->BlockHandler) (i, blockData, pTimeout, pReadmask); -+ pScreen->BlockHandler = RADEONBlockHandler_KMS; -+ -+ if (info->VideoTimerCallback) -+ (*info->VideoTimerCallback)(pScrn, currentTime.milliseconds); -+ -+ info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; -+ radeon_cs_flush_indirect(pScrn); -+} -+ -+static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (!(info->accel_state = xcalloc(1, sizeof(struct radeon_accel_state)))) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to allocate accel_state rec!\n"); -+ return FALSE; -+ } -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, -+ "Using shadowfb for KMS on R600+\n"); -+ info->r600_shadow_fb = TRUE; -+ if (!xf86LoadSubModule(pScrn, "shadow")) -+ info->r600_shadow_fb = FALSE; -+ return TRUE; -+ } -+ -+ -+ if ((info->ChipFamily == CHIP_FAMILY_RS100) || -+ (info->ChipFamily == CHIP_FAMILY_RS200) || -+ (info->ChipFamily == CHIP_FAMILY_RS300) || -+ (info->ChipFamily == CHIP_FAMILY_RS400) || -+ (info->ChipFamily == CHIP_FAMILY_RS480) || -+ (info->ChipFamily == CHIP_FAMILY_RS600) || -+ (info->ChipFamily == CHIP_FAMILY_RS690) || -+ (info->ChipFamily == CHIP_FAMILY_RS740)) -+ info->accel_state->has_tcl = FALSE; -+ else { -+ info->accel_state->has_tcl = TRUE; -+ } -+ -+ info->useEXA = TRUE; -+ -+ if (info->useEXA) { -+ int errmaj = 0, errmin = 0; -+ info->exaReq.majorversion = EXA_VERSION_MAJOR; -+ info->exaReq.minorversion = EXA_VERSION_MINOR; -+ if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, -+ &info->exaReq, &errmaj, &errmin)) { -+ LoaderErrorMsg(NULL, "exa", errmaj, errmin); -+ return FALSE; -+ } -+ } -+ -+ return TRUE; -+} -+ -+static Bool RADEONPreInitChipType_KMS(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t cmd_stat; -+ int i; -+ -+ info->Chipset = PCI_DEV_DEVICE_ID(info->PciInfo); -+ pScrn->chipset = (char *)xf86TokenToString(RADEONChipsets, info->Chipset); -+ if (!pScrn->chipset) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "ChipID 0x%04x is not recognized\n", info->Chipset); -+ return FALSE; -+ } -+ -+ if (info->Chipset < 0) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Chipset \"%s\" is not recognized\n", pScrn->chipset); -+ return FALSE; -+ } -+ xf86DrvMsg(pScrn->scrnIndex, X_PROBED, -+ "Chipset: \"%s\" (ChipID = 0x%04x)\n", -+ pScrn->chipset, -+ info->Chipset); -+ -+ for (i = 0; i < sizeof(RADEONCards) / sizeof(RADEONCardInfo); i++) { -+ if (info->Chipset == RADEONCards[i].pci_device_id) { -+ RADEONCardInfo *card = &RADEONCards[i]; -+ info->ChipFamily = card->chip_family; -+ info->IsMobility = card->mobility; -+ info->IsIGP = card->igp; -+ break; -+ } -+ } -+ -+ info->cardType = CARD_PCI; -+ -+ PCI_READ_LONG(info->PciInfo, &cmd_stat, PCI_CMD_STAT_REG); -+ if (cmd_stat & RADEON_CAP_LIST) { -+ uint32_t cap_ptr, cap_id; -+ -+ PCI_READ_LONG(info->PciInfo, &cap_ptr, RADEON_CAPABILITIES_PTR_PCI_CONFIG); -+ cap_ptr &= RADEON_CAP_PTR_MASK; -+ -+ while(cap_ptr != RADEON_CAP_ID_NULL) { -+ PCI_READ_LONG(info->PciInfo, &cap_id, cap_ptr); -+ if ((cap_id & 0xff)== RADEON_CAP_ID_AGP) { -+ info->cardType = CARD_AGP; -+ break; -+ } -+ if ((cap_id & 0xff)== RADEON_CAP_ID_EXP) { -+ info->cardType = CARD_PCIE; -+ break; -+ } -+ cap_ptr = (cap_id >> 8) & RADEON_CAP_PTR_MASK; -+ } -+ } -+ -+ -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "%s card detected\n", -+ (info->cardType==CARD_PCI) ? "PCI" : -+ (info->cardType==CARD_PCIE) ? "PCIE" : "AGP"); -+ -+ /* treat PCIE IGP cards as PCI */ -+ if (info->cardType == CARD_PCIE && info->IsIGP) -+ info->cardType = CARD_PCI; -+ -+ if ((info->ChipFamily >= CHIP_FAMILY_R600) && info->IsIGP) -+ info->cardType = CARD_PCIE; -+ -+ /* not sure about gart table requirements */ -+ if ((info->ChipFamily == CHIP_FAMILY_RS600) && info->IsIGP) -+ info->cardType = CARD_PCIE; -+ -+#ifdef RENDER -+ info->RenderAccel = xf86ReturnOptValBool(info->Options, OPTION_RENDER_ACCEL, -+ info->Chipset != PCI_CHIP_RN50_515E && -+ info->Chipset != PCI_CHIP_RN50_5969); -+#endif -+ return TRUE; -+} -+ -+static Bool radeon_alloc_dri(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ if (!(info->dri = xcalloc(1, sizeof(struct radeon_dri)))) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate dri rec!\n"); -+ return FALSE; -+ } -+ -+ if (!(info->cp = xcalloc(1, sizeof(struct radeon_cp)))) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate cp rec!\n"); -+ return FALSE; -+ } -+ return TRUE; -+} -+ -+Bool RADEONPreInit_KMS(ScrnInfoPtr pScrn, int flags) -+{ -+ RADEONInfoPtr info; -+ RADEONEntPtr pRADEONEnt; -+ DevUnion* pPriv; -+ int zaphod_mask = 0; -+ char *bus_id; -+ Gamma zeros = { 0.0, 0.0, 0.0 }; -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONPreInit_KMS\n"); -+ if (pScrn->numEntities != 1) return FALSE; -+ if (!RADEONGetRec(pScrn)) return FALSE; -+ -+ info = RADEONPTR(pScrn); -+ info->MMIO = NULL; -+ info->IsSecondary = FALSE; -+ info->IsPrimary = FALSE; -+ info->kms_enabled = TRUE; -+ info->pEnt = xf86GetEntityInfo(pScrn->entityList[pScrn->numEntities - 1]); -+ if (info->pEnt->location.type != BUS_PCI) goto fail; -+ -+ pPriv = xf86GetEntityPrivate(pScrn->entityList[0], -+ getRADEONEntityIndex()); -+ pRADEONEnt = pPriv->ptr; -+ -+ if(xf86IsEntityShared(pScrn->entityList[0])) -+ { -+ if(xf86IsPrimInitDone(pScrn->entityList[0])) -+ { -+ info->IsSecondary = TRUE; -+ pRADEONEnt->pSecondaryScrn = pScrn; -+ } -+ else -+ { -+ info->IsPrimary = TRUE; -+ xf86SetPrimInitDone(pScrn->entityList[0]); -+ pRADEONEnt->pPrimaryScrn = pScrn; -+ pRADEONEnt->HasSecondary = FALSE; -+ } -+ } -+ -+ info->PciInfo = xf86GetPciInfoForEntity(info->pEnt->index); -+ pScrn->monitor = pScrn->confScreen->monitor; -+ -+ if (!RADEONPreInitVisual(pScrn)) -+ goto fail; -+ -+ xf86CollectOptions(pScrn, NULL); -+ if (!(info->Options = xalloc(sizeof(RADEONOptions_KMS)))) -+ goto fail; -+ -+ memcpy(info->Options, RADEONOptions_KMS, sizeof(RADEONOptions_KMS)); -+ xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, info->Options); -+ -+ if (!RADEONPreInitWeight(pScrn)) -+ goto fail; -+ -+ if (!RADEONPreInitChipType_KMS(pScrn)) -+ goto fail; -+ -+ if (!radeon_alloc_dri(pScrn)) -+ return FALSE; -+ -+ zaphod_mask = 0xf; -+ if (info->IsPrimary) -+ zaphod_mask = 0xd; -+ if (info->IsSecondary) -+ zaphod_mask = 0x2; -+ -+ bus_id = DRICreatePCIBusID(info->PciInfo); -+ if (drmmode_pre_init(pScrn, &info->drmmode, bus_id, "radeon", pScrn->bitsPerPixel / 8, zaphod_mask) == FALSE) { -+ xfree(bus_id); -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Kernel modesetting setup failed\n"); -+ goto fail; -+ } -+ -+ info->dri->drmFD = info->drmmode.fd; -+ info->dri2.drm_fd = info->drmmode.fd; -+ info->dri2.enabled = FALSE; -+ xfree(bus_id); -+ info->dri->pKernelDRMVersion = drmGetVersion(info->dri->drmFD); -+ if (info->dri->pKernelDRMVersion == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "RADEONDRIGetVersion failed to get the DRM version\n"); -+ goto fail; -+ } -+ -+ { -+ struct drm_radeon_gem_info mminfo; -+ -+ if (!drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) -+ { -+ info->vram_size = mminfo.vram_visible; -+ info->gart_size = mminfo.gart_size; -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, -+ "mem size init: gart size :%llx vram size: s:%llx visible:%llx\n", -+ mminfo.gart_size, mminfo.vram_size, mminfo.vram_visible); -+ } -+ } -+ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ info->useEXA = TRUE; -+ info->directRenderingEnabled = TRUE; -+ } -+ -+ RADEONSetPitch(pScrn); -+ -+ /* Set display resolution */ -+ xf86SetDpi(pScrn, 0, 0); -+ -+ /* Get ScreenInit function */ -+ if (!xf86LoadSubModule(pScrn, "fb")) return FALSE; -+ -+ if (!xf86SetGamma(pScrn, zeros)) return FALSE; -+ -+ if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) { -+ if (!xf86LoadSubModule(pScrn, "ramdac")) return FALSE; -+ } -+ -+ if (!RADEONPreInitAccel_KMS(pScrn)) goto fail; -+ -+ if (pScrn->modes == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "No modes.\n"); -+ goto fail; -+ } -+ -+ return TRUE; -+ fail: -+ RADEONFreeRec(pScrn); -+ return FALSE; -+ -+} -+ -+static Bool RADEONCursorInit_KMS(ScreenPtr pScreen) -+{ -+ return xf86_cursors_init (pScreen, CURSOR_WIDTH, CURSOR_HEIGHT, -+ (HARDWARE_CURSOR_TRUECOLOR_AT_8BPP | -+ HARDWARE_CURSOR_AND_SOURCE_WITH_MASK | -+ HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_1 | -+ HARDWARE_CURSOR_ARGB)); -+} -+ -+static Bool RADEONSaveScreen_KMS(ScreenPtr pScreen, int mode) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ Bool unblank; -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONSaveScreen(%d)\n", mode); -+ -+ unblank = xf86IsUnblank(mode); -+ if (unblank) SetTimeSinceLastInputEvent(); -+ -+ if ((pScrn != NULL) && pScrn->vtSema) { -+ if (unblank) -+ RADEONUnblank(pScrn); -+ else -+ RADEONBlank(pScrn); -+ } -+ return TRUE; -+} -+ -+/* Called at the end of each server generation. Restore the original -+ * text mode, unmap video memory, and unwrap and call the saved -+ * CloseScreen function. -+ */ -+static Bool RADEONCloseScreen_KMS(int scrnIndex, ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONCloseScreen\n"); -+ -+ if (info->accel_state->exa) { -+ exaDriverFini(pScreen); -+ xfree(info->accel_state->exa); -+ info->accel_state->exa = NULL; -+ } -+ -+ drmDropMaster(info->dri->drmFD); -+ -+ if (info->cursor) xf86DestroyCursorInfoRec(info->cursor); -+ info->cursor = NULL; -+ -+ pScrn->vtSema = FALSE; -+ xf86ClearPrimInitDone(info->pEnt->index); -+ pScreen->BlockHandler = info->BlockHandler; -+ pScreen->CloseScreen = info->CloseScreen; -+ return (*pScreen->CloseScreen)(scrnIndex, pScreen); -+} -+ -+ -+void RADEONFreeScreen_KMS(int scrnIndex, int flags) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONFreeScreen\n"); -+ -+ /* when server quits at PreInit, we don't need do this anymore*/ -+ if (!info) return; -+ -+ RADEONFreeRec(pScrn); -+} -+ -+Bool RADEONScreenInit_KMS(int scrnIndex, ScreenPtr pScreen, -+ int argc, char **argv) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int subPixelOrder = SubPixelUnknown; -+ char* s; -+ void *front_ptr; -+ -+ pScrn->fbOffset = 0; -+ -+ miClearVisualTypes(); -+ if (!miSetVisualTypes(pScrn->depth, -+ miGetDefaultVisualMask(pScrn->depth), -+ pScrn->rgbBits, -+ pScrn->defaultVisual)) return FALSE; -+ miSetPixmapDepths (); -+ -+ info->directRenderingEnabled = FALSE; -+ if (info->r600_shadow_fb == FALSE) -+ info->directRenderingEnabled = radeon_dri2_screen_init(pScreen); -+ -+ front_ptr = info->FB; -+ -+ info->bufmgr = radeon_bo_manager_gem_ctor(info->dri->drmFD); -+ if (!info->bufmgr) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "failed to initialise GEM buffer manager"); -+ return FALSE; -+ } -+ drmmode_set_bufmgr(pScrn, &info->drmmode, info->bufmgr); -+ -+ info->csm = radeon_cs_manager_gem_ctor(info->dri->drmFD); -+ if (!info->csm) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "failed to initialise command submission manager"); -+ return FALSE; -+ } -+ -+ info->cs = radeon_cs_create(info->csm, RADEON_BUFFER_SIZE/4); -+ if (!info->cs) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "failed to initialise command submission buffer"); -+ return FALSE; -+ } -+ -+ radeon_cs_set_limit(info->cs, RADEON_GEM_DOMAIN_GTT, info->gart_size); -+ radeon_cs_space_set_flush(info->cs, (void(*)(void *))radeon_cs_flush_indirect, pScrn); -+ -+ radeon_setup_kernel_mem(pScreen); -+ front_ptr = info->front_bo->ptr; -+ -+ if (info->r600_shadow_fb) { -+ info->fb_shadow = xcalloc(1, -+ pScrn->displayWidth * pScrn->virtualY * -+ ((pScrn->bitsPerPixel + 7) >> 3)); -+ if (info->fb_shadow == NULL) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "Failed to allocate shadow framebuffer\n"); -+ info->r600_shadow_fb = FALSE; -+ } else { -+ if (!fbScreenInit(pScreen, info->fb_shadow, -+ pScrn->virtualX, pScrn->virtualY, -+ pScrn->xDpi, pScrn->yDpi, pScrn->displayWidth, -+ pScrn->bitsPerPixel)) -+ return FALSE; -+ } -+ } -+ -+ if (info->r600_shadow_fb == FALSE) { -+ /* Init fb layer */ -+ if (!fbScreenInit(pScreen, front_ptr, -+ pScrn->virtualX, pScrn->virtualY, -+ pScrn->xDpi, pScrn->yDpi, pScrn->displayWidth, -+ pScrn->bitsPerPixel)) -+ return FALSE; -+ } -+ -+ xf86SetBlackWhitePixels(pScreen); -+ -+ if (pScrn->bitsPerPixel > 8) { -+ VisualPtr visual; -+ -+ visual = pScreen->visuals + pScreen->numVisuals; -+ while (--visual >= pScreen->visuals) { -+ if ((visual->class | DynamicClass) == DirectColor) { -+ visual->offsetRed = pScrn->offset.red; -+ visual->offsetGreen = pScrn->offset.green; -+ visual->offsetBlue = pScrn->offset.blue; -+ visual->redMask = pScrn->mask.red; -+ visual->greenMask = pScrn->mask.green; -+ visual->blueMask = pScrn->mask.blue; -+ } -+ } -+ } -+ -+ /* Must be after RGB order fixed */ -+ fbPictureInit (pScreen, 0, 0); -+ -+#ifdef RENDER -+ if ((s = xf86GetOptValString(info->Options, OPTION_SUBPIXEL_ORDER))) { -+ if (strcmp(s, "RGB") == 0) subPixelOrder = SubPixelHorizontalRGB; -+ else if (strcmp(s, "BGR") == 0) subPixelOrder = SubPixelHorizontalBGR; -+ else if (strcmp(s, "NONE") == 0) subPixelOrder = SubPixelNone; -+ PictureSetSubpixelOrder (pScreen, subPixelOrder); -+ } -+#endif -+ -+ pScrn->vtSema = TRUE; -+ /* Backing store setup */ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing backing store\n"); -+ miInitializeBackingStore(pScreen); -+ xf86SetBackingStore(pScreen); -+ -+ -+ if (info->directRenderingEnabled) { -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Direct rendering enabled\n"); -+ } else { -+ xf86DrvMsg(pScrn->scrnIndex, X_WARNING, -+ "Direct rendering disabled\n"); -+ } -+ -+ if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing Acceleration\n"); -+ if (RADEONAccelInit(pScreen)) { -+ xf86DrvMsg(scrnIndex, X_INFO, "Acceleration enabled\n"); -+ info->accelOn = TRUE; -+ } else { -+ xf86DrvMsg(scrnIndex, X_ERROR, -+ "Acceleration initialization failed\n"); -+ xf86DrvMsg(scrnIndex, X_INFO, "Acceleration disabled\n"); -+ info->accelOn = FALSE; -+ } -+ } else { -+ xf86DrvMsg(scrnIndex, X_INFO, "Acceleration disabled\n"); -+ info->accelOn = FALSE; -+ } -+ -+ /* Init DPMS */ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing DPMS\n"); -+ xf86DPMSInit(pScreen, xf86DPMSSet, 0); -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing Cursor\n"); -+ -+ /* Set Silken Mouse */ -+ xf86SetSilkenMouse(pScreen); -+ -+ /* Cursor setup */ -+ miDCInitialize(pScreen, xf86GetPointerScreenFuncs()); -+ -+ if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) { -+ if (RADEONCursorInit_KMS(pScreen)) { -+ } -+ } -+ -+ /* Init Xv */ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Initializing Xv\n"); -+ RADEONInitVideo(pScreen); -+ -+ if (info->r600_shadow_fb == TRUE) { -+ if (!shadowSetup(pScreen)) { -+ xf86DrvMsg(scrnIndex, X_ERROR, -+ "Shadowfb initialization failed\n"); -+ return FALSE; -+ } -+ } -+ pScrn->pScreen = pScreen; -+ -+ if (!drmmode_set_desired_modes(pScrn, &info->drmmode)) -+ return FALSE; -+ -+ /* Provide SaveScreen & wrap BlockHandler and CloseScreen */ -+ /* Wrap CloseScreen */ -+ info->CloseScreen = pScreen->CloseScreen; -+ pScreen->CloseScreen = RADEONCloseScreen_KMS; -+ pScreen->SaveScreen = RADEONSaveScreen_KMS; -+ info->BlockHandler = pScreen->BlockHandler; -+ pScreen->BlockHandler = RADEONBlockHandler_KMS; -+ info->CreateScreenResources = pScreen->CreateScreenResources; -+ pScreen->CreateScreenResources = RADEONCreateScreenResources_KMS; -+ -+ if (!xf86CrtcScreenInit (pScreen)) -+ return FALSE; -+ -+ /* Wrap pointer motion to flip touch screen around */ -+// info->PointerMoved = pScrn->PointerMoved; -+// pScrn->PointerMoved = RADEONPointerMoved; -+ -+ if (!drmmode_setup_colormap(pScreen, pScrn)) -+ return FALSE; -+ -+ /* Note unused options */ -+ if (serverGeneration == 1) -+ xf86ShowUnusedOptions(pScrn->scrnIndex, pScrn->options); -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONScreenInit finished\n"); -+ -+ return TRUE; -+} -+ -+Bool RADEONEnterVT_KMS(int scrnIndex, int flags) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int ret; -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONEnterVT_KMS\n"); -+ -+ -+ ret = drmSetMaster(info->dri->drmFD); -+ if (ret) -+ ErrorF("Unable to retrieve master\n"); -+ -+ info->accel_state->XInited3D = FALSE; -+ info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; -+ -+ pScrn->vtSema = TRUE; -+ -+ if (!drmmode_set_desired_modes(pScrn, &info->drmmode)) -+ return FALSE; -+ -+ if (info->adaptor) -+ RADEONResetVideo(pScrn); -+ -+ return TRUE; -+} -+ -+ -+void RADEONLeaveVT_KMS(int scrnIndex, int flags) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "RADEONLeaveVT_KMS\n"); -+ -+ drmDropMaster(info->dri->drmFD); -+ -+#ifdef HAVE_FREE_SHADOW -+ xf86RotateFreeShadow(pScrn); -+#endif -+ -+ xf86_hide_cursors (pScrn); -+ info->accel_state->XInited3D = FALSE; -+ info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -+ "Ok, leaving now...\n"); -+} -+ -+ -+Bool RADEONSwitchMode_KMS(int scrnIndex, DisplayModePtr mode, int flags) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ Bool ret; -+ ret = xf86SetSingleMode (pScrn, mode, RR_Rotate_0); -+ return ret; -+ -+} -+ -+void RADEONAdjustFrame_KMS(int scrnIndex, int x, int y, int flags) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ drmmode_adjust_frame(pScrn, &info->drmmode, x, y, flags); -+ return; -+} -+ -+static Bool radeon_setup_kernel_mem(ScreenPtr pScreen) -+{ -+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+ int cpp = info->CurrentLayout.pixel_bytes; -+ int screen_size; -+ int stride = pScrn->displayWidth * cpp; -+ int total_size_bytes = 0, remain_size_bytes; -+ int pagesize = 4096; -+ -+ if (info->accel_state->exa != NULL) { -+ xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map already initialized\n"); -+ return FALSE; -+ } -+ if (info->r600_shadow_fb == FALSE) { -+ info->accel_state->exa = exaDriverAlloc(); -+ if (info->accel_state->exa == NULL) -+ return FALSE; -+ } -+ -+ screen_size = RADEON_ALIGN(pScrn->virtualY, 16) * stride; -+ { -+ int cursor_size = 64 * 4 * 64; -+ int c; -+ -+ cursor_size = RADEON_ALIGN(cursor_size, pagesize); -+ for (c = 0; c < xf86_config->num_crtc; c++) { -+ /* cursor objects */ -+ info->cursor_bo[c] = radeon_bo_open(info->bufmgr, 0, cursor_size, -+ 0, RADEON_GEM_DOMAIN_VRAM, 0); -+ if (!info->cursor_bo[c]) { -+ return FALSE; -+ } -+ -+ if (radeon_bo_map(info->cursor_bo[c], 1)) { -+ ErrorF("Failed to map cursor buffer memory\n"); -+ } -+ -+ drmmode_set_cursor(pScrn, &info->drmmode, c, info->cursor_bo[c]); -+ total_size_bytes += cursor_size; -+ } -+ } -+ -+ screen_size = RADEON_ALIGN(screen_size, pagesize); -+ /* keep area front front buffer - but don't allocate it yet */ -+ total_size_bytes += screen_size; -+ -+ /* work out from the mm size what the exa / tex sizes need to be */ -+ remain_size_bytes = info->vram_size - total_size_bytes; -+ -+ info->dri->textureSize = 0; -+ -+ info->front_bo = radeon_bo_open(info->bufmgr, 0, screen_size, -+ 0, RADEON_GEM_DOMAIN_VRAM, 0); -+ if (info->r600_shadow_fb == TRUE) { -+ if (radeon_bo_map(info->front_bo, 1)) { -+ ErrorF("Failed to map cursor buffer memory\n"); -+ } -+ } -+ -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Front buffer size: %dK\n", info->front_bo->size/1024); -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Remaining VRAM size (used for pixmaps): %dK\n", remain_size_bytes/1024); -+ -+ /* set the emit limit at 90% of VRAM */ -+ remain_size_bytes = (remain_size_bytes / 10) * 9; -+ -+ radeon_cs_set_limit(info->cs, RADEON_GEM_DOMAIN_VRAM, remain_size_bytes); -+ return TRUE; -+} -+ -+#endif -diff --git a/src/radeon_legacy_memory.c b/src/radeon_legacy_memory.c -index 861fd97..02b95ed 100644 ---- a/src/radeon_legacy_memory.c -+++ b/src/radeon_legacy_memory.c -@@ -21,6 +21,20 @@ radeon_legacy_allocate_memory(ScrnInfoPtr pScrn, - RADEONInfoPtr info = RADEONPTR(pScrn); - uint32_t offset = 0; - -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ struct radeon_bo *video_bo; -+ -+ video_bo = radeon_bo_open(info->bufmgr, 0, size, 4096, 0, 0); -+ -+ *mem_struct = video_bo; -+ -+ if (!video_bo) -+ return 0; -+ -+ return (uint32_t)-1; -+ } -+#endif - #ifdef USE_EXA - if (info->useEXA) { - ExaOffscreenArea *area = *mem_struct; -@@ -94,6 +108,14 @@ radeon_legacy_free_memory(ScrnInfoPtr pScrn, - void *mem_struct) - { - RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ struct radeon_bo *bo = mem_struct; -+ radeon_bo_unref(bo); -+ return; -+ } -+#endif - #ifdef USE_EXA - ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; - -diff --git a/src/radeon_macros.h b/src/radeon_macros.h -index 19307c8..26d9825 100644 ---- a/src/radeon_macros.h -+++ b/src/radeon_macros.h -@@ -157,4 +157,53 @@ do { \ - #define INPCIE(pScrn, addr) RADEONINPCIE(pScrn, addr) - #define OUTPCIE(pScrn, addr, val) RADEONOUTPCIE(pScrn, addr, val) - -+#define INPCIE_P(pScrn, addr) R600INPCIE_PORT(pScrn, addr) -+#define OUTPCIE_P(pScrn, addr, val) R600OUTPCIE_PORT(pScrn, addr, val) -+ -+#define BEGIN_ACCEL_RELOC(n, r) do { \ -+ int _nqw = (n) + (info->cs ? (r) : 0); \ -+ BEGIN_ACCEL(_nqw); \ -+ } while (0) -+ -+#define CHECK_OFFSET(pPix, mask, type) do { \ -+ if (!info->cs) { \ -+ uint32_t _pix_offset = radeonGetPixmapOffset(pPix); \ -+ if ((_pix_offset & mask) != 0) \ -+ RADEON_FALLBACK(("Bad %s offset 0x%x\n", type, (int)_pix_offset)); \ -+ } \ -+ } while(0) -+ -+#define EMIT_OFFSET(reg, value, pPix, rd, wd) do { \ -+ if (info->cs) { \ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); \ -+ OUT_ACCEL_REG((reg), (value)); \ -+ OUT_RELOC(driver_priv->bo, (rd), (wd)); \ -+ } else { \ -+ uint32_t _pix_offset; \ -+ _pix_offset = radeonGetPixmapOffset(pPix); \ -+ OUT_ACCEL_REG((reg), _pix_offset | value); \ -+ } \ -+ } while(0) -+ -+#define EMIT_READ_OFFSET(reg, value, pPix) EMIT_OFFSET(reg, value, pPix, (RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT), 0) -+#define EMIT_WRITE_OFFSET(reg, value, pPix) EMIT_OFFSET(reg, value, pPix, 0, RADEON_GEM_DOMAIN_VRAM) -+ -+#define OUT_TEXTURE_REG(reg, offset, bo) do { \ -+ if (info->cs) { \ -+ OUT_ACCEL_REG((reg), (offset)); \ -+ OUT_RELOC((bo), RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); \ -+ } else { \ -+ OUT_ACCEL_REG((reg), (offset) + info->fbLocation + pScrn->fbOffset);} \ -+ } while(0) -+ -+#define EMIT_COLORPITCH(reg, value, pPix) do { \ -+ if (info->cs) { \ -+ driver_priv = exaGetPixmapDriverPrivate(pPix); \ -+ OUT_ACCEL_REG((reg), value); \ -+ OUT_RELOC(driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); \ -+ } else { \ -+ OUT_ACCEL_REG((reg), value); \ -+ } \ -+}while(0) -+ - #endif -diff --git a/src/radeon_output.c b/src/radeon_output.c -index 712ac5f..f554824 100644 ---- a/src/radeon_output.c -+++ b/src/radeon_output.c -@@ -209,6 +209,12 @@ radeon_set_active_device(xf86OutputPtr output) - } - } - -+static Bool -+monitor_is_digital(xf86MonPtr MonInfo) -+{ -+ return (MonInfo->rawData[0x14] & 0x80) != 0; -+} -+ - static RADEONMonitorType - radeon_ddc_connected(xf86OutputPtr output) - { -@@ -219,24 +225,12 @@ radeon_ddc_connected(xf86OutputPtr output) - RADEONOutputPrivatePtr radeon_output = output->driver_private; - - if (radeon_output->pI2CBus) { -- /* RV410 RADEON_GPIO_VGA_DDC seems to only work via hw i2c -- * We may want to extend this to other cases if the need arises... -- */ -- if ((info->ChipFamily == CHIP_FAMILY_RV410) && -- (radeon_output->ddc_i2c.mask_clk_reg == RADEON_GPIO_VGA_DDC) && -- info->IsAtomBios) -- MonInfo = radeon_atom_get_edid(output); -- else if (info->get_hardcoded_edid_from_bios) { -+ if (info->get_hardcoded_edid_from_bios) - MonInfo = RADEONGetHardCodedEDIDFromBIOS(output); -- if (MonInfo == NULL) { -- RADEONI2CDoLock(output, TRUE); -- MonInfo = xf86OutputGetEDID(output, radeon_output->pI2CBus); -- RADEONI2CDoLock(output, FALSE); -- } -- } else { -- RADEONI2CDoLock(output, TRUE); -+ if (MonInfo == NULL) { -+ RADEONI2CDoLock(output, radeon_output->pI2CBus, TRUE); - MonInfo = xf86OutputGetEDID(output, radeon_output->pI2CBus); -- RADEONI2CDoLock(output, FALSE); -+ RADEONI2CDoLock(output, radeon_output->pI2CBus, FALSE); - } - } - if (MonInfo) { -@@ -246,9 +240,8 @@ radeon_ddc_connected(xf86OutputPtr output) - break; - case CONNECTOR_DVI_D: - case CONNECTOR_HDMI_TYPE_A: -- case CONNECTOR_HDMI_TYPE_B: - if (radeon_output->shared_ddc) { -- if (MonInfo->rawData[0x14] & 0x80) /* if it's digital and DVI/HDMI/etc. */ -+ if (monitor_is_digital(MonInfo)) - MonType = MT_DFP; - else - MonType = MT_NONE; -@@ -261,8 +254,10 @@ radeon_ddc_connected(xf86OutputPtr output) - * or AUXCH. - */ - MonType = MT_DFP; -+ break; -+ case CONNECTOR_HDMI_TYPE_B: - case CONNECTOR_DVI_I: -- if (MonInfo->rawData[0x14] & 0x80) /* if it's digital and DVI */ -+ if (monitor_is_digital(MonInfo)) - MonType = MT_DFP; - else - MonType = MT_CRT; -@@ -271,7 +266,7 @@ radeon_ddc_connected(xf86OutputPtr output) - case CONNECTOR_DVI_A: - default: - if (radeon_output->shared_ddc) { -- if (MonInfo->rawData[0x14] & 0x80) /* if it's digital and VGA */ -+ if (monitor_is_digital(MonInfo)) - MonType = MT_NONE; - else - MonType = MT_CRT; -@@ -442,8 +437,11 @@ radeon_mode_fixup(xf86OutputPtr output, DisplayModePtr mode, - RADEONInfoPtr info = RADEONPTR(output->scrn); - RADEONOutputPrivatePtr radeon_output = output->driver_private; - radeon_native_mode_ptr native_mode = &radeon_output->native_mode; -+ xf86CrtcPtr crtc = output->crtc; -+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - - radeon_output->Flags &= ~RADEON_USE_RMX; -+ radeon_crtc->scaler_enabled = FALSE; - - /* - * Refresh the Crtc values without INTERLACE_HALVE_V -@@ -454,14 +452,15 @@ radeon_mode_fixup(xf86OutputPtr output, DisplayModePtr mode, - /* decide if we are using RMX */ - if ((radeon_output->active_device & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) - && radeon_output->rmx_type != RMX_OFF) { -- xf86CrtcPtr crtc = output->crtc; -- RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; - - if (IS_AVIVO_VARIANT || radeon_crtc->crtc_id == 0) { - if (mode->HDisplay < native_mode->PanelXRes || - mode->VDisplay < native_mode->PanelYRes) { - radeon_output->Flags |= RADEON_USE_RMX; -+ radeon_crtc->scaler_enabled = TRUE; - if (IS_AVIVO_VARIANT) { -+ radeon_crtc->hsc = (float)mode->HDisplay / (float)native_mode->PanelXRes; -+ radeon_crtc->vsc = (float)mode->VDisplay / (float)native_mode->PanelYRes; - /* set to the panel's native mode */ - adjusted_mode->HDisplay = native_mode->PanelXRes; - adjusted_mode->VDisplay = native_mode->PanelYRes; -@@ -507,6 +506,13 @@ radeon_mode_fixup(xf86OutputPtr output, DisplayModePtr mode, - } - } - -+ /* FIXME: vsc/hsc */ -+ if (radeon_output->active_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) { -+ radeon_crtc->scaler_enabled = TRUE; -+ radeon_crtc->hsc = (float)mode->HDisplay / (float)640; -+ radeon_crtc->vsc = (float)mode->VDisplay / (float)480; -+ } -+ - if (IS_AVIVO_VARIANT) { - /* hw bug */ - if ((mode->Flags & V_INTERLACE) -@@ -520,8 +526,32 @@ radeon_mode_fixup(xf86OutputPtr output, DisplayModePtr mode, - static void - radeon_mode_prepare(xf86OutputPtr output) - { -+ RADEONInfoPtr info = RADEONPTR(output->scrn); -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR (output->scrn); -+ int o; -+ -+ for (o = 0; o < config->num_output; o++) { -+ xf86OutputPtr loop_output = config->output[o]; -+ if (loop_output == output) -+ continue; -+ else if (loop_output->crtc) { -+ xf86CrtcPtr other_crtc = loop_output->crtc; -+ RADEONCrtcPrivatePtr other_radeon_crtc = other_crtc->driver_private; -+ if (other_crtc->enabled) { -+ if (other_radeon_crtc->initialized) { -+ radeon_crtc_dpms(other_crtc, DPMSModeOff); -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) -+ atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 1); -+ radeon_dpms(loop_output, DPMSModeOff); -+ } -+ } -+ } -+ } -+ - radeon_bios_output_lock(output, TRUE); - radeon_dpms(output, DPMSModeOff); -+ radeon_crtc_dpms(output->crtc, DPMSModeOff); -+ - } - - static void -@@ -541,7 +571,30 @@ radeon_mode_set(xf86OutputPtr output, DisplayModePtr mode, - static void - radeon_mode_commit(xf86OutputPtr output) - { -+ RADEONInfoPtr info = RADEONPTR(output->scrn); -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR (output->scrn); -+ int o; -+ -+ for (o = 0; o < config->num_output; o++) { -+ xf86OutputPtr loop_output = config->output[o]; -+ if (loop_output == output) -+ continue; -+ else if (loop_output->crtc) { -+ xf86CrtcPtr other_crtc = loop_output->crtc; -+ RADEONCrtcPrivatePtr other_radeon_crtc = other_crtc->driver_private; -+ if (other_crtc->enabled) { -+ if (other_radeon_crtc->initialized) { -+ radeon_crtc_dpms(other_crtc, DPMSModeOn); -+ if (IS_AVIVO_VARIANT || info->r4xx_atom) -+ atombios_lock_crtc(info->atomBIOS, other_radeon_crtc->crtc_id, 0); -+ radeon_dpms(loop_output, DPMSModeOn); -+ } -+ } -+ } -+ } -+ - radeon_dpms(output, DPMSModeOn); -+ radeon_crtc_dpms(output->crtc, DPMSModeOn); - radeon_bios_output_lock(output, FALSE); - } - -@@ -1174,7 +1227,7 @@ radeon_create_resources(xf86OutputPtr output) - } - } - -- if ((!IS_AVIVO_VARIANT) && (radeon_output->devices & (ATOM_DEVICE_DFP2_SUPPORT))) { -+ if ((!IS_AVIVO_VARIANT) && (radeon_output->devices & (ATOM_DEVICE_DFP1_SUPPORT))) { - tmds_pll_atom = MAKE_ATOM("tmds_pll"); - - err = RRConfigureOutputProperty(output->randr_output, tmds_pll_atom, -@@ -1608,16 +1661,27 @@ static const xf86OutputFuncsRec radeon_output_funcs = { - }; - - Bool --RADEONI2CDoLock(xf86OutputPtr output, int lock_state) -+RADEONI2CDoLock(xf86OutputPtr output, I2CBusPtr b, int lock_state) - { - ScrnInfoPtr pScrn = output->scrn; - RADEONInfoPtr info = RADEONPTR(pScrn); -- RADEONOutputPrivatePtr radeon_output = output->driver_private; -- RADEONI2CBusPtr pRADEONI2CBus = radeon_output->pI2CBus->DriverPrivate.ptr; -+ RADEONI2CBusPtr pRADEONI2CBus = b->DriverPrivate.ptr; - unsigned char *RADEONMMIO = info->MMIO; - uint32_t temp; - - if (lock_state) { -+ /* RV410 appears to have a bug where the hw i2c in reset -+ * holds the i2c port in a bad state - switch hw i2c away before -+ * doing DDC - do this for all r200s/r300s for safety sakes */ -+ if ((info->ChipFamily >= CHIP_FAMILY_R200) && (!IS_AVIVO_VARIANT)) { -+ if (pRADEONI2CBus->mask_clk_reg == RADEON_GPIO_MONID) -+ OUTREG(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST | -+ R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1))); -+ else -+ OUTREG(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST | -+ R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3))); -+ } -+ - temp = INREG(pRADEONI2CBus->a_clk_reg); - temp &= ~(pRADEONI2CBus->a_clk_mask); - OUTREG(pRADEONI2CBus->a_clk_reg, temp); -@@ -2663,12 +2727,12 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - RADEONConnectorType conntype = info->BiosConnector[i].ConnectorType; - if ((conntype == CONNECTOR_DVI_D) || - (conntype == CONNECTOR_DVI_I) || -- (conntype == CONNECTOR_DVI_A)) { -+ (conntype == CONNECTOR_DVI_A) || -+ (conntype == CONNECTOR_HDMI_TYPE_B)) { - num_dvi++; - } else if (conntype == CONNECTOR_VGA) { - num_vga++; -- } else if ((conntype == CONNECTOR_HDMI_TYPE_A) || -- (conntype == CONNECTOR_HDMI_TYPE_B)) { -+ } else if (conntype == CONNECTOR_HDMI_TYPE_A) { - num_hdmi++; - } else if (conntype == CONNECTOR_DISPLAY_PORT) { - num_dp++; -@@ -2698,14 +2762,18 @@ Bool RADEONSetupConnectors(ScrnInfoPtr pScrn) - radeon_output->linkb = info->BiosConnector[i].linkb; - radeon_output->connector_id = info->BiosConnector[i].connector_object; - -+ /* Technically HDMI-B is a glorfied DL DVI so the bios is correct, -+ * but this can be confusing to users when it comes to output names, -+ * so call it DVI -+ */ - if ((conntype == CONNECTOR_DVI_D) || - (conntype == CONNECTOR_DVI_I) || -- (conntype == CONNECTOR_DVI_A)) { -+ (conntype == CONNECTOR_DVI_A) || -+ (conntype == CONNECTOR_HDMI_TYPE_B)) { - output = RADEONOutputCreate(pScrn, "DVI-%d", --num_dvi); - } else if (conntype == CONNECTOR_VGA) { - output = RADEONOutputCreate(pScrn, "VGA-%d", --num_vga); -- } else if ((conntype == CONNECTOR_HDMI_TYPE_A) || -- (conntype == CONNECTOR_HDMI_TYPE_B)) { -+ } else if (conntype == CONNECTOR_HDMI_TYPE_A) { - output = RADEONOutputCreate(pScrn, "HDMI-%d", --num_hdmi); - } else if (conntype == CONNECTOR_DISPLAY_PORT) { - output = RADEONOutputCreate(pScrn, "DisplayPort-%d", --num_dp); -diff --git a/src/radeon_pci_chipset_gen.h b/src/radeon_pci_chipset_gen.h -index d61c57d..1b85dcc 100644 ---- a/src/radeon_pci_chipset_gen.h -+++ b/src/radeon_pci_chipset_gen.h -@@ -40,6 +40,8 @@ PciChipsets RADEONPciChipsets[] = { - { PCI_CHIP_R420_JN, PCI_CHIP_R420_JN, RES_SHARED_VGA }, - { PCI_CHIP_R420_4A4F, PCI_CHIP_R420_4A4F, RES_SHARED_VGA }, - { PCI_CHIP_R420_JP, PCI_CHIP_R420_JP, RES_SHARED_VGA }, -+ { PCI_CHIP_R420_JT, PCI_CHIP_R420_JT, RES_SHARED_VGA }, -+ { PCI_CHIP_R481_4B48, PCI_CHIP_R481_4B48, RES_SHARED_VGA }, - { PCI_CHIP_R481_4B49, PCI_CHIP_R481_4B49, RES_SHARED_VGA }, - { PCI_CHIP_R481_4B4A, PCI_CHIP_R481_4B4A, RES_SHARED_VGA }, - { PCI_CHIP_R481_4B4B, PCI_CHIP_R481_4B4B, RES_SHARED_VGA }, -@@ -254,6 +256,7 @@ PciChipsets RADEONPciChipsets[] = { - { PCI_CHIP_RV770_9440, PCI_CHIP_RV770_9440, RES_SHARED_VGA }, - { PCI_CHIP_RV770_9441, PCI_CHIP_RV770_9441, RES_SHARED_VGA }, - { PCI_CHIP_RV770_9442, PCI_CHIP_RV770_9442, RES_SHARED_VGA }, -+ { PCI_CHIP_RV770_9443, PCI_CHIP_RV770_9443, RES_SHARED_VGA }, - { PCI_CHIP_RV770_9444, PCI_CHIP_RV770_9444, RES_SHARED_VGA }, - { PCI_CHIP_RV770_9446, PCI_CHIP_RV770_9446, RES_SHARED_VGA }, - { PCI_CHIP_RV770_944A, PCI_CHIP_RV770_944A, RES_SHARED_VGA }, -@@ -271,15 +274,26 @@ PciChipsets RADEONPciChipsets[] = { - { PCI_CHIP_RV770_946B, PCI_CHIP_RV770_946B, RES_SHARED_VGA }, - { PCI_CHIP_RV770_947A, PCI_CHIP_RV770_947A, RES_SHARED_VGA }, - { PCI_CHIP_RV770_947B, PCI_CHIP_RV770_947B, RES_SHARED_VGA }, -+ { PCI_CHIP_RV730_9480, PCI_CHIP_RV730_9480, RES_SHARED_VGA }, - { PCI_CHIP_RV730_9487, PCI_CHIP_RV730_9487, RES_SHARED_VGA }, -+ { PCI_CHIP_RV730_9488, PCI_CHIP_RV730_9488, RES_SHARED_VGA }, - { PCI_CHIP_RV730_9489, PCI_CHIP_RV730_9489, RES_SHARED_VGA }, - { PCI_CHIP_RV730_948F, PCI_CHIP_RV730_948F, RES_SHARED_VGA }, - { PCI_CHIP_RV730_9490, PCI_CHIP_RV730_9490, RES_SHARED_VGA }, - { PCI_CHIP_RV730_9491, PCI_CHIP_RV730_9491, RES_SHARED_VGA }, -+ { PCI_CHIP_RV730_9495, PCI_CHIP_RV730_9495, RES_SHARED_VGA }, - { PCI_CHIP_RV730_9498, PCI_CHIP_RV730_9498, RES_SHARED_VGA }, - { PCI_CHIP_RV730_949C, PCI_CHIP_RV730_949C, RES_SHARED_VGA }, - { PCI_CHIP_RV730_949E, PCI_CHIP_RV730_949E, RES_SHARED_VGA }, - { PCI_CHIP_RV730_949F, PCI_CHIP_RV730_949F, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94A0, PCI_CHIP_RV740_94A0, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94A1, PCI_CHIP_RV740_94A1, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94A3, PCI_CHIP_RV740_94A3, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94B1, PCI_CHIP_RV740_94B1, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94B3, PCI_CHIP_RV740_94B3, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94B4, PCI_CHIP_RV740_94B4, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94B5, PCI_CHIP_RV740_94B5, RES_SHARED_VGA }, -+ { PCI_CHIP_RV740_94B9, PCI_CHIP_RV740_94B9, RES_SHARED_VGA }, - { PCI_CHIP_RV610_94C0, PCI_CHIP_RV610_94C0, RES_SHARED_VGA }, - { PCI_CHIP_RV610_94C1, PCI_CHIP_RV610_94C1, RES_SHARED_VGA }, - { PCI_CHIP_RV610_94C3, PCI_CHIP_RV610_94C3, RES_SHARED_VGA }, -@@ -312,6 +326,7 @@ PciChipsets RADEONPciChipsets[] = { - { PCI_CHIP_RV710_9552, PCI_CHIP_RV710_9552, RES_SHARED_VGA }, - { PCI_CHIP_RV710_9553, PCI_CHIP_RV710_9553, RES_SHARED_VGA }, - { PCI_CHIP_RV710_9555, PCI_CHIP_RV710_9555, RES_SHARED_VGA }, -+ { PCI_CHIP_RV710_9557, PCI_CHIP_RV710_9557, RES_SHARED_VGA }, - { PCI_CHIP_RV630_9580, PCI_CHIP_RV630_9580, RES_SHARED_VGA }, - { PCI_CHIP_RV630_9581, PCI_CHIP_RV630_9581, RES_SHARED_VGA }, - { PCI_CHIP_RV630_9583, PCI_CHIP_RV630_9583, RES_SHARED_VGA }, -diff --git a/src/radeon_pci_device_match_gen.h b/src/radeon_pci_device_match_gen.h -index a06b4a6..64127bd 100644 ---- a/src/radeon_pci_device_match_gen.h -+++ b/src/radeon_pci_device_match_gen.h -@@ -40,6 +40,8 @@ static const struct pci_id_match radeon_device_match[] = { - ATI_DEVICE_MATCH( PCI_CHIP_R420_JN, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_R420_4A4F, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_R420_JP, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_R420_JT, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_R481_4B48, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_R481_4B49, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_R481_4B4A, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_R481_4B4B, 0 ), -@@ -254,6 +256,7 @@ static const struct pci_id_match radeon_device_match[] = { - ATI_DEVICE_MATCH( PCI_CHIP_RV770_9440, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_9441, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_9442, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV770_9443, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_9444, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_9446, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_944A, 0 ), -@@ -271,15 +274,26 @@ static const struct pci_id_match radeon_device_match[] = { - ATI_DEVICE_MATCH( PCI_CHIP_RV770_946B, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_947A, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV770_947B, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV730_9480, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_9487, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV730_9488, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_9489, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_948F, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_9490, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_9491, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV730_9495, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_9498, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_949C, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_949E, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV730_949F, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94A0, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94A1, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94A3, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94B1, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94B3, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94B4, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94B5, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV740_94B9, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV610_94C0, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV610_94C1, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV610_94C3, 0 ), -@@ -312,6 +326,7 @@ static const struct pci_id_match radeon_device_match[] = { - ATI_DEVICE_MATCH( PCI_CHIP_RV710_9552, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV710_9553, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV710_9555, 0 ), -+ ATI_DEVICE_MATCH( PCI_CHIP_RV710_9557, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV630_9580, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV630_9581, 0 ), - ATI_DEVICE_MATCH( PCI_CHIP_RV630_9583, 0 ), -diff --git a/src/radeon_pm.c b/src/radeon_pm.c -new file mode 100644 -index 0000000..fe8f214 ---- /dev/null -+++ b/src/radeon_pm.c -@@ -0,0 +1,886 @@ -+/* -+ * Copyright 2009 Advanced Micro Devices, Inc. -+ * -+ * All Rights Reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining -+ * a copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation on the rights to use, copy, modify, merge, -+ * publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial -+ * portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR -+ * AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ * -+ * Author: Alex Deucher -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+ /* Driver data structures */ -+#include "radeon.h" -+#include "radeon_reg.h" -+#include "radeon_macros.h" -+#include "radeon_atombios.h" -+ -+#include "ati_pciids_gen.h" -+ -+/* 10 khz */ -+static uint32_t calc_eng_mem_clock(ScrnInfoPtr pScrn, -+ uint32_t req_clock, -+ int ref_div, -+ int *fb_div, -+ int *post_div) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ RADEONPLLPtr pll = &info->pll; -+ -+ if (req_clock < 15000) { -+ *post_div = 8; -+ req_clock *= 8; -+ } else if (req_clock < 30000) { -+ *post_div = 4; -+ req_clock *= 4; -+ } else if (req_clock < 60000) { -+ *post_div = 2; -+ req_clock *= 2; -+ } else -+ *post_div = 1; -+ -+ req_clock *= ref_div; -+ req_clock += pll->reference_freq; -+ req_clock /= (2 * pll->reference_freq); -+ -+ *fb_div = req_clock & 0xff; -+ -+ req_clock = (req_clock & 0xffff) << 1; -+ req_clock *= pll->reference_freq; -+ req_clock /= ref_div; -+ req_clock /= *post_div; -+ -+ return req_clock; -+ -+} -+ -+static void -+RADEONSetEngineClock(ScrnInfoPtr pScrn, uint32_t eng_clock) -+{ -+ uint32_t tmp; -+ int ref_div, fb_div, post_div; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ tmp = INPLL(pScrn, RADEON_M_SPLL_REF_FB_DIV); -+ ref_div = tmp & RADEON_M_SPLL_REF_DIV_MASK; -+ -+ eng_clock = calc_eng_mem_clock(pScrn, eng_clock, ref_div, &fb_div, &post_div); -+ -+ tmp = INPLL(pScrn, RADEON_CLK_PIN_CNTL); -+ tmp &= ~RADEON_DONT_USE_XTALIN; -+ OUTPLL(pScrn, RADEON_CLK_PIN_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp &= ~RADEON_SCLK_SRC_SEL_MASK; -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ usleep(10); -+ -+ tmp = INPLL(pScrn, RADEON_SPLL_CNTL); -+ tmp |= RADEON_SPLL_SLEEP; -+ OUTPLL(pScrn, RADEON_SPLL_CNTL, tmp); -+ -+ usleep(2); -+ -+ tmp = INPLL(pScrn, RADEON_SPLL_CNTL); -+ tmp |= RADEON_SPLL_RESET; -+ OUTPLL(pScrn, RADEON_SPLL_CNTL, tmp); -+ -+ usleep(200); -+ -+ tmp = INPLL(pScrn, RADEON_M_SPLL_REF_FB_DIV); -+ tmp &= ~(RADEON_SPLL_FB_DIV_MASK << RADEON_SPLL_FB_DIV_SHIFT); -+ tmp |= (fb_div & RADEON_SPLL_FB_DIV_MASK) << RADEON_SPLL_FB_DIV_SHIFT; -+ OUTPLL(pScrn, RADEON_M_SPLL_REF_FB_DIV, tmp); -+ -+ /* XXX: verify on different asics */ -+ tmp = INPLL(pScrn, RADEON_SPLL_CNTL); -+ tmp &= ~RADEON_SPLL_PVG_MASK; -+ if ((eng_clock * post_div) >= 90000) -+ tmp |= (0x7 << RADEON_SPLL_PVG_SHIFT); -+ else -+ tmp |= (0x4 << RADEON_SPLL_PVG_SHIFT); -+ OUTPLL(pScrn, RADEON_SPLL_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SPLL_CNTL); -+ tmp &= ~RADEON_SPLL_SLEEP; -+ OUTPLL(pScrn, RADEON_SPLL_CNTL, tmp); -+ -+ usleep(2); -+ -+ tmp = INPLL(pScrn, RADEON_SPLL_CNTL); -+ tmp &= ~RADEON_SPLL_RESET; -+ OUTPLL(pScrn, RADEON_SPLL_CNTL, tmp); -+ -+ usleep(200); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp &= ~RADEON_SCLK_SRC_SEL_MASK; -+ switch (post_div) { -+ case 1: -+ default: -+ tmp |= 1; -+ break; -+ case 2: -+ tmp |= 2; -+ break; -+ case 4: -+ tmp |= 3; -+ break; -+ case 8: -+ tmp |= 4; -+ break; -+ } -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ usleep(20); -+ -+ tmp = INPLL(pScrn, RADEON_CLK_PIN_CNTL); -+ tmp |= RADEON_DONT_USE_XTALIN; -+ OUTPLL(pScrn, RADEON_CLK_PIN_CNTL, tmp); -+ -+ usleep(10); -+ -+} -+ -+static void LegacySetClockGating(ScrnInfoPtr pScrn, Bool enable) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ uint32_t tmp; -+ -+ if (enable) { -+ if (!pRADEONEnt->HasCRTC2) { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ if ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > -+ RADEON_CFG_ATI_REV_A13) { -+ tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB); -+ } -+ tmp &= ~(RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_SE | -+ RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE | -+ RADEON_SCLK_FORCE_PB | RADEON_SCLK_FORCE_TAM | -+ RADEON_SCLK_FORCE_TDM); -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ } else if (IS_R300_VARIANT) { -+ if ((info->ChipFamily == CHIP_FAMILY_RS400) || -+ (info->ChipFamily == CHIP_FAMILY_RS480)) { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -+ RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -+ R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -+ R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -+ R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -+ R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -+ tmp |= RADEON_DYN_STOP_LAT_MASK; -+ tmp |= RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_VIP; -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp &= ~RADEON_SCLK_MORE_FORCEON; -+ tmp |= RADEON_SCLK_MORE_MAX_DYN_STOP_LAT; -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp |= (RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb); -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp |= (RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -+ R300_DVOCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ R300_PIXCLK_DVO_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb | -+ R300_PIXCLK_TRANS_ALWAYS_ONb | -+ R300_PIXCLK_TVO_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb); -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ } else if (info->ChipFamily >= CHIP_FAMILY_RV350) { -+ tmp = INPLL(pScrn, R300_SCLK_CNTL2); -+ tmp &= ~(R300_SCLK_FORCE_TCL | -+ R300_SCLK_FORCE_GA | -+ R300_SCLK_FORCE_CBA); -+ tmp |= (R300_SCLK_TCL_MAX_DYN_STOP_LAT | -+ R300_SCLK_GA_MAX_DYN_STOP_LAT | -+ R300_SCLK_CBA_MAX_DYN_STOP_LAT); -+ OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -+ RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -+ R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -+ R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -+ R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -+ R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -+ tmp |= RADEON_DYN_STOP_LAT_MASK; -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp &= ~RADEON_SCLK_MORE_FORCEON; -+ tmp |= RADEON_SCLK_MORE_MAX_DYN_STOP_LAT; -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp |= (RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb); -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp |= (RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -+ R300_DVOCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ R300_PIXCLK_DVO_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb | -+ R300_PIXCLK_TRANS_ALWAYS_ONb | -+ R300_PIXCLK_TVO_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb); -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_MCLK_MISC); -+ tmp |= (RADEON_MC_MCLK_DYN_ENABLE | -+ RADEON_IO_MCLK_DYN_ENABLE); -+ OUTPLL(pScrn, RADEON_MCLK_MISC, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -+ tmp |= (RADEON_FORCEON_MCLKA | -+ RADEON_FORCEON_MCLKB); -+ -+ tmp &= ~(RADEON_FORCEON_YCLKA | -+ RADEON_FORCEON_YCLKB | -+ RADEON_FORCEON_MC); -+ -+ /* Some releases of vbios have set DISABLE_MC_MCLKA -+ and DISABLE_MC_MCLKB bits in the vbios table. Setting these -+ bits will cause H/W hang when reading video memory with dynamic clocking -+ enabled. */ -+ if ((tmp & R300_DISABLE_MC_MCLKA) && -+ (tmp & R300_DISABLE_MC_MCLKB)) { -+ /* If both bits are set, then check the active channels */ -+ tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -+ if (info->RamWidth == 64) { -+ if (INREG(RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY) -+ tmp &= ~R300_DISABLE_MC_MCLKB; -+ else -+ tmp &= ~R300_DISABLE_MC_MCLKA; -+ } else { -+ tmp &= ~(R300_DISABLE_MC_MCLKA | -+ R300_DISABLE_MC_MCLKB); -+ } -+ } -+ -+ OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -+ } else { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp &= ~(R300_SCLK_FORCE_VAP); -+ tmp |= RADEON_SCLK_FORCE_CP; -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ usleep(15000); -+ -+ tmp = INPLL(pScrn, R300_SCLK_CNTL2); -+ tmp &= ~(R300_SCLK_FORCE_TCL | -+ R300_SCLK_FORCE_GA | -+ R300_SCLK_FORCE_CBA); -+ OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -+ } -+ } else { -+ tmp = INPLL(pScrn, RADEON_CLK_PWRMGT_CNTL); -+ -+ tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK | -+ RADEON_DISP_DYN_STOP_LAT_MASK | -+ RADEON_DYN_STOP_MODE_MASK); -+ -+ tmp |= (RADEON_ENGIN_DYNCLK_MODE | -+ (0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT)); -+ OUTPLL(pScrn, RADEON_CLK_PWRMGT_CNTL, tmp); -+ usleep(15000); -+ -+ tmp = INPLL(pScrn, RADEON_CLK_PIN_CNTL); -+ tmp |= RADEON_SCLK_DYN_START_CNTL; -+ OUTPLL(pScrn, RADEON_CLK_PIN_CNTL, tmp); -+ usleep(15000); -+ -+ /* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200 -+ to lockup randomly, leave them as set by BIOS. -+ */ -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ /*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/ -+ tmp &= ~RADEON_SCLK_FORCEON_MASK; -+ -+ /*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/ -+ if (((info->ChipFamily == CHIP_FAMILY_RV250) && -+ ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -+ RADEON_CFG_ATI_REV_A13)) || -+ ((info->ChipFamily == CHIP_FAMILY_RV100) && -+ ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <= -+ RADEON_CFG_ATI_REV_A13))) { -+ tmp |= RADEON_SCLK_FORCE_CP; -+ tmp |= RADEON_SCLK_FORCE_VIP; -+ } -+ -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ if ((info->ChipFamily == CHIP_FAMILY_RV200) || -+ (info->ChipFamily == CHIP_FAMILY_RV250) || -+ (info->ChipFamily == CHIP_FAMILY_RV280)) { -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp &= ~RADEON_SCLK_MORE_FORCEON; -+ -+ /* RV200::A11 A12 RV250::A11 A12 */ -+ if (((info->ChipFamily == CHIP_FAMILY_RV200) || -+ (info->ChipFamily == CHIP_FAMILY_RV250)) && -+ ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -+ RADEON_CFG_ATI_REV_A13)) { -+ tmp |= RADEON_SCLK_MORE_FORCEON; -+ } -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ usleep(15000); -+ } -+ -+ /* RV200::A11 A12, RV250::A11 A12 */ -+ if (((info->ChipFamily == CHIP_FAMILY_RV200) || -+ (info->ChipFamily == CHIP_FAMILY_RV250)) && -+ ((INREG(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) < -+ RADEON_CFG_ATI_REV_A13)) { -+ tmp = INPLL(pScrn, RADEON_PLL_PWRMGT_CNTL); -+ tmp |= RADEON_TCL_BYPASS_DISABLE; -+ OUTPLL(pScrn, RADEON_PLL_PWRMGT_CNTL, tmp); -+ } -+ usleep(15000); -+ -+ /*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp |= (RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb); -+ -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ usleep(15000); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp |= (RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb); -+ -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ usleep(15000); -+ } -+ } else { -+ /* Turn everything OFF (ForceON to everything)*/ -+ if ( !pRADEONEnt->HasCRTC2 ) { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_HDP | -+ RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP | -+ RADEON_SCLK_FORCE_E2 | RADEON_SCLK_FORCE_SE | -+ RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP | -+ RADEON_SCLK_FORCE_RE | RADEON_SCLK_FORCE_PB | -+ RADEON_SCLK_FORCE_TAM | RADEON_SCLK_FORCE_TDM | -+ RADEON_SCLK_FORCE_RB); -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ } else if ((info->ChipFamily == CHIP_FAMILY_RS400) || -+ (info->ChipFamily == CHIP_FAMILY_RS480)) { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -+ RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -+ R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -+ R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -+ R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -+ R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp |= RADEON_SCLK_MORE_FORCEON; -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb | -+ R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF); -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -+ R300_DVOCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ R300_PIXCLK_DVO_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb | -+ R300_PIXCLK_TRANS_ALWAYS_ONb | -+ R300_PIXCLK_TVO_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF); -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ } else if (info->ChipFamily >= CHIP_FAMILY_RV350) { -+ /* for RV350/M10, no delays are required. */ -+ tmp = INPLL(pScrn, R300_SCLK_CNTL2); -+ tmp |= (R300_SCLK_FORCE_TCL | -+ R300_SCLK_FORCE_GA | -+ R300_SCLK_FORCE_CBA); -+ OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP | -+ RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 | -+ R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR | -+ R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX | -+ R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK | -+ R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0); -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp |= RADEON_SCLK_MORE_FORCEON; -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -+ tmp |= (RADEON_FORCEON_MCLKA | -+ RADEON_FORCEON_MCLKB | -+ RADEON_FORCEON_YCLKA | -+ RADEON_FORCEON_YCLKB | -+ RADEON_FORCEON_MC); -+ OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb | -+ R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF); -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb | -+ R300_DVOCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ R300_PIXCLK_DVO_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb | -+ R300_PIXCLK_TRANS_ALWAYS_ONb | -+ R300_PIXCLK_TVO_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_P2G2CLK_ALWAYS_ONb | -+ R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF); -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ } else { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2); -+ tmp |= RADEON_SCLK_FORCE_SE; -+ -+ if ( !pRADEONEnt->HasCRTC2 ) { -+ tmp |= ( RADEON_SCLK_FORCE_RB | -+ RADEON_SCLK_FORCE_TDM | -+ RADEON_SCLK_FORCE_TAM | -+ RADEON_SCLK_FORCE_PB | -+ RADEON_SCLK_FORCE_RE | -+ RADEON_SCLK_FORCE_VIP | -+ RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_TOP | -+ RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_DISP2 | -+ RADEON_SCLK_FORCE_HDP ); -+ } else if ((info->ChipFamily == CHIP_FAMILY_R300) || -+ (info->ChipFamily == CHIP_FAMILY_R350)) { -+ tmp |= ( RADEON_SCLK_FORCE_HDP | -+ RADEON_SCLK_FORCE_DISP1 | -+ RADEON_SCLK_FORCE_DISP2 | -+ RADEON_SCLK_FORCE_TOP | -+ RADEON_SCLK_FORCE_IDCT | -+ RADEON_SCLK_FORCE_VIP); -+ } -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ -+ usleep(16000); -+ -+ if ((info->ChipFamily == CHIP_FAMILY_R300) || -+ (info->ChipFamily == CHIP_FAMILY_R350)) { -+ tmp = INPLL(pScrn, R300_SCLK_CNTL2); -+ tmp |= ( R300_SCLK_FORCE_TCL | -+ R300_SCLK_FORCE_GA | -+ R300_SCLK_FORCE_CBA); -+ OUTPLL(pScrn, R300_SCLK_CNTL2, tmp); -+ usleep(16000); -+ } -+ -+ if (info->IsIGP) { -+ tmp = INPLL(pScrn, RADEON_MCLK_CNTL); -+ tmp &= ~(RADEON_FORCEON_MCLKA | -+ RADEON_FORCEON_YCLKA); -+ OUTPLL(pScrn, RADEON_MCLK_CNTL, tmp); -+ usleep(16000); -+ } -+ -+ if ((info->ChipFamily == CHIP_FAMILY_RV200) || -+ (info->ChipFamily == CHIP_FAMILY_RV250) || -+ (info->ChipFamily == CHIP_FAMILY_RV280)) { -+ tmp = INPLL(pScrn, RADEON_SCLK_MORE_CNTL); -+ tmp |= RADEON_SCLK_MORE_FORCEON; -+ OUTPLL(pScrn, RADEON_SCLK_MORE_CNTL, tmp); -+ usleep(16000); -+ } -+ -+ tmp = INPLL(pScrn, RADEON_PIXCLKS_CNTL); -+ tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb | -+ RADEON_PIX2CLK_DAC_ALWAYS_ONb | -+ RADEON_PIXCLK_BLEND_ALWAYS_ONb | -+ RADEON_PIXCLK_GV_ALWAYS_ONb | -+ RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb | -+ RADEON_PIXCLK_LVDS_ALWAYS_ONb | -+ RADEON_PIXCLK_TMDS_ALWAYS_ONb); -+ -+ OUTPLL(pScrn, RADEON_PIXCLKS_CNTL, tmp); -+ usleep(16000); -+ -+ tmp = INPLL(pScrn, RADEON_VCLK_ECP_CNTL); -+ tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb | -+ RADEON_PIXCLK_DAC_ALWAYS_ONb); -+ OUTPLL(pScrn, RADEON_VCLK_ECP_CNTL, tmp); -+ } -+ } -+} -+ -+static void RADEONPMQuirks(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ uint32_t tmp; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ if (info->ChipFamily < CHIP_FAMILY_RV515) { -+ tmp = INPLL(pScrn, RADEON_SCLK_CNTL); -+ if (IS_R300_VARIANT || IS_RV100_VARIANT) -+ tmp |= RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_VIP; -+ if ((info->ChipFamily == CHIP_FAMILY_RV250) || (info->ChipFamily == CHIP_FAMILY_RV280)) -+ tmp |= RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_DISP2; -+ if ((info->ChipFamily == CHIP_FAMILY_RV350) || (info->ChipFamily == CHIP_FAMILY_RV380)) -+ tmp |= R300_SCLK_FORCE_VAP; -+ if (info->ChipFamily == CHIP_FAMILY_R420) -+ tmp |= R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX; -+ OUTPLL(pScrn, RADEON_SCLK_CNTL, tmp); -+ } else if (info->ChipFamily < CHIP_FAMILY_R600) { -+ tmp = INPLL(pScrn, AVIVO_CP_DYN_CNTL); -+ tmp |= AVIVO_CP_FORCEON; -+ OUTPLL(pScrn, AVIVO_CP_DYN_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, AVIVO_E2_DYN_CNTL); -+ tmp |= AVIVO_E2_FORCEON; -+ OUTPLL(pScrn, AVIVO_E2_DYN_CNTL, tmp); -+ -+ tmp = INPLL(pScrn, AVIVO_IDCT_DYN_CNTL); -+ tmp |= AVIVO_IDCT_FORCEON; -+ OUTPLL(pScrn, AVIVO_IDCT_DYN_CNTL, tmp); -+ } -+} -+ -+static void -+RADEONSetPCIELanes(ScrnInfoPtr pScrn, int lanes) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ unsigned char *RADEONMMIO = info->MMIO; -+ uint32_t link_width_cntl, mask, target_reg; -+ -+ if (info->IsIGP) -+ return; -+ -+ /* don't change lanes on multi-gpu cards for now */ -+ if ((info->Chipset == PCI_CHIP_RV770_9441) || -+ (info->Chipset == PCI_CHIP_RV770_9443) || -+ (info->Chipset == PCI_CHIP_RV770_944B) || -+ (info->Chipset == PCI_CHIP_RV670_9506) || -+ (info->Chipset == PCI_CHIP_RV670_9509) || -+ (info->Chipset == PCI_CHIP_RV670_950F)) -+ return; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ switch (lanes) { -+ case 0: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X0; -+ break; -+ case 1: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X1; -+ break; -+ case 2: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X2; -+ break; -+ case 4: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X4; -+ break; -+ case 8: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X8; -+ break; -+ case 12: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X12; -+ break; -+ case 16: -+ default: -+ mask = RADEON_PCIE_LC_LINK_WIDTH_X16; -+ break; -+ } -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ link_width_cntl = INPCIE_P(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL); -+ -+ if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) == -+ (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT)) -+ return; -+ -+ link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK | -+ RADEON_PCIE_LC_RECONFIG_NOW | -+ R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE | -+ R600_PCIE_LC_SHORT_RECONFIG_EN | -+ R600_PCIE_LC_RENEGOTIATE_EN); -+ link_width_cntl |= mask; -+ -+#if 0 -+ /* some northbridges can renegotiate the link rather than requiring -+ * a complete re-config. -+ * e.g., AMD 780/790 northbridges (pci ids: 0x5956, 0x5957, 0x5958, etc.) -+ */ -+ if (northbridge can renegotiate) -+ link_width_cntl |= R600_PCIE_LC_RENEGOTIATE_EN; -+ else -+#endif -+ link_width_cntl |= R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE; -+ -+ OUTPCIE_P(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); -+ OUTPCIE_P(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl | RADEON_PCIE_LC_RECONFIG_NOW); -+ -+ if (info->ChipFamily >= CHIP_FAMILY_RV770) -+ target_reg = R700_TARGET_AND_CURRENT_PROFILE_INDEX; -+ else -+ target_reg = R600_TARGET_AND_CURRENT_PROFILE_INDEX; -+ -+ /* wait for lane set to complete */ -+ link_width_cntl = INREG(target_reg); -+ while (link_width_cntl == 0xffffffff) -+ link_width_cntl = INREG(target_reg); -+ -+ } else { -+ link_width_cntl = INPCIE(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL); -+ -+ if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) == -+ (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT)) -+ return; -+ -+ link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK | -+ RADEON_PCIE_LC_RECONFIG_NOW | -+ RADEON_PCIE_LC_RECONFIG_LATER | -+ RADEON_PCIE_LC_SHORT_RECONFIG_EN); -+ link_width_cntl |= mask; -+ OUTPCIE(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); -+ OUTPCIE(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl | RADEON_PCIE_LC_RECONFIG_NOW); -+ -+ /* wait for lane set to complete */ -+ link_width_cntl = INPCIE(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL); -+ while (link_width_cntl == 0xffffffff) -+ link_width_cntl = INPCIE(pScrn, RADEON_PCIE_LC_LINK_WIDTH_CNTL); -+ -+ } -+ -+} -+ -+static void -+RADEONSetClockGating(ScrnInfoPtr pScrn, Bool enable) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ atombios_static_pwrmgt_setup(pScrn, enable); -+ else { -+ if (info->IsAtomBios) { -+ atombios_static_pwrmgt_setup(pScrn, enable); -+ atombios_clk_gating_setup(pScrn, enable); -+ } else if (info->IsMobility) -+ LegacySetClockGating(pScrn, enable); -+ } -+ -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Dynamic Clock Gating %sabled\n", -+ enable ? "En" : "Dis"); -+} -+ -+static void RADEONSetStaticPowerMode(ScrnInfoPtr pScrn, RADEONPMType type) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ int i; -+ -+ for (i = 0; i < info->pm.num_modes; i++) { -+ if (info->pm.mode[i].type == type) -+ break; -+ } -+ -+ if (i == info->pm.num_modes) -+ return; -+ -+ if (i == info->pm.current_mode) -+ return; -+ -+ RADEONWaitForIdleMMIO(pScrn); -+ -+ if (info->IsAtomBios) -+ atombios_set_engine_clock(pScrn, info->pm.mode[i].sclk); -+ else -+ RADEONSetEngineClock(pScrn, info->pm.mode[i].sclk); -+ -+ if (info->cardType == CARD_PCIE) -+ RADEONSetPCIELanes(pScrn, info->pm.mode[i].pcie_lanes); -+ -+ info->pm.current_mode = i; -+ -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Power Mode Switch\n"); -+} -+ -+ -+void RADEONPMInit(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (xf86ReturnOptValBool(info->Options, OPTION_CLOCK_GATING, FALSE)) { -+ info->pm.clock_gating_enabled = TRUE; -+ RADEONSetClockGating(pScrn, info->pm.clock_gating_enabled); -+ } else -+ info->pm.clock_gating_enabled = FALSE; -+ -+ info->pm.mode[0].type = POWER_DEFAULT; -+ info->pm.mode[0].sclk = (uint32_t)info->sclk * 100; /* 10 khz */ -+ info->pm.mode[0].mclk = (uint32_t)info->mclk * 100; /* 10 khz */ -+ info->pm.mode[0].pcie_lanes = 16; /* XXX: read back current lane config */ -+ info->pm.current_mode = 0; -+ info->pm.num_modes = 1; -+ -+ if (xf86ReturnOptValBool(info->Options, OPTION_DYNAMIC_PM, FALSE)) { -+ info->pm.dynamic_mode_enabled = TRUE; -+ info->pm.mode[1].type = POWER_LOW; -+ info->pm.mode[1].sclk = info->pm.mode[0].sclk / 4; -+ info->pm.mode[1].mclk = info->pm.mode[0].mclk / 4; -+ info->pm.mode[1].pcie_lanes = 1; -+ -+ info->pm.mode[2].type = POWER_HIGH; -+ info->pm.mode[2].sclk = info->pm.mode[0].sclk; -+ info->pm.mode[2].mclk = info->pm.mode[0].mclk; -+ info->pm.mode[2].pcie_lanes = 16; -+ -+ info->pm.num_modes += 2; -+ -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Dynamic Power Management Enabled\n"); -+ } else { -+ info->pm.dynamic_mode_enabled = FALSE; -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Dynamic Power Management Disabled\n"); -+ } -+ -+ if (xf86ReturnOptValBool(info->Options, OPTION_FORCE_LOW_POWER, FALSE)) { -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Force Low Power Mode Enabled\n"); -+ info->pm.force_low_power_enabled = TRUE; -+ if (info->pm.dynamic_mode_enabled) { -+ info->pm.mode[2].type = POWER_HIGH; -+ info->pm.mode[2].sclk = info->pm.mode[0].sclk / 2; -+ info->pm.mode[2].mclk = info->pm.mode[0].mclk / 2; -+ info->pm.mode[2].pcie_lanes = 4; -+ } else { -+ info->pm.mode[1].type = POWER_HIGH; -+ info->pm.mode[1].sclk = info->pm.mode[0].sclk / 2; -+ info->pm.mode[1].mclk = info->pm.mode[0].mclk / 2; -+ info->pm.mode[1].pcie_lanes = 4; -+ info->pm.num_modes += 1; -+ } -+ RADEONSetStaticPowerMode(pScrn, POWER_HIGH); -+ } else -+ info->pm.force_low_power_enabled = FALSE; -+ -+ RADEONPMQuirks(pScrn); -+} -+ -+void RADEONPMEnterVT(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->pm.clock_gating_enabled) -+ RADEONSetClockGating(pScrn, info->pm.clock_gating_enabled); -+ RADEONPMQuirks(pScrn); -+ if (info->pm.force_low_power_enabled || info->pm.dynamic_mode_enabled) -+ RADEONSetStaticPowerMode(pScrn, POWER_HIGH); -+} -+ -+void RADEONPMLeaveVT(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->pm.clock_gating_enabled) -+ RADEONSetClockGating(pScrn, FALSE); -+ if (info->pm.force_low_power_enabled || info->pm.dynamic_mode_enabled) -+ RADEONSetStaticPowerMode(pScrn, POWER_DEFAULT); -+} -+ -+void RADEONPMFini(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->pm.clock_gating_enabled) -+ RADEONSetClockGating(pScrn, FALSE); -+ if (info->pm.force_low_power_enabled || info->pm.dynamic_mode_enabled) -+ RADEONSetStaticPowerMode(pScrn, POWER_DEFAULT); -+} -+ -+void RADEONPMBlockHandler(ScrnInfoPtr pScrn) -+{ -+ RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); -+ -+ if ((!pRADEONEnt->Controller[0]->enabled) && -+ (!pRADEONEnt->Controller[1]->enabled)) -+ RADEONSetStaticPowerMode(pScrn, POWER_LOW); -+ else -+ RADEONSetStaticPowerMode(pScrn, POWER_HIGH); -+ -+} -+ -diff --git a/src/radeon_probe.c b/src/radeon_probe.c -index 041bab6..42e7259 100644 ---- a/src/radeon_probe.c -+++ b/src/radeon_probe.c -@@ -36,6 +36,7 @@ - * Authors: - * Kevin E. Martin - * Rickard E. Faith -+ * KMS support - Dave Airlie - */ - - #include "radeon_probe.h" -@@ -44,12 +45,20 @@ - #include "atipcirename.h" - - #include "xf86.h" -+#if GET_ABI_MAJOR(ABI_VIDEODRV_VERSION) < 6 - #include "xf86Resources.h" -+#endif -+ -+#ifdef XF86DRM_MODE -+#include "xf86drmMode.h" -+#include "dri.h" -+#endif - - #include "radeon_chipset_gen.h" - - #include "radeon_pci_chipset_gen.h" - -+ - #ifdef XSERVER_LIBPCIACCESS - #include "radeon_pci_device_match_gen.h" - #endif -@@ -76,11 +85,42 @@ RADEONIdentify(int flags) - RADEONChipsets); - } - -+ -+#ifdef XF86DRM_MODE -+static Bool radeon_kernel_mode_enabled(ScrnInfoPtr pScrn, struct pci_device *pci_dev) -+{ -+ char *busIdString; -+ int ret; -+ -+ if (!xf86LoaderCheckSymbol("DRICreatePCIBusID")) { -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, -+ "[KMS] No DRICreatePCIBusID symbol, no kernel modesetting.\n"); -+ return FALSE; -+ } -+ -+ busIdString = DRICreatePCIBusID(pci_dev); -+ ret = drmCheckModesettingSupported(busIdString); -+ xfree(busIdString); -+ if (ret) { -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, -+ "[KMS] drm report modesetting isn't supported.\n"); -+ return FALSE; -+ } -+ -+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, -+ "[KMS] Kernel modesetting enabled.\n"); -+ return TRUE; -+} -+#else -+#define radeon_kernel_mode_enabled(x, y) FALSE -+#endif -+ - static Bool --radeon_get_scrninfo(int entity_num) -+radeon_get_scrninfo(int entity_num, void *pci_dev) - { - ScrnInfoPtr pScrn = NULL; - EntityInfoPtr pEnt; -+ int kms = 0; - - pScrn = xf86ConfigPciEntity(pScrn, 0, entity_num, RADEONPciChipsets, - NULL, -@@ -89,6 +129,11 @@ radeon_get_scrninfo(int entity_num) - if (!pScrn) - return FALSE; - -+ if (pci_dev) { -+ if (radeon_kernel_mode_enabled(pScrn, pci_dev)) -+ kms = 1; -+ } -+ - pScrn->driverVersion = RADEON_VERSION_CURRENT; - pScrn->driverName = RADEON_DRIVER_NAME; - pScrn->name = RADEON_NAME; -@@ -97,14 +142,29 @@ radeon_get_scrninfo(int entity_num) - #else - pScrn->Probe = RADEONProbe; - #endif -- pScrn->PreInit = RADEONPreInit; -- pScrn->ScreenInit = RADEONScreenInit; -- pScrn->SwitchMode = RADEONSwitchMode; -- pScrn->AdjustFrame = RADEONAdjustFrame; -- pScrn->EnterVT = RADEONEnterVT; -- pScrn->LeaveVT = RADEONLeaveVT; -- pScrn->FreeScreen = RADEONFreeScreen; -- pScrn->ValidMode = RADEONValidMode; -+ -+#ifdef XF86DRM_MODE -+ if (kms == 1) { -+ pScrn->PreInit = RADEONPreInit_KMS; -+ pScrn->ScreenInit = RADEONScreenInit_KMS; -+ pScrn->SwitchMode = RADEONSwitchMode_KMS; -+ pScrn->AdjustFrame = RADEONAdjustFrame_KMS; -+ pScrn->EnterVT = RADEONEnterVT_KMS; -+ pScrn->LeaveVT = RADEONLeaveVT_KMS; -+ pScrn->FreeScreen = RADEONFreeScreen_KMS; -+ pScrn->ValidMode = RADEONValidMode; -+ } else -+#endif -+ { -+ pScrn->PreInit = RADEONPreInit; -+ pScrn->ScreenInit = RADEONScreenInit; -+ pScrn->SwitchMode = RADEONSwitchMode; -+ pScrn->AdjustFrame = RADEONAdjustFrame; -+ pScrn->EnterVT = RADEONEnterVT; -+ pScrn->LeaveVT = RADEONLeaveVT; -+ pScrn->FreeScreen = RADEONFreeScreen; -+ pScrn->ValidMode = RADEONValidMode; -+ } - - pEnt = xf86GetEntityInfo(entity_num); - -@@ -178,7 +238,7 @@ RADEONProbe(DriverPtr drv, int flags) - foundScreen = TRUE; - } else { - for (i = 0; i < numUsed; i++) { -- if (radeon_get_scrninfo(usedChips[i])) -+ if (radeon_get_scrninfo(usedChips[i], NULL)) - foundScreen = TRUE; - } - } -@@ -199,7 +259,7 @@ radeon_pci_probe( - intptr_t match_data - ) - { -- return radeon_get_scrninfo(entity_num); -+ return radeon_get_scrninfo(entity_num, (void *)device); - } - - #endif /* XSERVER_LIBPCIACCESS */ -diff --git a/src/radeon_probe.h b/src/radeon_probe.h -index 6479972..9cac15c 100644 ---- a/src/radeon_probe.h -+++ b/src/radeon_probe.h -@@ -159,6 +159,9 @@ typedef struct _RADEONCrtcPrivateRec { - int can_tile; - Bool enabled; - Bool initialized; -+ Bool scaler_enabled; -+ float vsc; -+ float hsc; - } RADEONCrtcPrivateRec, *RADEONCrtcPrivatePtr; - - typedef struct _radeon_encoder { -@@ -216,6 +219,7 @@ typedef struct _radeon_lvds { - - typedef struct _radeon_dvo { - /* dvo */ -+ I2CBusPtr pI2CBus; - I2CDevPtr DVOChip; - RADEONI2CBusRec dvo_i2c; - int dvo_i2c_slave_addr; -@@ -611,6 +615,7 @@ typedef struct - RADEONSaveRec SavedReg; /* Original (text) mode */ - - void *MMIO; /* Map of MMIO region */ -+ int fd; /* for sharing across zaphod heads */ - } RADEONEntRec, *RADEONEntPtr; - - /* radeon_probe.c */ -@@ -632,4 +637,14 @@ extern ModeStatus RADEONValidMode(int, DisplayModePtr, Bool, int); - - extern const OptionInfoRec *RADEONOptionsWeak(void); - -+#ifdef XF86DRM_MODE -+extern Bool RADEONPreInit_KMS(ScrnInfoPtr, int); -+extern Bool RADEONScreenInit_KMS(int, ScreenPtr, int, char **); -+extern Bool RADEONSwitchMode_KMS(int, DisplayModePtr, int); -+extern void RADEONAdjustFrame_KMS(int, int, int, int); -+extern Bool RADEONEnterVT_KMS(int, int); -+extern void RADEONLeaveVT_KMS(int, int); -+extern void RADEONFreeScreen_KMS(int scrnIndex, int flags); -+#endif -+ - #endif /* _RADEON_PROBE_H_ */ -diff --git a/src/radeon_reg.h b/src/radeon_reg.h -index d74a30a..9df7fff 100644 ---- a/src/radeon_reg.h -+++ b/src/radeon_reg.h -@@ -276,19 +276,59 @@ - - #define RADEON_PCIE_INDEX 0x0030 - #define RADEON_PCIE_DATA 0x0034 -+#define R600_PCIE_PORT_INDEX 0x0038 -+#define R600_PCIE_PORT_DATA 0x003c -+/* PCIE_LC_LINK_WIDTH_CNTL is PCIE on r1xx-r5xx, PCIE_PORT on r6xx-r7xx */ -+#define RADEON_PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE */ -+# define RADEON_PCIE_LC_LINK_WIDTH_SHIFT 0 -+# define RADEON_PCIE_LC_LINK_WIDTH_MASK 0x7 -+# define RADEON_PCIE_LC_LINK_WIDTH_X0 0 -+# define RADEON_PCIE_LC_LINK_WIDTH_X1 1 -+# define RADEON_PCIE_LC_LINK_WIDTH_X2 2 -+# define RADEON_PCIE_LC_LINK_WIDTH_X4 3 -+# define RADEON_PCIE_LC_LINK_WIDTH_X8 4 -+# define RADEON_PCIE_LC_LINK_WIDTH_X12 5 -+# define RADEON_PCIE_LC_LINK_WIDTH_X16 6 -+# define RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT 4 -+# define RADEON_PCIE_LC_LINK_WIDTH_RD_MASK 0x70 -+# define R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) -+# define RADEON_PCIE_LC_RECONFIG_NOW (1 << 8) -+# define RADEON_PCIE_LC_RECONFIG_LATER (1 << 9) -+# define RADEON_PCIE_LC_SHORT_RECONFIG_EN (1 << 10) -+# define R600_PCIE_LC_RENEGOTIATE_EN (1 << 10) -+# define R600_PCIE_LC_SHORT_RECONFIG_EN (1 << 11) -+#define R600_TARGET_AND_CURRENT_PROFILE_INDEX 0x70c -+#define R700_TARGET_AND_CURRENT_PROFILE_INDEX 0x66c - - #define RADEON_CACHE_CNTL 0x1724 - #define RADEON_CACHE_LINE 0x0f0c /* PCI */ - #define RADEON_CAPABILITIES_ID 0x0f50 /* PCI */ - #define RADEON_CAPABILITIES_PTR 0x0f34 /* PCI */ - #define RADEON_CLK_PIN_CNTL 0x0001 /* PLL */ -+# define RADEON_DONT_USE_XTALIN (1 << 4) - # define RADEON_SCLK_DYN_START_CNTL (1 << 15) - #define RADEON_CLOCK_CNTL_DATA 0x000c - #define RADEON_CLOCK_CNTL_INDEX 0x0008 - # define RADEON_PLL_WR_EN (1 << 7) - # define RADEON_PLL_DIV_SEL (3 << 8) - # define RADEON_PLL2_DIV_SEL_MASK ~(3 << 8) --#define RADEON_CLK_PWRMGT_CNTL 0x0014 -+#define RADEON_M_SPLL_REF_FB_DIV 0x000a /* PLL */ -+# define RADEON_M_SPLL_REF_DIV_MASK 0xff -+# define RADEON_M_SPLL_REF_DIV_SHIFT 0 -+# define RADEON_MPLL_FB_DIV_MASK 0xff -+# define RADEON_MPLL_FB_DIV_SHIFT 8 -+# define RADEON_SPLL_FB_DIV_MASK 0xff -+# define RADEON_SPLL_FB_DIV_SHIFT 16 -+#define RADEON_SPLL_CNTL 0x000c /* PLL */ -+# define RADEON_SPLL_SLEEP (1 << 0) -+# define RADEON_SPLL_RESET (1 << 1) -+# define RADEON_SPLL_PCP_MASK 0x7 -+# define RADEON_SPLL_PCP_SHIFT 8 -+# define RADEON_SPLL_PVG_MASK 0x7 -+# define RADEON_SPLL_PVG_SHIFT 11 -+# define RADEON_SPLL_PDC_MASK 0x3 -+# define RADEON_SPLL_PDC_SHIFT 14 -+#define RADEON_CLK_PWRMGT_CNTL 0x0014 /* PLL */ - # define RADEON_ENGIN_DYNCLK_MODE (1 << 12) - # define RADEON_ACTIVE_HILO_LAT_MASK (3 << 13) - # define RADEON_ACTIVE_HILO_LAT_SHIFT 13 -@@ -300,7 +340,7 @@ - # define RADEON_DYN_STOP_MODE_MASK (7 << 21) - # define RADEON_TVPLL_PWRMGT_OFF (1 << 30) - # define RADEON_TVCLK_TURNOFF (1 << 31) --#define RADEON_PLL_PWRMGT_CNTL 0x0015 -+#define RADEON_PLL_PWRMGT_CNTL 0x0015 /* PLL */ - # define RADEON_TCL_BYPASS_DISABLE (1 << 20) - #define RADEON_CLR_CMP_CLR_3D 0x1a24 - #define RADEON_CLR_CMP_CLR_DST 0x15c8 -@@ -405,8 +445,9 @@ - # define RADEON_CRTC_V_CUTOFF_ACTIVE_EN (1<<5) - #define RADEON_CRTC_GUI_TRIG_VLINE 0x0218 - # define RADEON_CRTC_GUI_TRIG_VLINE_START_SHIFT 0 --# define RADEON_CRTC_GUI_TRIG_VLINE_END_SHIFT 16 - # define RADEON_CRTC_GUI_TRIG_VLINE_INV (1 << 15) -+# define RADEON_CRTC_GUI_TRIG_VLINE_END_SHIFT 16 -+# define RADEON_CRTC_GUI_TRIG_VLINE_STALL (1 << 30) - #define RADEON_CRTC_H_SYNC_STRT_WID 0x0204 - # define RADEON_CRTC_H_SYNC_STRT_PIX (0x07 << 0) - # define RADEON_CRTC_H_SYNC_STRT_CHAR (0x3ff << 3) -@@ -936,11 +977,11 @@ - #define RADEON_GENMO_WT 0x03c2 /* VGA */ - #define RADEON_GENS0 0x03c2 /* VGA */ - #define RADEON_GENS1 0x03da /* VGA, 0x03ba */ --#define RADEON_GPIO_MONID 0x0068 /* DDC interface via I2C */ -+#define RADEON_GPIO_MONID 0x0068 /* DDC interface via I2C */ /* DDC3 */ - #define RADEON_GPIO_MONIDB 0x006c - #define RADEON_GPIO_CRT2_DDC 0x006c --#define RADEON_GPIO_DVI_DDC 0x0064 --#define RADEON_GPIO_VGA_DDC 0x0060 -+#define RADEON_GPIO_DVI_DDC 0x0064 /* DDC2 */ -+#define RADEON_GPIO_VGA_DDC 0x0060 /* DDC1 */ - # define RADEON_GPIO_A_0 (1 << 0) - # define RADEON_GPIO_A_1 (1 << 1) - # define RADEON_GPIO_Y_0 (1 << 8) -@@ -979,24 +1020,28 @@ - - /* Multimedia I2C bus */ - #define RADEON_I2C_CNTL_0 0x0090 --#define RADEON_I2C_DONE (1<<0) --#define RADEON_I2C_NACK (1<<1) --#define RADEON_I2C_HALT (1<<2) --#define RADEON_I2C_SOFT_RST (1<<5) --#define RADEON_I2C_DRIVE_EN (1<<6) --#define RADEON_I2C_DRIVE_SEL (1<<7) --#define RADEON_I2C_START (1<<8) --#define RADEON_I2C_STOP (1<<9) --#define RADEON_I2C_RECEIVE (1<<10) --#define RADEON_I2C_ABORT (1<<11) --#define RADEON_I2C_GO (1<<12) -+#define RADEON_I2C_DONE (1 << 0) -+#define RADEON_I2C_NACK (1 << 1) -+#define RADEON_I2C_HALT (1 << 2) -+#define RADEON_I2C_SOFT_RST (1 << 5) -+#define RADEON_I2C_DRIVE_EN (1 << 6) -+#define RADEON_I2C_DRIVE_SEL (1 << 7) -+#define RADEON_I2C_START (1 << 8) -+#define RADEON_I2C_STOP (1 << 9) -+#define RADEON_I2C_RECEIVE (1 << 10) -+#define RADEON_I2C_ABORT (1 << 11) -+#define RADEON_I2C_GO (1 << 12) - #define RADEON_I2C_CNTL_1 0x0094 --#define RADEON_I2C_SEL (1<<16) --#define RADEON_I2C_EN (1<<17) -+#define RADEON_I2C_SEL (1 << 16) -+#define RADEON_I2C_EN (1 << 17) - #define RADEON_I2C_DATA 0x0098 - - #define RADEON_DVI_I2C_CNTL_0 0x02e0 --#define RADEON_DVI_I2C_CNTL_1 0x02e4 /* ? */ -+# define R200_DVI_I2C_PIN_SEL(x) ((x) << 3) -+# define R200_SEL_DDC1 0 /* 0x60 - VGA_DDC */ -+# define R200_SEL_DDC2 1 /* 0x64 - DVI_DDC */ -+# define R200_SEL_DDC3 2 /* 0x68 - MONID_DDC */ -+#define RADEON_DVI_I2C_CNTL_1 0x02e4 - #define RADEON_DVI_I2C_DATA 0x02e8 - - #define RADEON_INTERRUPT_LINE 0x0f3c /* PCI */ -@@ -3504,12 +3549,24 @@ - # define R520_MEM_NUM_CHANNELS_SHIFT 24 - # define R520_MC_CHANNEL_SIZE (1 << 23) - -+#define RS780_MC_INDEX 0x28f8 -+# define RS780_MC_INDEX_MASK 0x1ff -+# define RS780_MC_INDEX_WR_EN (1 << 9) -+#define RS780_MC_DATA 0x28fc -+ - #define R600_RAMCFG 0x2408 - # define R600_CHANSIZE (1 << 7) - # define R600_CHANSIZE_OVERRIDE (1 << 10) - - #define R600_SRBM_STATUS 0x0e50 - -+#define AVIVO_CP_DYN_CNTL 0x000f /* PLL */ -+# define AVIVO_CP_FORCEON (1 << 0) -+#define AVIVO_E2_DYN_CNTL 0x0011 /* PLL */ -+# define AVIVO_E2_FORCEON (1 << 0) -+#define AVIVO_IDCT_DYN_CNTL 0x0013 /* PLL */ -+# define AVIVO_IDCT_FORCEON (1 << 0) -+ - #define AVIVO_HDP_FB_LOCATION 0x134 - - #define AVIVO_VGA_RENDER_CONTROL 0x0300 -@@ -3689,6 +3746,19 @@ - # define AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE (1 << 2) - # define AVIVO_DC_LB_DISP1_END_ADR_SHIFT 4 - # define AVIVO_DC_LB_DISP1_END_ADR_MASK 0x7ff -+#define AVIVO_D1MODE_PRIORITY_A_CNT 0x6548 -+# define AVIVO_DxMODE_PRIORITY_MARK_MASK 0x7fff -+# define AVIVO_DxMODE_PRIORITY_OFF (1 << 16) -+# define AVIVO_DxMODE_PRIORITY_ALWAYS_ON (1 << 20) -+# define AVIVO_DxMODE_PRIORITY_FORCE_MASK (1 << 24) -+#define AVIVO_D1MODE_PRIORITY_B_CNT 0x654c -+#define AVIVO_D2MODE_PRIORITY_A_CNT 0x6d48 -+#define AVIVO_D2MODE_PRIORITY_B_CNT 0x6d4c -+#define AVIVO_LB_MAX_REQ_OUTSTANDING 0x6d58 -+# define AVIVO_LB_D1_MAX_REQ_OUTSTANDING_MASK 0xf -+# define AVIVO_LB_D1_MAX_REQ_OUTSTANDING_SHIFT 0 -+# define AVIVO_LB_D2_MAX_REQ_OUTSTANDING_MASK 0xf -+# define AVIVO_LB_D2_MAX_REQ_OUTSTANDING_SHIFT 16 - - #define AVIVO_D1MODE_DATA_FORMAT 0x6528 - # define AVIVO_D1MODE_INTERLEAVE_EN (1 << 0) -@@ -3750,6 +3820,8 @@ - #define AVIVO_D2CUR_SIZE 0x6c10 - #define AVIVO_D2CUR_POSITION 0x6c14 - -+#define RS690_DCP_CONTROL 0x6c9c -+ - #define AVIVO_D2MODE_DATA_FORMAT 0x6d28 - #define AVIVO_D2MODE_DESKTOP_HEIGHT 0x6d2c - #define AVIVO_D2MODE_VIEWPORT_START 0x6d80 -@@ -4038,6 +4110,11 @@ - # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 - # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) - -+#define R600_CG_SPLL_FUNC_CNTL 0x600 -+# define R600_SPLL_BYPASS_EN (1 << 3) -+#define R600_CG_SPLL_STATUS 0x60c -+# define R600_SPLL_CHG_STATUS (1 << 1) -+ - #define R600_BIOS_0_SCRATCH 0x1724 - #define R600_BIOS_1_SCRATCH 0x1728 - #define R600_BIOS_2_SCRATCH 0x172c -@@ -4223,6 +4300,12 @@ - #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 - # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 - #define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 -+# define R300_PVS_CODE_START 0 -+# define R300_PVS_CONST_START 512 -+# define R500_PVS_CONST_START 1024 -+# define R300_PVS_VECTOR_INST_INDEX(x) ((x) + R300_PVS_CODE_START) -+# define R300_PVS_VECTOR_CONST_INDEX(x) ((x) + R300_PVS_CONST_START) -+# define R500_PVS_VECTOR_CONST_INDEX(x) ((x) + R500_PVS_CONST_START) - #define R300_VAP_PVS_VECTOR_DATA_REG 0x2204 - /* PVS instructions */ - /* Opcode and dst instruction */ -@@ -4341,6 +4424,10 @@ - #define R300_PVS_SRC_ADDR_SEL(x) ((x) << 29) - #define R300_PVS_SRC_ADDR_MODE_1 (1 << 31) - -+#define R300_VAP_PVS_CONST_CNTL 0x22d4 -+# define R300_PVS_CONST_BASE_OFFSET(x) ((x) << 0) -+# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16) -+ - #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22dc - #define R300_VAP_OUT_VTX_FMT_0 0x2090 - # define R300_VTX_POS_PRESENT (1 << 0) -@@ -5380,9 +5467,6 @@ - # define R500_W_SRC_US (0 << 2) - # define R500_W_SRC_RAS (1 << 2) - --#define R500_GA_US_VECTOR_INDEX 0x4250 --#define R500_GA_US_VECTOR_DATA 0x4254 -- - #define R500_RS_INST_0 0x4320 - #define R500_RS_INST_1 0x4324 - # define R500_RS_INST_TEX_ID_SHIFT 0 -diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c -index 79671c0..a9d6fc3 100644 ---- a/src/radeon_textured_video.c -+++ b/src/radeon_textured_video.c -@@ -50,7 +50,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); - extern Bool - R600CopyToVRAM(ScrnInfoPtr pScrn, - char *src, int src_pitch, -- uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, -+ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp, - int x, int y, int w, int h); - - #define IMAGE_MAX_WIDTH 2048 -@@ -128,11 +128,21 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa) - return (ua << 24) | (ur << 16) | (ug << 8) | ub; - } - -+/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces -+ note the difference to the parameters used in overlay are due -+ to 10bit vs. float calcs */ -+static REF_TRANSFORM trans[2] = -+{ -+ {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ -+ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ -+}; -+ - #define ACCEL_MMIO - #define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO - #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) - #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) - #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val)) -+#define OUT_RELOC(x, read, write) do {} while(0) - #define FINISH_ACCEL() - - #include "radeon_textured_videofuncs.c" -@@ -142,6 +152,7 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa) - #undef BEGIN_ACCEL - #undef OUT_ACCEL_REG - #undef OUT_ACCEL_REG_F -+#undef OUT_RELOC - #undef FINISH_ACCEL - - #ifdef XF86DRI -@@ -155,6 +166,7 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa) - #define OUT_ACCEL_REG_F(reg, val) OUT_ACCEL_REG(reg, F_TO_DW(val)) - #define FINISH_ACCEL() ADVANCE_RING() - #define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) -+#define OUT_RELOC(x, read, write) OUT_RING_RELOC(x, read, write) - - #include "radeon_textured_videofuncs.c" - -@@ -169,128 +181,40 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa) - #endif /* XF86DRI */ - - static void --R600CopyPlanarHW(ScrnInfoPtr pScrn, -- unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, -- uint32_t dst_mc_addr, -- int srcPitch, int srcPitch2, int dstPitch, -- int w, int h) --{ -- int dstPitch2 = dstPitch >> 1; -- int h2 = h >> 1; -- int w2 = w >> 1; -- int v_offset, u_offset; -- v_offset = dstPitch * h; -- v_offset = (v_offset + 255) & ~255; -- u_offset = v_offset + (dstPitch2 * h2); -- u_offset = (u_offset + 255) & ~255; -- -- /* Y */ -- R600CopyToVRAM(pScrn, -- (char *)y_src, srcPitch, -- dstPitch, dst_mc_addr, h, 8, -- 0, 0, w, h); -- -- /* V */ -- R600CopyToVRAM(pScrn, -- (char *)v_src, srcPitch2, -- dstPitch2, dst_mc_addr + v_offset, h2, 8, -- 0, 0, w2, h2); -- -- /* U */ -- R600CopyToVRAM(pScrn, -- (char *)u_src, srcPitch2, -- dstPitch2, dst_mc_addr + u_offset, h2, 8, -- 0, 0, w2, h2); --} -- --static void --R600CopyPackedHW(ScrnInfoPtr pScrn, -- unsigned char *src, uint32_t dst_mc_addr, -- int srcPitch, int dstPitch, -- int w, int h) --{ -- -- /* YUV */ -- R600CopyToVRAM(pScrn, -- (char *)src, srcPitch, -- dstPitch >> 2, dst_mc_addr, h, 32, -- 0, 0, w >> 1, h); -- --} -- --static void --R600CopyPlanarSW(ScrnInfoPtr pScrn, -- unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, -- unsigned char *dst, -- int srcPitch, int srcPitch2, int dstPitch, -- int w, int h) --{ -- int i; -- int dstPitch2 = dstPitch >> 1; -- int h2 = h >> 1; -- -- /* Y */ -- if (srcPitch == dstPitch) { -- memcpy(dst, y_src, srcPitch * h); -- dst += (dstPitch * h); -- } else { -- for (i = 0; i < h; i++) { -- memcpy(dst, y_src, srcPitch); -- y_src += srcPitch; -- dst += dstPitch; -- } -- } -- -- /* tex base need 256B alignment */ -- if (h & 1) -- dst += dstPitch; -- -- /* V */ -- if (srcPitch2 == dstPitch2) { -- memcpy(dst, v_src, srcPitch2 * h2); -- dst += (dstPitch2 * h2); -- } else { -- for (i = 0; i < h2; i++) { -- memcpy(dst, v_src, srcPitch2); -- v_src += srcPitch2; -- dst += dstPitch2; -- } -+R600CopyData( -+ ScrnInfoPtr pScrn, -+ unsigned char *src, -+ unsigned char *dst, -+ unsigned int srcPitch, -+ unsigned int dstPitch, -+ unsigned int h, -+ unsigned int w, -+ unsigned int cpp -+){ -+ RADEONInfoPtr info = RADEONPTR( pScrn ); -+ -+ if (cpp == 2) { -+ w *= 2; -+ cpp = 1; - } - -- /* tex base need 256B alignment */ -- if (h2 & 1) -- dst += dstPitch2; -+ if (info->DMAForXv) { -+ uint32_t dst_mc_addr = dst - (unsigned char *)info->FB + info->fbLocation; - -- /* U */ -- if (srcPitch2 == dstPitch2) { -- memcpy(dst, u_src, srcPitch2 * h2); -- dst += (dstPitch2 * h2); -+ R600CopyToVRAM(pScrn, -+ (char *)src, srcPitch, -+ dstPitch, dst_mc_addr, w, h, cpp * 8, -+ 0, 0, w, h); - } else { -- for (i = 0; i < h2; i++) { -- memcpy(dst, u_src, srcPitch2); -- u_src += srcPitch2; -- dst += dstPitch2; -- } -- } --} -- --static void --R600CopyPackedSW(ScrnInfoPtr pScrn, -- unsigned char *src, unsigned char *dst, -- int srcPitch, int dstPitch, -- int w, int h) --{ -- int i; -- -- if (srcPitch == dstPitch) { -- memcpy(dst, src, srcPitch * h); -- dst += (dstPitch * h); -- } else { -- for (i = 0; i < h; i++) { -- memcpy(dst, src, srcPitch); -- src += srcPitch; -- dst += dstPitch; -- } -+ if (srcPitch == dstPitch) -+ memcpy(dst, src, srcPitch * h); -+ else { -+ while (h--) { -+ memcpy(dst, src, srcPitch); -+ src += srcPitch; -+ dst += dstPitch; -+ } -+ } - } - } - -@@ -316,9 +240,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - int srcPitch, srcPitch2, dstPitch, dstPitch2 = 0; - int s2offset, s3offset, tmp; - int d2line, d3line; -- int top, left, npixels, nlines, size; -+ int top, nlines, size; - BoxRec dstBox; - int dst_width = width, dst_height = height; -+ int hw_align; - - /* make the compiler happy */ - s2offset = s3offset = srcPitch2 = 0; -@@ -337,18 +262,15 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - if (!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2, clipBoxes, width, height)) - return Success; - --/* src_w = (x2 - x1) >> 16; -- src_h = (y2 - y1) >> 16; -- drw_w = dstBox.x2 - dstBox.x1; -- drw_h = dstBox.y2 - dstBox.y1;*/ -- - if ((x1 >= x2) || (y1 >= y2)) - return Success; - - /* Bicubic filter setup */ - pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF); -- if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D)) -+ if (!(IS_R300_3D || IS_R500_3D)) { - pPriv->bicubic_enabled = FALSE; -+ pPriv->bicubic_state = BICUBIC_OFF; -+ } - if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) { - /* - * Applying the bicubic filter with a scale of less than 200% -@@ -358,43 +280,35 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - pPriv->bicubic_enabled = FALSE; - } - -- pPriv->planar_hw = pPriv->planar_state; -- if (pPriv->bicubic_enabled || !( IS_R300_3D || -- (info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280) || -- (info->ChipFamily == CHIP_FAMILY_RS300) || -- (info->ChipFamily == CHIP_FAMILY_R200) )) -- pPriv->planar_hw = 0; -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ hw_align = 255; -+ else -+ hw_align = 63; - - switch(id) { - case FOURCC_YV12: - case FOURCC_I420: - srcPitch = (width + 3) & ~3; - srcPitch2 = ((width >> 1) + 3) & ~3; -- if (pPriv->planar_hw) { -- dstPitch = (dst_width + 15) & ~15; -- dstPitch = (dstPitch + 63) & ~63; -- dstPitch2 = ((dst_width >> 1) + 15) & ~15; -- dstPitch2 = (dstPitch2 + 63) & ~63; -+ if (pPriv->bicubic_state != BICUBIC_OFF) { -+ dstPitch = ((dst_width << 1) + hw_align) & ~hw_align; -+ dstPitch2 = 0; - } else { -- dstPitch = ((dst_width << 1) + 15) & ~15; -- dstPitch = (dstPitch + 63) & ~63; -+ dstPitch = (dst_width + hw_align) & ~hw_align; -+ dstPitch2 = ((dstPitch >> 1) + hw_align) & ~hw_align; - } - break; - case FOURCC_UYVY: - case FOURCC_YUY2: - default: -- dstPitch = ((dst_width << 1) + 15) & ~15; -- dstPitch = (dstPitch + 63) & ~63; -+ dstPitch = ((dst_width << 1) + hw_align) & ~hw_align; - srcPitch = (width << 1); - srcPitch2 = 0; - break; - } - -- if (info->ChipFamily >= CHIP_FAMILY_R600) -- dstPitch = (dstPitch + 255) & ~255; -- - size = dstPitch * dst_height + 2 * dstPitch2 * ((dst_height + 1) >> 1); -+ size = (size + hw_align) & ~hw_align; - - if (pPriv->video_memory != NULL && size != pPriv->size) { - radeon_legacy_free_memory(pScrn, pPriv->video_memory); -@@ -412,16 +326,16 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - size * 2, 64); - if (pPriv->video_offset == 0) - return BadAlloc; -+ -+ if (info->cs) -+ pPriv->src_bo = pPriv->video_memory; - } - - /* Bicubic filter loading */ -- if (pPriv->bicubic_memory == NULL && pPriv->bicubic_enabled) { -- pPriv->bicubic_offset = radeon_legacy_allocate_memory(pScrn, -- &pPriv->bicubic_memory, -- sizeof(bicubic_tex_512), 64); -- pPriv->bicubic_src_offset = pPriv->bicubic_offset + info->fbLocation + pScrn->fbOffset; -- if (pPriv->bicubic_offset == 0) -- pPriv->bicubic_enabled = FALSE; -+ if (pPriv->bicubic_enabled) { -+ if (info->bicubic_offset == 0) -+ pPriv->bicubic_enabled = FALSE; -+ pPriv->bicubic_src_offset = info->bicubic_offset; - } - - if (pDraw->type == DRAWABLE_WINDOW) -@@ -447,130 +361,87 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - } - - /* copy data */ -- top = y1 >> 16; -- left = (x1 >> 16) & ~1; -- npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; -+ top = (y1 >> 16) & ~1; -+ nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; -+ -+ pPriv->src_offset = pPriv->video_offset; -+ if (info->cs) { -+ int ret; -+ radeon_bo_wait(pPriv->src_bo); -+ ret = radeon_bo_map(pPriv->src_bo, 1); -+ if (ret) -+ return BadAlloc; - -- pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; -- if (info->ChipFamily >= CHIP_FAMILY_R600) -+ pPriv->src_addr = pPriv->src_bo->ptr; -+ } else { - pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset); -- else -- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); -+ } - pPriv->src_pitch = dstPitch; -+ - pPriv->planeu_offset = dstPitch * dst_height; -+ pPriv->planeu_offset = (pPriv->planeu_offset + hw_align) & ~hw_align; - pPriv->planev_offset = pPriv->planeu_offset + dstPitch2 * ((dst_height + 1) >> 1); -+ pPriv->planev_offset = (pPriv->planev_offset + hw_align) & ~hw_align; -+ - pPriv->size = size; - pPriv->pDraw = pDraw; - -- --#if 0 -- ErrorF("planeu_offset: 0x%x\n", pPriv->planeu_offset); -- ErrorF("planev_offset: 0x%x\n", pPriv->planev_offset); -- ErrorF("dstPitch2: 0x%x\n", dstPitch2); -- ErrorF("src_offset: 0x%x\n", pPriv->src_offset); -- ErrorF("src_addr: 0x%x\n", pPriv->src_addr); -- ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch); --#endif -- - switch(id) { - case FOURCC_YV12: - case FOURCC_I420: -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- s2offset = srcPitch * height; -- s3offset = (srcPitch2 * (height >> 1)) + s2offset; -- if (info->DMAForXv) { -- if (id == FOURCC_YV12) -- R600CopyPlanarHW(pScrn, buf, buf + s3offset, buf + s2offset, -- pPriv->src_offset, -- srcPitch, srcPitch2, pPriv->src_pitch, -- width, height); -- else -- R600CopyPlanarHW(pScrn, buf, buf + s2offset, buf + s3offset, -- pPriv->src_offset, -- srcPitch, srcPitch2, pPriv->src_pitch, -- width, height); -- } else { -- if (id == FOURCC_YV12) -- R600CopyPlanarSW(pScrn, buf, buf + s3offset, buf + s2offset, -- pPriv->src_addr, -- srcPitch, srcPitch2, pPriv->src_pitch, -- width, height); -- else -- R600CopyPlanarSW(pScrn, buf, buf + s2offset, buf + s3offset, -- pPriv->src_addr, -- srcPitch, srcPitch2, pPriv->src_pitch, -- width, height); -- } -- } -- else if (pPriv->planar_hw) { -- top &= ~1; -- s2offset = srcPitch * ((height + 1) & ~1); -- s3offset = s2offset + srcPitch2 * ((height + 1) >> 1); -- s2offset += (top >> 1) * srcPitch2 + (left >> 1); -- s3offset += (top >> 1) * srcPitch2 + (left >> 1); -- d2line = pPriv->planeu_offset; -- d3line = pPriv->planev_offset; -- d2line += (top >> 1) * dstPitch2 - (top * dstPitch); -- d3line += (top >> 1) * dstPitch2 - (top * dstPitch); -- nlines = ((y2 + 0xffff) >> 16) - top; -- if(id == FOURCC_YV12) { -+ s2offset = srcPitch * ((height + 1) & ~1); -+ s3offset = s2offset + (srcPitch2 * ((height + 1) >> 1)); -+ s2offset += ((top >> 1) * srcPitch2); -+ s3offset += ((top >> 1) * srcPitch2); -+ if (pPriv->bicubic_state != BICUBIC_OFF) { -+ if (id == FOURCC_I420) { - tmp = s2offset; - s2offset = s3offset; - s3offset = tmp; - } -- RADEONCopyData(pScrn, buf + (top * srcPitch) + left, pPriv->src_addr + left, -- srcPitch, dstPitch, nlines, npixels, 1); -- RADEONCopyData(pScrn, buf + s2offset, pPriv->src_addr + d2line + (left >> 1), -- srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1); -- RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line + (left >> 1), -- srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1); -+ RADEONCopyMungedData(pScrn, buf + (top * srcPitch), -+ buf + s2offset, buf + s3offset, pPriv->src_addr + (top * dstPitch), -+ srcPitch, srcPitch2, dstPitch, nlines, width); - } else { -- top &= ~1; -- nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; -- s2offset = srcPitch * height; -- s3offset = (srcPitch2 * (height >> 1)) + s2offset; -- pPriv->src_addr += left << 1; -- tmp = ((top >> 1) * srcPitch2) + (left >> 1); -- s2offset += tmp; -- s3offset += tmp; -- if (id == FOURCC_I420) { -+ if (id == FOURCC_YV12) { - tmp = s2offset; - s2offset = s3offset; - s3offset = tmp; - } -- RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, -- buf + s2offset, buf + s3offset, pPriv->src_addr, -- srcPitch, srcPitch2, dstPitch, nlines, npixels); -+ d2line = pPriv->planeu_offset + ((top >> 1) * dstPitch2); -+ d3line = pPriv->planev_offset + ((top >> 1) * dstPitch2); -+ -+ if (info->ChipFamily >= CHIP_FAMILY_R600) { -+ R600CopyData(pScrn, buf + (top * srcPitch), pPriv->src_addr + (top * dstPitch), -+ srcPitch, dstPitch, nlines, width, 1); -+ R600CopyData(pScrn, buf + s2offset, pPriv->src_addr + d2line, -+ srcPitch2, dstPitch2, (nlines + 1) >> 1, width >> 1, 1); -+ R600CopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line, -+ srcPitch2, dstPitch2, (nlines + 1) >> 1, width >> 1, 1); -+ } else { -+ RADEONCopyData(pScrn, buf + (top * srcPitch), pPriv->src_addr + (top * dstPitch), -+ srcPitch, dstPitch, nlines, width, 1); -+ RADEONCopyData(pScrn, buf + s2offset, pPriv->src_addr + d2line, -+ srcPitch2, dstPitch2, (nlines + 1) >> 1, width >> 1, 1); -+ RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line, -+ srcPitch2, dstPitch2, (nlines + 1) >> 1, width >> 1, 1); -+ } - } - break; - case FOURCC_UYVY: - case FOURCC_YUY2: - default: -- if (info->ChipFamily >= CHIP_FAMILY_R600) { -- if (info->DMAForXv) -- R600CopyPackedHW(pScrn, buf, pPriv->src_offset, -- 2 * width, pPriv->src_pitch, -- width, height); -- else -- R600CopyPackedSW(pScrn, buf, pPriv->src_addr, -- 2 * width, pPriv->src_pitch, -- width, height); -- } else { -- nlines = ((y2 + 0xffff) >> 16) - top; -- pPriv->src_addr += left << 1; -- RADEONCopyData(pScrn, buf + (top * srcPitch) + (left << 1), -- pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); -- } -+ if (info->ChipFamily >= CHIP_FAMILY_R600) -+ R600CopyData(pScrn, buf + (top * srcPitch), -+ pPriv->src_addr + (top * dstPitch), -+ srcPitch, dstPitch, nlines, width, 2); -+ else -+ RADEONCopyData(pScrn, buf + (top * srcPitch), -+ pPriv->src_addr + (top * dstPitch), -+ srcPitch, dstPitch, nlines, width, 2); - break; - } - -- /* Upload bicubic filter tex */ -- if (pPriv->bicubic_enabled) { -- if (info->ChipFamily < CHIP_FAMILY_R600) -- RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, -- (uint8_t *)(info->FB + pPriv->bicubic_offset), 1024, 1024, 1, 512, 2); -- } -- - /* update cliplist */ - if (!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes)) { - REGION_COPY(pScrn->pScreen, &pPriv->clip, clipBoxes); -@@ -579,6 +450,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - pPriv->id = id; - pPriv->src_w = src_w; - pPriv->src_h = src_h; -+ pPriv->src_x = src_x; -+ pPriv->src_y = src_y; - pPriv->drw_x = drw_x; - pPriv->drw_y = drw_y; - pPriv->dst_w = drw_w; -@@ -586,14 +459,32 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - pPriv->w = width; - pPriv->h = height; - -+ if (info->cs) -+ radeon_bo_unmap(pPriv->src_bo); - #ifdef XF86DRI -- if (IS_R600_3D) -- R600DisplayTexturedVideo(pScrn, pPriv); -- else if (info->directRenderingEnabled) -- RADEONDisplayTexturedVideoCP(pScrn, pPriv); -- else -+ if (info->directRenderingEnabled) { -+ if (IS_R600_3D) -+ R600DisplayTexturedVideo(pScrn, pPriv); -+ else if (IS_R500_3D) -+ R500DisplayTexturedVideoCP(pScrn, pPriv); -+ else if (IS_R300_3D) -+ R300DisplayTexturedVideoCP(pScrn, pPriv); -+ else if (IS_R200_3D) -+ R200DisplayTexturedVideoCP(pScrn, pPriv); -+ else -+ RADEONDisplayTexturedVideoCP(pScrn, pPriv); -+ } else - #endif -- RADEONDisplayTexturedVideoMMIO(pScrn, pPriv); -+ { -+ if (IS_R500_3D) -+ R500DisplayTexturedVideoMMIO(pScrn, pPriv); -+ else if (IS_R300_3D) -+ R300DisplayTexturedVideoMMIO(pScrn, pPriv); -+ else if (IS_R200_3D) -+ R200DisplayTexturedVideoMMIO(pScrn, pPriv); -+ else -+ RADEONDisplayTexturedVideoMMIO(pScrn, pPriv); -+ } - - return Success; - } -@@ -636,28 +527,73 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] = - {15, TrueColor}, {16, TrueColor}, {24, TrueColor} - }; - --#define NUM_ATTRIBUTES 2 -+#define NUM_ATTRIBUTES 1 - - static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] = - { - {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, -- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"}, - {0, 0, 0, NULL} - }; - --#define NUM_ATTRIBUTES_R300 3 -+#define NUM_ATTRIBUTES_R200 6 -+ -+static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] = -+{ -+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, -+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, -+ {0, 0, 0, NULL} -+}; -+ -+#define NUM_ATTRIBUTES_R300 8 - - static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] = - { - {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"}, - {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, -- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, -+ {XvSettable | XvGettable, 100, 10000, "XV_GAMMA"}, -+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, -+ {0, 0, 0, NULL} -+}; -+ -+#define NUM_ATTRIBUTES_R500 7 -+ -+static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] = -+{ -+ {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"}, -+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, -+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, -+ {0, 0, 0, NULL} -+}; -+ -+#define NUM_ATTRIBUTES_R600 6 -+ -+static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] = -+{ -+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, -+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, -+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, - {0, 0, 0, NULL} - }; - - static Atom xvBicubic; - static Atom xvVSync; --static Atom xvHWPlanar; -+static Atom xvBrightness, xvContrast, xvSaturation, xvHue; -+static Atom xvGamma, xvColorspace; - - #define NUM_IMAGES 4 - -@@ -684,8 +620,18 @@ RADEONGetTexPortAttribute(ScrnInfoPtr pScrn, - *value = pPriv->bicubic_state; - else if (attribute == xvVSync) - *value = pPriv->vsync; -- else if (attribute == xvHWPlanar) -- *value = pPriv->planar_state; -+ else if (attribute == xvBrightness) -+ *value = pPriv->brightness; -+ else if (attribute == xvContrast) -+ *value = pPriv->contrast; -+ else if (attribute == xvSaturation) -+ *value = pPriv->saturation; -+ else if (attribute == xvHue) -+ *value = pPriv->hue; -+ else if (attribute == xvGamma) -+ *value = pPriv->gamma; -+ else if(attribute == xvColorspace) -+ *value = pPriv->transform_index; - else - return BadMatch; - -@@ -707,14 +653,73 @@ RADEONSetTexPortAttribute(ScrnInfoPtr pScrn, - pPriv->bicubic_state = ClipValue (value, 0, 2); - else if (attribute == xvVSync) - pPriv->vsync = ClipValue (value, 0, 1); -- else if (attribute == xvHWPlanar) -- pPriv->planar_state = ClipValue (value, 0, 1); -+ else if (attribute == xvBrightness) -+ pPriv->brightness = ClipValue (value, -1000, 1000); -+ else if (attribute == xvContrast) -+ pPriv->contrast = ClipValue (value, -1000, 1000); -+ else if (attribute == xvSaturation) -+ pPriv->saturation = ClipValue (value, -1000, 1000); -+ else if (attribute == xvHue) -+ pPriv->hue = ClipValue (value, -1000, 1000); -+ else if (attribute == xvGamma) -+ pPriv->gamma = ClipValue (value, 100, 10000); -+ else if(attribute == xvColorspace) -+ pPriv->transform_index = ClipValue (value, 0, 1); - else - return BadMatch; - - return Success; - } - -+Bool radeon_load_bicubic_texture(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ /* Bicubic filter loading */ -+ info->bicubic_offset = radeon_legacy_allocate_memory(pScrn, -+ &info->bicubic_memory, -+ sizeof(bicubic_tex_512), 64); -+ if (info->bicubic_offset == 0) -+ return FALSE; -+ -+ if (info->cs) -+ info->bicubic_bo = info->bicubic_memory; -+ -+ /* Upload bicubic filter tex */ -+ if (info->ChipFamily < CHIP_FAMILY_R600) { -+ uint8_t *bicubic_addr; -+ int ret; -+ if (info->cs) { -+ radeon_bo_wait(info->bicubic_bo); -+ ret = radeon_bo_map(info->bicubic_bo, 1); -+ if (ret) -+ return FALSE; -+ -+ bicubic_addr = info->bicubic_bo->ptr; -+ } else -+ bicubic_addr = (uint8_t *)(info->FB + info->bicubic_offset); -+ -+ RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, bicubic_addr, 1024, 1024, 1, 512, 2); -+ if (info->cs) -+ radeon_bo_unmap(info->bicubic_bo); -+ } -+ return TRUE; -+} -+ -+#if 0 -+/* XXX */ -+static void radeon_unload_bicubic_texture(ScrnInfoPtr pScrn) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ -+ if (info->bicubic_memory != NULL) { -+ radeon_legacy_free_memory(pScrn, info->bicubic_memory); -+ info->bicubic_memory = NULL; -+ } -+ -+} -+#endif -+ - XF86VideoAdaptorPtr - RADEONSetupImageTexturedVideo(ScreenPtr pScreen) - { -@@ -732,7 +737,12 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) - - xvBicubic = MAKE_ATOM("XV_BICUBIC"); - xvVSync = MAKE_ATOM("XV_VSYNC"); -- xvHWPlanar = MAKE_ATOM("XV_HWPLANAR"); -+ xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); -+ xvContrast = MAKE_ATOM("XV_CONTRAST"); -+ xvSaturation = MAKE_ATOM("XV_SATURATION"); -+ xvHue = MAKE_ATOM("XV_HUE"); -+ xvGamma = MAKE_ATOM("XV_GAMMA"); -+ xvColorspace = MAKE_ATOM("XV_COLORSPACE"); - - adapt->type = XvWindowMask | XvInputMask | XvImageMask; - adapt->flags = 0; -@@ -752,10 +762,23 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) - pPortPriv = - (RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]); - -- if (IS_R300_3D || IS_R500_3D) { -+ if (IS_R600_3D) { -+ adapt->pAttributes = Attributes_r600; -+ adapt->nAttributes = NUM_ATTRIBUTES_R600; -+ } -+ else if (IS_R500_3D) { -+ adapt->pAttributes = Attributes_r500; -+ adapt->nAttributes = NUM_ATTRIBUTES_R500; -+ } -+ else if (IS_R300_3D) { - adapt->pAttributes = Attributes_r300; - adapt->nAttributes = NUM_ATTRIBUTES_R300; -- } else { -+ } -+ else if (IS_R200_3D) { -+ adapt->pAttributes = Attributes_r200; -+ adapt->nAttributes = NUM_ATTRIBUTES_R200; -+ } -+ else { - adapt->pAttributes = Attributes; - adapt->nAttributes = NUM_ATTRIBUTES; - } -@@ -782,13 +805,21 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) - pPriv->doubleBuffer = 0; - pPriv->bicubic_state = BICUBIC_AUTO; - pPriv->vsync = TRUE; -- pPriv->planar_state = 1; -+ pPriv->brightness = 0; -+ pPriv->contrast = 0; -+ pPriv->saturation = 0; -+ pPriv->hue = 0; -+ pPriv->gamma = 1000; -+ pPriv->transform_index = 0; - - /* gotta uninit this someplace, XXX: shouldn't be necessary for textured */ - REGION_NULL(pScreen, &pPriv->clip); - adapt->pPortPrivates[i].ptr = (pointer) (pPriv); - } - -+ if (IS_R500_3D || IS_R300_3D) -+ radeon_load_bicubic_texture(pScrn); -+ - return adapt; - } - -diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c -index 6cb2870..1d6b2e7 100644 ---- a/src/radeon_textured_videofuncs.c -+++ b/src/radeon_textured_videofuncs.c -@@ -47,8 +47,8 @@ - - #ifdef ACCEL_CP - --#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ --do { \ -+#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ -+do { \ - OUT_RING_F(_dstX); \ - OUT_RING_F(_dstY); \ - OUT_RING_F(_srcX); \ -@@ -57,7 +57,7 @@ do { \ - OUT_RING_F(_maskY); \ - } while (0) - --#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ -+#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ - do { \ - OUT_RING_F(_dstX); \ - OUT_RING_F(_dstY); \ -@@ -67,7 +67,7 @@ do { \ - - #else /* ACCEL_CP */ - --#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ -+#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ - do { \ - OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ - OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ -@@ -77,7 +77,7 @@ do { \ - OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \ - } while (0) - --#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \ -+#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ - do { \ - OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ - OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ -@@ -92,30 +92,48 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - { - RADEONInfoPtr info = RADEONPTR(pScrn); - PixmapPtr pPixmap = pPriv->pPixmap; -- uint32_t txformat; -- uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; -- uint32_t dst_offset, dst_pitch, dst_format; -- uint32_t txenable, colorpitch; -- uint32_t blendcntl; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ uint32_t txformat, txsize, txpitch, txoffset; -+ uint32_t dst_pitch, dst_format; -+ uint32_t colorpitch; - Bool isplanar = FALSE; - int dstxoff, dstyoff, pixel_shift, vtx_count; - BoxPtr pBox = REGION_RECTS(&pPriv->clip); - int nBox = REGION_NUM_RECTS(&pPriv->clip); - ACCEL_PREAMBLE(); - -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ if (pPriv->bicubic_enabled) -+ radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPixmap); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) { -+ ErrorF("Not enough RAM to hw accel xv operation\n"); -+ return; -+ } -+ } -+#endif -+ - pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; - -+ - #ifdef USE_EXA - if (info->useEXA) { -- dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; - dst_pitch = exaGetPixmapPitch(pPixmap); - } else - #endif -- { -- dst_offset = (pPixmap->devPrivate.ptr - info->FB) + -- info->fbLocation + pScrn->fbOffset; -- dst_pitch = pPixmap->devKind; -- } -+ { -+ dst_pitch = pPixmap->devKind; -+ } - - #ifdef COMPOSITE - dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; -@@ -130,1859 +148,2142 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - RADEON_SWITCH_TO_3D(); - } else - #endif -- { -- BEGIN_ACCEL(2); -- if (IS_R300_3D || IS_R500_3D) -- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -- else -- OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); -- /* We must wait for 3d to idle, in case source was just written as a dest. */ -- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -- RADEON_WAIT_HOST_IDLECLEAN | -- RADEON_WAIT_2D_IDLECLEAN | -- RADEON_WAIT_3D_IDLECLEAN | -- RADEON_WAIT_DMA_GUI_IDLE); -- FINISH_ACCEL(); -+ { -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); -+ /* We must wait for 3d to idle, in case source was just written as a dest. */ -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -+ RADEON_WAIT_HOST_IDLECLEAN | -+ RADEON_WAIT_2D_IDLECLEAN | -+ RADEON_WAIT_3D_IDLECLEAN | -+ RADEON_WAIT_DMA_GUI_IDLE); -+ FINISH_ACCEL(); - -- if (!info->accel_state->XInited3D) -- RADEONInit3DEngine(pScrn); -- } -+ if (!info->accel_state->XInited3D) -+ RADEONInit3DEngine(pScrn); -+ } - -- if (pPriv->bicubic_enabled) -+ /* Same for R100/R200 */ -+ switch (pPixmap->drawable.bitsPerPixel) { -+ case 16: -+ if (pPixmap->drawable.depth == 15) -+ dst_format = RADEON_COLOR_FORMAT_ARGB1555; -+ else -+ dst_format = RADEON_COLOR_FORMAT_RGB565; -+ break; -+ case 32: -+ dst_format = RADEON_COLOR_FORMAT_ARGB8888; -+ break; -+ default: -+ return; -+ } -+ -+ if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { -+ isplanar = TRUE; -+ txformat = RADEON_TXFORMAT_Y8; -+ } else { -+ if (pPriv->id == FOURCC_UYVY) -+ txformat = RADEON_TXFORMAT_YVYU422; -+ else -+ txformat = RADEON_TXFORMAT_VYUY422; -+ } -+ -+ txformat |= RADEON_TXFORMAT_NON_POWER2; -+ -+ colorpitch = dst_pitch >> pixel_shift; -+ -+ if (RADEONTilingEnabled(pScrn, pPixmap)) -+ colorpitch |= RADEON_COLOR_TILE_ENABLE; -+ -+ txoffset = info->cs ? 0 : pPriv->src_offset; -+ -+ BEGIN_ACCEL_RELOC(4,2); -+ -+ OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); -+ EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); -+ EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); -+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, -+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); -+ -+ FINISH_ACCEL(); -+ -+ if (isplanar) { -+ /* need 2 texcoord sets (even though they are identical) due -+ to denormalization! hw apparently can't premultiply -+ same coord set by different texture size */ - vtx_count = 6; -- else -+ -+ txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | -+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); -+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -+ txpitch -= 32; -+ -+ BEGIN_ACCEL(23); -+ -+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | -+ RADEON_SE_VTX_FMT_ST0 | -+ RADEON_SE_VTX_FMT_ST1)); -+ -+ OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | -+ RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE | -+ RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE | -+ RADEON_PLANAR_YUV_ENABLE)); -+ -+ /* Y */ -+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, -+ RADEON_MAG_FILTER_LINEAR | -+ RADEON_MIN_FILTER_LINEAR | -+ RADEON_CLAMP_S_CLAMP_LAST | -+ RADEON_CLAMP_T_CLAMP_LAST | -+ RADEON_YUV_TO_RGB); -+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo); -+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, -+ RADEON_COLOR_ARG_A_ZERO | -+ RADEON_COLOR_ARG_B_ZERO | -+ RADEON_COLOR_ARG_C_T0_COLOR | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, -+ RADEON_ALPHA_ARG_A_ZERO | -+ RADEON_ALPHA_ARG_B_ZERO | -+ RADEON_ALPHA_ARG_C_T0_ALPHA | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ -+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, -+ (pPriv->w - 1) | -+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, -+ pPriv->src_pitch - 32); -+ -+ /* U */ -+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, -+ RADEON_MAG_FILTER_LINEAR | -+ RADEON_MIN_FILTER_LINEAR | -+ RADEON_CLAMP_S_CLAMP_LAST | -+ RADEON_CLAMP_T_CLAMP_LAST); -+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo); -+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1, -+ RADEON_COLOR_ARG_A_ZERO | -+ RADEON_COLOR_ARG_B_ZERO | -+ RADEON_COLOR_ARG_C_T0_COLOR | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_1, -+ RADEON_ALPHA_ARG_A_ZERO | -+ RADEON_ALPHA_ARG_B_ZERO | -+ RADEON_ALPHA_ARG_C_T0_ALPHA | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ -+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize); -+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch); -+ -+ /* V */ -+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_2, -+ RADEON_MAG_FILTER_LINEAR | -+ RADEON_MIN_FILTER_LINEAR | -+ RADEON_CLAMP_S_CLAMP_LAST | -+ RADEON_CLAMP_T_CLAMP_LAST); -+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo); -+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2, -+ RADEON_COLOR_ARG_A_ZERO | -+ RADEON_COLOR_ARG_B_ZERO | -+ RADEON_COLOR_ARG_C_T0_COLOR | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_2, -+ RADEON_ALPHA_ARG_A_ZERO | -+ RADEON_ALPHA_ARG_B_ZERO | -+ RADEON_ALPHA_ARG_C_T0_ALPHA | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ -+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize); -+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch); -+ FINISH_ACCEL(); -+ } else { - vtx_count = 4; -+ BEGIN_ACCEL(9); -+ -+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | -+ RADEON_SE_VTX_FMT_ST0)); -+ -+ OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); -+ -+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, -+ RADEON_MAG_FILTER_LINEAR | -+ RADEON_MIN_FILTER_LINEAR | -+ RADEON_CLAMP_S_CLAMP_LAST | -+ RADEON_CLAMP_T_CLAMP_LAST | -+ RADEON_YUV_TO_RGB); -+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0); -+ OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, pPriv->src_bo); -+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, -+ RADEON_COLOR_ARG_A_ZERO | -+ RADEON_COLOR_ARG_B_ZERO | -+ RADEON_COLOR_ARG_C_T0_COLOR | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, -+ RADEON_ALPHA_ARG_A_ZERO | -+ RADEON_ALPHA_ARG_B_ZERO | -+ RADEON_ALPHA_ARG_C_T0_ALPHA | -+ RADEON_BLEND_CTL_ADD | -+ RADEON_CLAMP_TX); -+ -+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, -+ (pPriv->w - 1) | -+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, -+ pPriv->src_pitch - 32); -+ FINISH_ACCEL(); -+ } - -- if (IS_R300_3D || IS_R500_3D) { -- uint32_t output_fmt; -- -- switch (pPixmap->drawable.bitsPerPixel) { -- case 16: -- if (pPixmap->drawable.depth == 15) -- dst_format = R300_COLORFORMAT_ARGB1555; -- else -- dst_format = R300_COLORFORMAT_RGB565; -- break; -- case 32: -- dst_format = R300_COLORFORMAT_ARGB8888; -- break; -- default: -- return; -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -+ OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pPixmap->drawable.width) << RADEON_RE_WIDTH_SHIFT) | -+ ((pPixmap->drawable.height) << RADEON_RE_HEIGHT_SHIFT))); -+ FINISH_ACCEL(); -+ -+ if (pPriv->vsync) { -+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, -+ pPriv->drw_x, -+ pPriv->drw_x + pPriv->dst_w, -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h); -+ if (crtc) { -+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; -+ -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, -+ radeon_crtc->crtc_id, -+ pPriv->drw_y - crtc->y, -+ (pPriv->drw_y - crtc->y) + pPriv->dst_h); - } -+ } -+ /* -+ * Rendering of the actual polygon is done in two different -+ * ways depending on chip generation: -+ * -+ * < R300: -+ * -+ * These chips can render a rectangle in one pass, so -+ * handling is pretty straight-forward. -+ * -+ * >= R300: -+ * -+ * These chips can accept a quad, but will render it as -+ * two triangles which results in a diagonal tear. Instead -+ * We render a single, large triangle and use the scissor -+ * functionality to restrict it to the desired rectangle. -+ * Due to guardband limits on r3xx/r4xx, we can only use -+ * the single triangle up to 4021 pixels; above that we -+ * render as a quad. -+ */ -+ -+#ifdef ACCEL_CP -+ BEGIN_RING(nBox * 3 * vtx_count + 5); -+ OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, -+ nBox * 3 * vtx_count + 1)); -+ if (isplanar) -+ OUT_RING(RADEON_CP_VC_FRMT_XY | -+ RADEON_CP_VC_FRMT_ST0 | -+ RADEON_CP_VC_FRMT_ST1); -+ else -+ OUT_RING(RADEON_CP_VC_FRMT_XY | -+ RADEON_CP_VC_FRMT_ST0); -+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | -+ RADEON_CP_VC_CNTL_PRIM_WALK_RING | -+ RADEON_CP_VC_CNTL_MAOS_ENABLE | -+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | -+ ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); -+#else /* ACCEL_CP */ -+ BEGIN_ACCEL(nBox * vtx_count * 3 + 2); -+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | -+ RADEON_VF_PRIM_WALK_DATA | -+ RADEON_VF_RADEON_MODE | -+ ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); -+#endif - -- output_fmt = (R300_OUT_FMT_C4_8 | -- R300_OUT_FMT_C0_SEL_BLUE | -- R300_OUT_FMT_C1_SEL_GREEN | -- R300_OUT_FMT_C2_SEL_RED | -- R300_OUT_FMT_C3_SEL_ALPHA); -+ while (nBox--) { -+ int srcX, srcY, srcw, srch; -+ int dstX, dstY, dstw, dsth; -+ dstX = pBox->x1 + dstxoff; -+ dstY = pBox->y1 + dstyoff; -+ dstw = pBox->x2 - pBox->x1; -+ dsth = pBox->y2 - pBox->y1; - -- colorpitch = dst_pitch >> pixel_shift; -- colorpitch |= dst_format; -+ srcX = pPriv->src_x; -+ srcX += ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = pPriv->src_y; -+ srcY += ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; - -- if (RADEONTilingEnabled(pScrn, pPixmap)) -- colorpitch |= R300_COLORTILE; -+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; -+ srch = (pPriv->src_h * dsth) / pPriv->dst_h; - -- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { -- isplanar = TRUE; -- } - - if (isplanar) { -- txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; -- txpitch = pPriv->src_pitch; -+ /* -+ * Just render a rect (using three coords). -+ */ -+ VTX_OUT_6((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_6((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); - } else { -- if (pPriv->id == FOURCC_UYVY) -- txformat1 = R300_TX_FORMAT_YVYU422; -- else -- txformat1 = R300_TX_FORMAT_VYUY422; -+ /* -+ * Just render a rect (using three coords). -+ */ -+ VTX_OUT_4((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); -+ } - -- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; -+ pBox++; -+ } - -- /* pitch is in pixels */ -- txpitch = pPriv->src_pitch / 2; -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); -+#ifdef ACCEL_CP -+ ADVANCE_RING(); -+#else -+ FINISH_ACCEL(); -+#endif /* !ACCEL_CP */ -+ -+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip); -+} -+ -+static void -+FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ PixmapPtr pPixmap = pPriv->pPixmap; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ uint32_t txformat; -+ uint32_t txfilter, txsize, txpitch, txoffset; -+ uint32_t dst_pitch, dst_format; -+ uint32_t colorpitch; -+ Bool isplanar = FALSE; -+ int dstxoff, dstyoff, pixel_shift, vtx_count; -+ BoxPtr pBox = REGION_RECTS(&pPriv->clip); -+ int nBox = REGION_NUM_RECTS(&pPriv->clip); -+ -+ /* note: in contrast to r300, use input biasing on uv components */ -+ const float Loff = -0.0627; -+ float uvcosf, uvsinf; -+ float yco, yoff; -+ float uco[3], vco[3]; -+ float bright, cont, sat; -+ int ref = pPriv->transform_index; -+ float ucscale = 0.25, vcscale = 0.25; -+ Bool needux8 = FALSE, needvx8 = FALSE; -+ ACCEL_PREAMBLE(); -+ -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ if (pPriv->bicubic_enabled) -+ radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPixmap); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) { -+ ErrorF("Not enough RAM to hw accel xv operation\n"); -+ return; - } -- txpitch -= 1; -+ } -+#endif - -- txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -- R300_TXPITCH_EN); -+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; - -- info->accel_state->texW[0] = pPriv->w; -- info->accel_state->texH[0] = pPriv->h; -+#ifdef USE_EXA -+ if (info->useEXA) { -+ dst_pitch = exaGetPixmapPitch(pPixmap); -+ } else -+#endif -+ { -+ dst_pitch = pPixmap->devKind; -+ } - -- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -- R300_TX_MAG_FILTER_LINEAR | -- R300_TX_MIN_FILTER_LINEAR | -- (0 << R300_TX_ID_SHIFT)); -+#ifdef COMPOSITE -+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; -+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; -+#else -+ dstxoff = 0; -+ dstyoff = 0; -+#endif - -+#ifdef USE_EXA -+ if (info->useEXA) { -+ RADEON_SWITCH_TO_3D(); -+ } else -+#endif -+ { -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH); -+ /* We must wait for 3d to idle, in case source was just written as a dest. */ -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -+ RADEON_WAIT_HOST_IDLECLEAN | -+ RADEON_WAIT_2D_IDLECLEAN | -+ RADEON_WAIT_3D_IDLECLEAN | -+ RADEON_WAIT_DMA_GUI_IDLE); -+ FINISH_ACCEL(); - -- if (IS_R500_3D && ((pPriv->w - 1) & 0x800)) -- txpitch |= R500_TXWIDTH_11; -+ if (!info->accel_state->XInited3D) -+ RADEONInit3DEngine(pScrn); -+ } - -- if (IS_R500_3D && ((pPriv->h - 1) & 0x800)) -- txpitch |= R500_TXHEIGHT_11; -+ /* Same for R100/R200 */ -+ switch (pPixmap->drawable.bitsPerPixel) { -+ case 16: -+ if (pPixmap->drawable.depth == 15) -+ dst_format = RADEON_COLOR_FORMAT_ARGB1555; -+ else -+ dst_format = RADEON_COLOR_FORMAT_RGB565; -+ break; -+ case 32: -+ dst_format = RADEON_COLOR_FORMAT_ARGB8888; -+ break; -+ default: -+ return; -+ } - -- txoffset = pPriv->src_offset; -+ if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) { -+ isplanar = TRUE; -+ txformat = RADEON_TXFORMAT_I8; -+ } else { -+ if (pPriv->id == FOURCC_UYVY) -+ txformat = RADEON_TXFORMAT_YVYU422; -+ else -+ txformat = RADEON_TXFORMAT_VYUY422; -+ } -+ -+ txformat |= RADEON_TXFORMAT_NON_POWER2; -+ -+ colorpitch = dst_pitch >> pixel_shift; -+ -+ if (RADEONTilingEnabled(pScrn, pPixmap)) -+ colorpitch |= RADEON_COLOR_TILE_ENABLE; -+ -+ BEGIN_ACCEL_RELOC(4,2); -+ -+ OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format); -+ EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap); -+ EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap); -+ -+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, -+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); -+ -+ FINISH_ACCEL(); -+ -+ txfilter = R200_MAG_FILTER_LINEAR | -+ R200_MIN_FILTER_LINEAR | -+ R200_CLAMP_S_CLAMP_LAST | -+ R200_CLAMP_T_CLAMP_LAST; -+ -+ /* contrast can cause constant overflow, clamp */ -+ cont = RTFContrast(pPriv->contrast); -+ if (cont * trans[ref].RefLuma > 2.0) -+ cont = 2.0 / trans[ref].RefLuma; -+ /* brightness is only from -0.5 to 0.5 should be safe */ -+ bright = RTFBrightness(pPriv->brightness); -+ /* saturation can also cause overflow, clamp */ -+ sat = RTFSaturation(pPriv->saturation); -+ if (sat * trans[ref].RefBCb > 4.0) -+ sat = 4.0 / trans[ref].RefBCb; -+ uvcosf = sat * cos(RTFHue(pPriv->hue)); -+ uvsinf = sat * sin(RTFHue(pPriv->hue)); -+ -+ yco = trans[ref].RefLuma * cont; -+ uco[0] = -trans[ref].RefRCr * uvsinf; -+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; -+ uco[2] = trans[ref].RefBCb * uvcosf; -+ vco[0] = trans[ref].RefRCr * uvcosf; -+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; -+ vco[2] = trans[ref].RefBCb * uvsinf; -+ yoff = Loff * yco + bright; -+ -+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) { -+ needux8 = TRUE; -+ ucscale = 0.125; -+ } -+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) { -+ needvx8 = TRUE; -+ vcscale = 0.125; -+ } -+ -+ txoffset = info->cs ? 0 : pPriv->src_offset; -+ -+ if (isplanar) { -+ /* need 2 texcoord sets (even though they are identical) due -+ to denormalization! hw apparently can't premultiply -+ same coord set by different texture size */ -+ vtx_count = 6; -+ -+ txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | -+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); -+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -+ txpitch -= 32; -+ -+ BEGIN_ACCEL(36); -+ -+ OUT_ACCEL_REG(RADEON_PP_CNTL, -+ RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | -+ RADEON_TEX_BLEND_0_ENABLE | -+ RADEON_TEX_BLEND_1_ENABLE | -+ RADEON_TEX_BLEND_2_ENABLE); -+ -+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); -+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1, -+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | -+ (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); -+ -+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); -+ OUT_ACCEL_REG(R200_PP_TXSIZE_0, -+ (pPriv->w - 1) | -+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo); -+ -+ OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); -+ OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize); -+ OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo); -+ -+ OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); -+ OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize); -+ OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo); -+ -+ /* similar to r300 code. Note the big problem is that hardware constants -+ * are 8 bits only, representing 0.0-1.0. We can get that up (using bias -+ * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually -+ * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but -+ * the constants not. To get larger range can use output scale, but for -+ * that 2.018 value we need a total scale by 8, which means the constants -+ * really have no accuracy whatsoever (5 fractional bits only). -+ * The only direct way to get high precision "constants" into the fragment -+ * pipe I know of is to use the texcoord interpolator (not color, this one -+ * is 8 bit only too), which seems a bit expensive. We're lucky though it -+ * seems the values we need seem to fit better than worst case (get about -+ * 6 fractional bits for this instead of 5, at least when not correcting for -+ * hue/saturation/contrast/brightness, which is the same as for vco - yco and -+ * yoff get 8 fractional bits). Try to preserve as much accuracy as possible -+ * even with non-default saturation/hue/contrast/brightness adjustments, -+ * it gets a little crazy and ultimately precision might still be lacking. -+ * -+ * A higher precision (8 fractional bits) version might just put uco into -+ * a texcoord, and calculate a new vcoconst in the shader, like so: -+ * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable -+ * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} -+ * vcocalc = ADD temp, bias/scale(cohelper), vco -+ * would in total use 4 tex units, 4 instructions which seems fairly -+ * balanced for this architecture (instead of 3 + 3 for the solution here) -+ * -+ * temp = MAD(yco, yuv.yyyy, yoff) -+ * temp = MAD(uco, yuv.uuuu, temp) -+ * result = MAD(vco, yuv.vvvv, temp) -+ * -+ * note first mad produces actually scalar, hence we transform -+ * it into a dp2a to get 8 bit precision of yco instead of 7 - -+ * That's assuming hw correctly expands consts to internal precision. -+ * (y * 1 + y * (yco - 1) + yoff) -+ * temp = DP2A / 2 (yco, yuv.yyyy, yoff) -+ * temp = MAD (uco / 4, yuv.uuuu * 2, temp) -+ * result = MAD x2 (vco / 2, yuv.vvvv, temp) -+ * -+ * vco, uco need bias (and hence scale too) -+ * -+ */ -+ -+ /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_ARG_B_R0_COLOR | -+ R200_TXC_ARG_C_TFACTOR_COLOR | -+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | -+ R200_TXC_OP_DOT2_ADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, -+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ R200_TXC_SCALE_INV2 | -+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_0, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0, -+ R200_TXA_OUTPUT_REG_NONE); -+ -+ /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_BIAS_ARG_A | -+ R200_TXC_SCALE_ARG_A | -+ R200_TXC_ARG_B_R1_COLOR | -+ R200_TXC_BIAS_ARG_B | -+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) | -+ R200_TXC_ARG_C_R0_COLOR | -+ R200_TXC_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, -+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_1, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1, -+ R200_TXA_OUTPUT_REG_NONE); -+ -+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_BIAS_ARG_A | -+ R200_TXC_SCALE_ARG_A | -+ R200_TXC_ARG_B_R2_COLOR | -+ R200_TXC_BIAS_ARG_B | -+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | -+ R200_TXC_ARG_C_R0_COLOR | -+ R200_TXC_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, -+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ R200_TXC_SCALE_2X | -+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_2, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_COMP_ARG_C | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2, -+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); -+ -+ /* shader constants */ -+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ -+ yco > 1.0 ? yco - 1.0: yco, -+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ -+ 0.0)); -+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ -+ uco[1] * ucscale + 0.5, /* or [-2, 2] */ -+ uco[2] * ucscale + 0.5, -+ 0.0)); -+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ -+ vco[1] * vcscale + 0.5, /* or [-4, 4] */ -+ vco[2] * vcscale + 0.5, -+ 0.0)); - -- BEGIN_ACCEL(6); -- OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); -- OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); -- OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); -- OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); -- OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); -- OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset); - FINISH_ACCEL(); -+ } else { -+ vtx_count = 4; - -- txenable = R300_TEX_0_ENABLE; -+ BEGIN_ACCEL(24); -+ -+ OUT_ACCEL_REG(RADEON_PP_CNTL, -+ RADEON_TEX_0_ENABLE | -+ RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | -+ RADEON_TEX_BLEND_2_ENABLE); -+ -+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); -+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1, -+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); -+ -+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); -+ OUT_ACCEL_REG(R200_PP_TXSIZE_0, -+ (pPriv->w - 1) | -+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); -+ OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo); -+ -+ /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_ARG_B_R0_COLOR | -+ R200_TXC_ARG_C_TFACTOR_COLOR | -+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | -+ R200_TXC_OP_DOT2_ADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, -+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ R200_TXC_SCALE_INV2 | -+ (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | -+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_0, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0, -+ R200_TXA_OUTPUT_REG_NONE); -+ -+ /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_BIAS_ARG_A | -+ R200_TXC_SCALE_ARG_A | -+ R200_TXC_ARG_B_R0_COLOR | -+ R200_TXC_BIAS_ARG_B | -+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) | -+ R200_TXC_ARG_C_R1_COLOR | -+ R200_TXC_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, -+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | -+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_1, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1, -+ R200_TXA_OUTPUT_REG_NONE); -+ -+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ -+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2, -+ R200_TXC_ARG_A_TFACTOR_COLOR | -+ R200_TXC_BIAS_ARG_A | -+ R200_TXC_SCALE_ARG_A | -+ R200_TXC_ARG_B_R0_COLOR | -+ R200_TXC_BIAS_ARG_B | -+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | -+ R200_TXC_ARG_C_R1_COLOR | -+ R200_TXC_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, -+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) | -+ R200_TXC_SCALE_2X | -+ (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | -+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); -+ OUT_ACCEL_REG(R200_PP_TXABLEND_2, -+ R200_TXA_ARG_A_ZERO | -+ R200_TXA_ARG_B_ZERO | -+ R200_TXA_ARG_C_ZERO | -+ R200_TXA_COMP_ARG_C | -+ R200_TXA_OP_MADD); -+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2, -+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); -+ -+ /* shader constants */ -+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ -+ yco > 1.0 ? yco - 1.0: yco, -+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */ -+ 0.0)); -+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ -+ uco[1] * ucscale + 0.5, /* or [-2, 2] */ -+ uco[2] * ucscale + 0.5, -+ 0.0)); -+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ -+ vco[1] * vcscale + 0.5, /* or [-4, 4] */ -+ vco[2] * vcscale + 0.5, -+ 0.0)); - -- if (isplanar) { -- txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -- R300_TXPITCH_EN); -- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -- txpitch -= 1; -- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -- R300_TX_MIN_FILTER_LINEAR | -- R300_TX_MAG_FILTER_LINEAR); -- -- BEGIN_ACCEL(12); -- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); -- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -- OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); -- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -- OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset); -- OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); -- OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); -- OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); -- OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); -- OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); -- OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset); -- FINISH_ACCEL(); -- txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; -- } -+ FINISH_ACCEL(); -+ } - -- if (pPriv->bicubic_enabled) { -- /* Size is 128x1 */ -- txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | -- (0x0 << R300_TXHEIGHT_SHIFT) | -- R300_TXPITCH_EN); -- /* Format is 32-bit floats, 4bpp */ -- txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); -- /* Pitch is 127 (128-1) */ -- txpitch = 0x7f; -- /* Tex filter */ -- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | -- R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | -- R300_TX_MIN_FILTER_NEAREST | -- R300_TX_MAG_FILTER_NEAREST | -- (1 << R300_TX_ID_SHIFT)); -- -- BEGIN_ACCEL(6); -- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); -- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -- OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); -- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -- OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset); -- FINISH_ACCEL(); -- -- /* Enable tex 1 */ -- txenable |= R300_TEX_1_ENABLE; -- } -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); -+ OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pPixmap->drawable.width) << RADEON_RE_WIDTH_SHIFT) | -+ ((pPixmap->drawable.height) << RADEON_RE_HEIGHT_SHIFT))); -+ FINISH_ACCEL(); - -- /* setup the VAP */ -- if (info->accel_state->has_tcl) { -- if (pPriv->bicubic_enabled) -- BEGIN_ACCEL(7); -- else -- BEGIN_ACCEL(6); -- } else { -- if (pPriv->bicubic_enabled) -- BEGIN_ACCEL(5); -- else -- BEGIN_ACCEL(4); -+ if (pPriv->vsync) { -+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, -+ pPriv->drw_x, -+ pPriv->drw_x + pPriv->dst_w, -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h); -+ if (crtc) { -+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; -+ -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, -+ radeon_crtc->crtc_id, -+ pPriv->drw_y - crtc->y, -+ (pPriv->drw_y - crtc->y) + pPriv->dst_h); - } -+ } -+ /* -+ * Rendering of the actual polygon is done in two different -+ * ways depending on chip generation: -+ * -+ * < R300: -+ * -+ * These chips can render a rectangle in one pass, so -+ * handling is pretty straight-forward. -+ * -+ * >= R300: -+ * -+ * These chips can accept a quad, but will render it as -+ * two triangles which results in a diagonal tear. Instead -+ * We render a single, large triangle and use the scissor -+ * functionality to restrict it to the desired rectangle. -+ * Due to guardband limits on r3xx/r4xx, we can only use -+ * the single triangle up to 4021 pixels; above that we -+ * render as a quad. -+ */ - -- /* These registers define the number, type, and location of data submitted -- * to the PVS unit of GA input (when PVS is disabled) -- * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is -- * enabled. This memory provides the imputs to the vertex shader program -- * and ordering is not important. When PVS/TCL is disabled, this field maps -- * directly to the GA input memory and the order is signifigant. In -- * PVS_BYPASS mode the order is as follows: -- * Position -- * Point Size -- * Color 0-3 -- * Textures 0-7 -- * Fog -- */ -- if (pPriv->bicubic_enabled) { -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -- (0 << R300_SKIP_DWORDS_0_SHIFT) | -- (0 << R300_DST_VEC_LOC_0_SHIFT) | -- R300_SIGNED_0 | -- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -- (0 << R300_SKIP_DWORDS_1_SHIFT) | -- (6 << R300_DST_VEC_LOC_1_SHIFT) | -- R300_SIGNED_1)); -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | -- (0 << R300_SKIP_DWORDS_2_SHIFT) | -- (7 << R300_DST_VEC_LOC_2_SHIFT) | -- R300_LAST_VEC_2 | -- R300_SIGNED_2)); -+#ifdef ACCEL_CP -+ BEGIN_RING(nBox * 3 * vtx_count + 4); -+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, -+ nBox * 3 * vtx_count)); -+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | -+ RADEON_CP_VC_CNTL_PRIM_WALK_RING | -+ ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT)); -+#else /* ACCEL_CP */ -+ BEGIN_ACCEL(nBox * 3 * vtx_count + 2); -+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | -+ RADEON_VF_PRIM_WALK_DATA | -+ ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT))); -+ -+#endif -+ -+ while (nBox--) { -+ int srcX, srcY, srcw, srch; -+ int dstX, dstY, dstw, dsth; -+ dstX = pBox->x1 + dstxoff; -+ dstY = pBox->y1 + dstyoff; -+ dstw = pBox->x2 - pBox->x1; -+ dsth = pBox->y2 - pBox->y1; -+ -+ srcX = pPriv->src_x; -+ srcX += ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = pPriv->src_y; -+ srcY += ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; -+ -+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; -+ srch = (pPriv->src_h * dsth) / pPriv->dst_h; -+ -+ if (isplanar) { -+ /* -+ * Just render a rect (using three coords). -+ */ -+ VTX_OUT_6((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_6((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); - } else { -- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -- (0 << R300_SKIP_DWORDS_0_SHIFT) | -- (0 << R300_DST_VEC_LOC_0_SHIFT) | -- R300_SIGNED_0 | -- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -- (0 << R300_SKIP_DWORDS_1_SHIFT) | -- (6 << R300_DST_VEC_LOC_1_SHIFT) | -- R300_LAST_VEC_1 | -- R300_SIGNED_1)); -+ /* -+ * Just render a rect (using three coords). -+ */ -+ VTX_OUT_4((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); - } - -- /* load the vertex shader -- * We pre-load vertex programs in RADEONInit3DEngine(): -- * - exa mask/Xv bicubic -- * - exa no mask -- * - Xv -- * Here we select the offset of the vertex program we want to use -- */ -- if (info->accel_state->has_tcl) { -- if (pPriv->bicubic_enabled) { -- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -- ((0 << R300_PVS_FIRST_INST_SHIFT) | -- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) | -- (2 << R300_PVS_LAST_INST_SHIFT))); -- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -- } else { -- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -- ((5 << R300_PVS_FIRST_INST_SHIFT) | -- (6 << R300_PVS_XYZW_VALID_INST_SHIFT) | -- (6 << R300_PVS_LAST_INST_SHIFT))); -- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -- (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -- } -- } -+ pBox++; -+ } - -- /* Position and one set of 2 texture coordinates */ -- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); -- if (pPriv->bicubic_enabled) -- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | -- (2 << R300_TEX_1_COMP_CNT_SHIFT))); -- else -- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - -- OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); -+#ifdef ACCEL_CP -+ ADVANCE_RING(); -+#else - FINISH_ACCEL(); -+#endif /* !ACCEL_CP */ - -- /* setup pixel shader */ -- if (IS_R300_3D) { -- if (pPriv->bicubic_enabled) { -- BEGIN_ACCEL(79); -- -- /* 4 components: 2 for tex0 and 2 for tex1 */ -- OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | -- R300_RS_COUNT_HIRES_EN)); -- -- /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); -- -- /* Pixel stack frame size. */ -- OUT_ACCEL_REG(R300_US_PIXSIZE, 5); -- -- /* Indirection levels */ -- OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | -- R300_FIRST_TEX)); -- -- /* Set nodes. */ -- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -- R300_ALU_CODE_SIZE(14) | -- R300_TEX_CODE_OFFSET(0) | -- R300_TEX_CODE_SIZE(6))); -- -- /* Nodes are allocated highest first, but executed lowest first */ -- OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); -- OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | -- R300_ALU_SIZE(0) | -- R300_TEX_START(0) | -- R300_TEX_SIZE(0))); -- OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | -- R300_ALU_SIZE(9) | -- R300_TEX_START(1) | -- R300_TEX_SIZE(0))); -- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | -- R300_ALU_SIZE(2) | -- R300_TEX_START(2) | -- R300_TEX_SIZE(3) | -- R300_RGBA_OUT)); -- -- /* ** BICUBIC FP ** */ -- -- /* texcoord0 => temp0 -- * texcoord1 => temp1 */ -- -- // first node -- /* TEX temp2, temp1.rrr0, tex1, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(1) | -- R300_TEX_SRC_ADDR(1) | -- R300_TEX_DST_ADDR(2))); -- -- /* MOV temp1.r, temp1.ggg0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | -- R300_ALU_RGB_ADDRD(1) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- -- // second node -- /* TEX temp1, temp1, tex1, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(1) | -- R300_TEX_SRC_ADDR(1) | -- R300_TEX_DST_ADDR(1))); -- -- /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | -- R300_ALU_RGB_ADDRD(3) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- -- /* MUL temp2.rg, temp2.rrr0, const0.rgb */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | -- R300_ALU_RGB_ADDRD(2) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -- R300_ALU_RGB_ADDR2(3) | -- R300_ALU_RGB_ADDRD(4) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -- R300_ALU_RGB_ADDR2(2) | -- R300_ALU_RGB_ADDRD(5) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -- R300_ALU_RGB_ADDR2(3) | -- R300_ALU_RGB_ADDRD(3) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | -- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -- R300_ALU_RGB_ADDR2(2) | -- R300_ALU_RGB_ADDRD(1) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR2(1) | -- R300_ALU_RGB_ADDRD(1) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR2(3) | -- R300_ALU_RGB_ADDRD(2) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR2(5) | -- R300_ALU_RGB_ADDRD(3) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR2(4) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- -- -- // third node -- /* TEX temp4, temp1.rg--, tex0, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(0) | -- R300_TEX_SRC_ADDR(1) | -- R300_TEX_DST_ADDR(4))); -- -- /* TEX temp3, temp3.rg--, tex0, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(0) | -- R300_TEX_SRC_ADDR(3) | -- R300_TEX_DST_ADDR(3))); -- -- /* TEX temp5, temp2.rg--, tex0, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(0) | -- R300_TEX_SRC_ADDR(2) | -- R300_TEX_DST_ADDR(5))); -- -- /* TEX temp0, temp0.rg--, tex0, 1D */ -- OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | -- R300_TEX_ID(0) | -- R300_TEX_SRC_ADDR(0) | -- R300_TEX_DST_ADDR(0))); -- -- /* LRP temp3, temp1.bbbb, temp4, temp3 -> -- * - PRESUB temps, temp4 - temp3 -- * - MAD temp3, temp1.bbbb, temps, temp3 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | -- R300_ALU_RGB_ADDR1(4) | -- R300_ALU_RGB_ADDR2(1) | -- R300_ALU_RGB_ADDRD(3) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | -- R300_ALU_ALPHA_ADDR1(4) | -- R300_ALU_ALPHA_ADDR2(1) | -- R300_ALU_ALPHA_ADDRD(3) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); -- -- /* LRP temp0, temp1.bbbb, temp5, temp0 -> -- * - PRESUB temps, temp5 - temp0 -- * - MAD temp0, temp1.bbbb, temps, temp0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | -- R300_ALU_RGB_INSERT_NOP)); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR1(5) | -- R300_ALU_RGB_ADDR2(1) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | -- R300_ALU_ALPHA_ADDR1(5) | -- R300_ALU_ALPHA_ADDR2(1) | -- R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); -- -- /* LRP output, temp2.bbbb, temp3, temp0 -> -- * - PRESUB temps, temp3 - temp0 -- * - MAD output, temp2.bbbb, temps, temp0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR1(3) | -- R300_ALU_RGB_ADDR2(2) | -- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | -- R300_ALU_ALPHA_ADDR1(3) | -- R300_ALU_ALPHA_ADDR2(2) | -- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); -- -- /* Shader constants. */ -- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); -- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); -- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); -- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); -- -- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); -- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); -- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); -- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); -- -- FINISH_ACCEL(); -- } else if (isplanar) { -- /* -- * y' = y - .0625 -- * u' = u - .5 -- * v' = v - .5; -- * -- * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' -- * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' -- * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' -- * -- * DP3 might look like the straightforward solution -- * but we'd need to move the texture yuv values in -- * the same reg for this to work. Therefore use MADs. -- * Without changing the shader at all (only the constants) -- * could also provide hue/saturation/brightness/contrast control. -- * -- * yco = 1.1643 -- * uco = 0, -0.39173, 2.017 -- * vco = 1.5958, -0.8129, 0 -- * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], -- * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], -- * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], -- * -- * temp = MAD(yco, yuv.yyyy, off) -- * temp = MAD(uco, yuv.uuuu, temp) -- * result = MAD(vco, yuv.vvvv, temp) -- */ -- float yco = 1.1643; -- float uco[3] = {0.0, -0.39173, 2.018}; -- float vco[3] = {1.5958, -0.8129, 0.0}; -- float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0], -- -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1], -- -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]}; -- -- BEGIN_ACCEL(33); -- /* 2 components: same 2 for tex0/1/2 */ -- OUT_ACCEL_REG(R300_RS_COUNT, -- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -- R300_RS_COUNT_HIRES_EN)); -- /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -- -- OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ -- -- /* Indirection levels */ -- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | -- R300_FIRST_TEX)); -- -- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -- R300_ALU_CODE_SIZE(3) | -- R300_TEX_CODE_OFFSET(0) | -- R300_TEX_CODE_SIZE(3))); -- -- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | -- R300_ALU_SIZE(2) | -- R300_TEX_START(0) | -- R300_TEX_SIZE(2) | -- R300_RGBA_OUT)); -- -- /* tex inst */ -- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | -- R300_TEX_DST_ADDR(0) | -- R300_TEX_ID(0) | -- R300_TEX_INST(R300_TEX_INST_LD))); -- OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | -- R300_TEX_DST_ADDR(1) | -- R300_TEX_ID(1) | -- R300_TEX_INST(R300_TEX_INST_LD))); -- OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | -- R300_TEX_DST_ADDR(2) | -- R300_TEX_ID(2) | -- R300_TEX_INST(R300_TEX_INST_LD))); -- -- /* ALU inst */ -- /* MAD temp0, const0.a, temp0, const0.rgb */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | -- R300_ALU_RGB_ADDR1(0) | -- R300_ALU_RGB_ADDR2(0) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | -- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -- /* alpha nop, but need to set up alpha source for rgb usage */ -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | -- R300_ALU_ALPHA_ADDR1(0) | -- R300_ALU_ALPHA_ADDR2(0) | -- R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- -- /* MAD const1, temp1, temp0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | -- R300_ALU_RGB_ADDR1(1) | -- R300_ALU_RGB_ADDR2(0) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -- /* alpha nop */ -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -- -- /* MAD result, const2, temp2, temp0 */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | -- R300_ALU_RGB_ADDR1(2) | -- R300_ALU_RGB_ADDR2(0) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | -- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | -- R300_ALU_RGB_CLAMP)); -- /* write alpha 1 */ -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | -- R300_ALU_ALPHA_TARGET_A)); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); -- -- /* Shader constants. */ -- /* constant 0: off, yco */ -- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); -- /* constant 1: uco */ -- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0)); -- /* constant 2: vco */ -- OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); -- OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); -- -- FINISH_ACCEL(); -+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip); -+} - -- } else { -- BEGIN_ACCEL(11); -- /* 2 components: 2 for tex0 */ -- OUT_ACCEL_REG(R300_RS_COUNT, -- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -- R300_RS_COUNT_HIRES_EN)); -- /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -- -- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ -- -- /* Indirection levels */ -- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | -- R300_FIRST_TEX)); -- -- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -- R300_ALU_CODE_SIZE(1) | -- R300_TEX_CODE_OFFSET(0) | -- R300_TEX_CODE_SIZE(1))); -- -- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | -- R300_ALU_SIZE(0) | -- R300_TEX_START(0) | -- R300_TEX_SIZE(0) | -- R300_RGBA_OUT)); -- -- /* tex inst */ -- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | -- R300_TEX_DST_ADDR(0) | -- R300_TEX_ID(0) | -- R300_TEX_INST(R300_TEX_INST_LD))); -- -- /* ALU inst */ -- /* RGB */ -- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | -- R300_ALU_RGB_ADDR1(0) | -- R300_ALU_RGB_ADDR2(0) | -- R300_ALU_RGB_ADDRD(0) | -- R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | -- R300_ALU_RGB_MASK_G | -- R300_ALU_RGB_MASK_B)) | -- R300_ALU_RGB_TARGET_A)); -- OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | -- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | -- R300_ALU_RGB_CLAMP)); -- /* Alpha */ -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | -- R300_ALU_ALPHA_ADDR1(0) | -- R300_ALU_ALPHA_ADDR2(0) | -- R300_ALU_ALPHA_ADDRD(0) | -- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | -- R300_ALU_ALPHA_TARGET_A | -- R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); -- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | -- R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | -- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | -- R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | -- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | -- R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | -- R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -- R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | -- R300_ALU_ALPHA_CLAMP)); -- FINISH_ACCEL(); -- } -- } else { -- if (pPriv->bicubic_enabled) { -- BEGIN_ACCEL(7); -- -- /* 4 components: 2 for tex0 and 2 for tex1 */ -- OUT_ACCEL_REG(R300_RS_COUNT, -- ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | -- R300_RS_COUNT_HIRES_EN)); -- -- /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); -- -- /* Pixel stack frame size. */ -- OUT_ACCEL_REG(R300_US_PIXSIZE, 5); -- -- /* FP length. */ -- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -- R500_US_CODE_END_ADDR(13))); -- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -- R500_US_CODE_RANGE_SIZE(13))); -- -- /* Prepare for FP emission. */ -- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -- FINISH_ACCEL(); -- -- BEGIN_ACCEL(89); -- /* Pixel shader. -- * I've gone ahead and annotated each instruction, since this -- * thing is MASSIVE. :3 -- * Note: In order to avoid buggies with temps and multiple -- * inputs, all temps are offset by 2. temp0 -> register2. */ -- -- /* TEX temp2, input1.xxxx, tex1, 1D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | -- R500_TEX_INST_LD | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | -- R500_TEX_SRC_S_SWIZ_R | -- R500_TEX_SRC_T_SWIZ_R | -- R500_TEX_SRC_R_SWIZ_R | -- R500_TEX_SRC_Q_SWIZ_R | -- R500_TEX_DST_ADDR(2) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* TEX temp5, input1.yyyy, tex1, 1D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | -- R500_TEX_INST_LD | -- R500_TEX_SEM_ACQUIRE | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | -- R500_TEX_SRC_S_SWIZ_G | -- R500_TEX_SRC_T_SWIZ_G | -- R500_TEX_SRC_R_SWIZ_G | -- R500_TEX_SRC_Q_SWIZ_G | -- R500_TEX_DST_ADDR(5) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* MUL temp4, const0.x0x0, temp2.yyxx */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_ADDR0_CONST | -- R500_RGB_ADDR1(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_ADDR0_CONST | -- R500_ALPHA_ADDR1(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -- R500_ALU_RGB_R_SWIZ_A_R | -- R500_ALU_RGB_G_SWIZ_A_0 | -- R500_ALU_RGB_B_SWIZ_A_R | -- R500_ALU_RGB_SEL_B_SRC1 | -- R500_ALU_RGB_R_SWIZ_B_G | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_R)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC0 | -- R500_ALPHA_SWIZ_A_0 | -- R500_ALPHA_SEL_B_SRC1 | -- R500_ALPHA_SWIZ_B_R)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_R_SWIZ_0 | -- R500_ALU_RGBA_G_SWIZ_0 | -- R500_ALU_RGBA_B_SWIZ_0 | -- R500_ALU_RGBA_A_SWIZ_0)); -- -- /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_ADDR0_CONST | -- R500_RGB_ADDR1(5) | -- R500_RGB_ADDR2(4))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_ADDR0_CONST | -- R500_ALPHA_ADDR1(5) | -- R500_ALPHA_ADDR2(4))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -- R500_ALU_RGB_R_SWIZ_A_0 | -- R500_ALU_RGB_G_SWIZ_A_G | -- R500_ALU_RGB_B_SWIZ_A_0 | -- R500_ALU_RGB_SEL_B_SRC1 | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_R | -- R500_ALU_RGB_B_SWIZ_B_R)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC0 | -- R500_ALPHA_SWIZ_A_G | -- R500_ALPHA_SEL_B_SRC1 | -- R500_ALPHA_SWIZ_B_R)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC2 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_B | -- R500_ALU_RGBA_A_SWIZ_A)); -- -- /* ADD temp3, temp3, input0.xyxy */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | -- R500_RGB_ADDR2(0))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | -- R500_ALPHA_ADDR2(0))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | -- R500_ALU_RGB_G_SWIZ_A_1 | -- R500_ALU_RGB_B_SWIZ_A_1 | -- R500_ALU_RGB_SEL_B_SRC1 | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SWIZ_A_1 | -- R500_ALPHA_SEL_B_SRC1 | -- R500_ALPHA_SWIZ_B_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC2 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_R | -- R500_ALU_RGBA_A_SWIZ_G)); -- -- /* TEX temp1, temp3.zwxy, tex0, 2D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -- R500_TEX_INST_LD | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | -- R500_TEX_SRC_S_SWIZ_B | -- R500_TEX_SRC_T_SWIZ_A | -- R500_TEX_SRC_R_SWIZ_R | -- R500_TEX_SRC_Q_SWIZ_G | -- R500_TEX_DST_ADDR(1) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* TEX temp3, temp3.xyzw, tex0, 2D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -- R500_TEX_INST_LD | -- R500_TEX_SEM_ACQUIRE | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | -- R500_TEX_SRC_S_SWIZ_R | -- R500_TEX_SRC_T_SWIZ_G | -- R500_TEX_SRC_R_SWIZ_B | -- R500_TEX_SRC_Q_SWIZ_A | -- R500_TEX_DST_ADDR(3) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_ADDR0_CONST | -- R500_RGB_ADDR1(5) | -- R500_RGB_ADDR2(4))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_ADDR0_CONST | -- R500_ALPHA_ADDR1(5) | -- R500_ALPHA_ADDR2(4))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -- R500_ALU_RGB_R_SWIZ_A_0 | -- R500_ALU_RGB_G_SWIZ_A_G | -- R500_ALU_RGB_B_SWIZ_A_0 | -- R500_ALU_RGB_SEL_B_SRC1 | -- R500_ALU_RGB_R_SWIZ_B_G | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_G)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC0 | -- R500_ALPHA_SWIZ_A_G | -- R500_ALPHA_SEL_B_SRC1 | -- R500_ALPHA_SWIZ_B_G)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC2 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_B | -- R500_ALU_RGBA_A_SWIZ_A)); -- -- /* ADD temp0, temp4, input0.xyxy */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | -- R500_RGB_ADDR2(0))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | -- R500_ALPHA_ADDR2(0))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | -- R500_ALU_RGB_G_SWIZ_A_1 | -- R500_ALU_RGB_B_SWIZ_A_1 | -- R500_ALU_RGB_SEL_B_SRC1 | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SWIZ_A_1 | -- R500_ALPHA_SEL_B_SRC1 | -- R500_ALPHA_SWIZ_B_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC2 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_R | -- R500_ALU_RGBA_A_SWIZ_G)); -- -- /* TEX temp4, temp0.zwzw, tex0, 2D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -- R500_TEX_INST_LD | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -- R500_TEX_SRC_S_SWIZ_B | -- R500_TEX_SRC_T_SWIZ_A | -- R500_TEX_SRC_R_SWIZ_B | -- R500_TEX_SRC_Q_SWIZ_A | -- R500_TEX_DST_ADDR(4) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* TEX temp0, temp0.xyzw, tex0, 2D */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -- R500_TEX_INST_LD | -- R500_TEX_SEM_ACQUIRE | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -- R500_TEX_SRC_S_SWIZ_R | -- R500_TEX_SRC_T_SWIZ_G | -- R500_TEX_SRC_R_SWIZ_B | -- R500_TEX_SRC_Q_SWIZ_A | -- R500_TEX_DST_ADDR(0) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* LRP temp3, temp2.zzzz, temp1, temp3 -> -- * - PRESUB temps, temp1 - temp3 -- * - MAD temp2.zzzz, temps, temp3 */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | -- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -- R500_RGB_ADDR1(1) | -- R500_RGB_ADDR2(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | -- R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -- R500_ALPHA_ADDR1(1) | -- R500_ALPHA_ADDR2(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -- R500_ALU_RGB_R_SWIZ_A_B | -- R500_ALU_RGB_G_SWIZ_A_B | -- R500_ALU_RGB_B_SWIZ_A_B | -- R500_ALU_RGB_SEL_B_SRCP | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC2 | -- R500_ALPHA_SWIZ_A_B | -- R500_ALPHA_SEL_B_SRCP | -- R500_ALPHA_SWIZ_B_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC0 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_B | -- R500_ALU_RGBA_A_SWIZ_A)); -- -- /* LRP temp0, temp2.zzzz, temp4, temp0 -> -- * - PRESUB temps, temp4 - temp1 -- * - MAD temp2.zzzz, temps, temp0 */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -- R500_RGB_ADDR1(4) | -- R500_RGB_ADDR2(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -- R500_ALPHA_ADDR1(4) | -- R500_ALPHA_ADDR2(2))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -- R500_ALU_RGB_R_SWIZ_A_B | -- R500_ALU_RGB_G_SWIZ_A_B | -- R500_ALU_RGB_B_SWIZ_A_B | -- R500_ALU_RGB_SEL_B_SRCP | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC2 | -- R500_ALPHA_SWIZ_A_B | -- R500_ALPHA_SEL_B_SRCP | -- R500_ALPHA_SWIZ_B_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC0 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_B | -- R500_ALU_RGBA_A_SWIZ_A)); -- -- /* LRP output, temp5.zzzz, temp3, temp0 -> -- * - PRESUB temps, temp3 - temp0 -- * - MAD temp5.zzzz, temps, temp0 */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -- R500_INST_LAST | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK | -- R500_INST_RGB_OMASK_R | -- R500_INST_RGB_OMASK_G | -- R500_INST_RGB_OMASK_B | -- R500_INST_ALPHA_OMASK)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -- R500_RGB_ADDR1(3) | -- R500_RGB_ADDR2(5))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -- R500_ALPHA_ADDR1(3) | -- R500_ALPHA_ADDR2(5))); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -- R500_ALU_RGB_R_SWIZ_A_B | -- R500_ALU_RGB_G_SWIZ_A_B | -- R500_ALU_RGB_B_SWIZ_A_B | -- R500_ALU_RGB_SEL_B_SRCP | -- R500_ALU_RGB_R_SWIZ_B_R | -- R500_ALU_RGB_G_SWIZ_B_G | -- R500_ALU_RGB_B_SWIZ_B_B)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -- R500_ALPHA_OP_MAD | -- R500_ALPHA_SEL_A_SRC2 | -- R500_ALPHA_SWIZ_A_B | -- R500_ALPHA_SEL_B_SRCP | -- R500_ALPHA_SWIZ_B_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -- R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_SEL_C_SRC0 | -- R500_ALU_RGBA_R_SWIZ_R | -- R500_ALU_RGBA_G_SWIZ_G | -- R500_ALU_RGBA_B_SWIZ_B | -- R500_ALU_RGBA_A_SWIZ_A)); -- -- /* Shader constants. */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); -- -- /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ -- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); -- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); -- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); -- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); -- -- FINISH_ACCEL(); -+static void -+FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ PixmapPtr pPixmap = pPriv->pPixmap; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; -+ uint32_t dst_pitch, dst_format; -+ uint32_t txenable, colorpitch, bicubic_offset; -+ uint32_t output_fmt; -+ Bool isplanar = FALSE; -+ int dstxoff, dstyoff, pixel_shift, vtx_count; -+ BoxPtr pBox = REGION_RECTS(&pPriv->clip); -+ int nBox = REGION_NUM_RECTS(&pPriv->clip); -+ ACCEL_PREAMBLE(); - -- } else { -- BEGIN_ACCEL(19); -- /* 2 components: 2 for tex0 */ -- OUT_ACCEL_REG(R300_RS_COUNT, -- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -- R300_RS_COUNT_HIRES_EN)); -- -- /* R300_INST_COUNT_RS - highest RS instruction used */ -- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); -- -- /* Pixel stack frame size. */ -- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ -- -- /* FP length. */ -- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -- R500_US_CODE_END_ADDR(1))); -- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -- R500_US_CODE_RANGE_SIZE(1))); -- -- /* Prepare for FP emission. */ -- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -- -- /* tex inst */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_RGB_WMASK_R | -- R500_INST_RGB_WMASK_G | -- R500_INST_RGB_WMASK_B | -- R500_INST_ALPHA_WMASK | -- R500_INST_RGB_CLAMP | -- R500_INST_ALPHA_CLAMP)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -- R500_TEX_INST_LD | -- R500_TEX_SEM_ACQUIRE | -- R500_TEX_IGNORE_UNCOVERED)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -- R500_TEX_SRC_S_SWIZ_R | -- R500_TEX_SRC_T_SWIZ_G | -- R500_TEX_DST_ADDR(0) | -- R500_TEX_DST_R_SWIZ_R | -- R500_TEX_DST_G_SWIZ_G | -- R500_TEX_DST_B_SWIZ_B | -- R500_TEX_DST_A_SWIZ_A)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -- R500_DX_S_SWIZ_R | -- R500_DX_T_SWIZ_R | -- R500_DX_R_SWIZ_R | -- R500_DX_Q_SWIZ_R | -- R500_DY_ADDR(0) | -- R500_DY_S_SWIZ_R | -- R500_DY_T_SWIZ_R | -- R500_DY_R_SWIZ_R | -- R500_DY_Q_SWIZ_R)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -- -- /* ALU inst */ -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -- R500_INST_TEX_SEM_WAIT | -- R500_INST_LAST | -- R500_INST_RGB_OMASK_R | -- R500_INST_RGB_OMASK_G | -- R500_INST_RGB_OMASK_B | -- R500_INST_ALPHA_OMASK | -- R500_INST_RGB_CLAMP | -- R500_INST_ALPHA_CLAMP)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -- R500_RGB_ADDR1(0) | -- R500_RGB_ADDR1_CONST | -- R500_RGB_ADDR2(0) | -- R500_RGB_ADDR2_CONST)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -- R500_ALPHA_ADDR1(0) | -- R500_ALPHA_ADDR1_CONST | -- R500_ALPHA_ADDR2(0) | -- R500_ALPHA_ADDR2_CONST)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -- R500_ALU_RGB_R_SWIZ_A_R | -- R500_ALU_RGB_G_SWIZ_A_G | -- R500_ALU_RGB_B_SWIZ_A_B | -- R500_ALU_RGB_SEL_B_SRC0 | -- R500_ALU_RGB_R_SWIZ_B_1 | -- R500_ALU_RGB_B_SWIZ_B_1 | -- R500_ALU_RGB_G_SWIZ_B_1)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -- R500_ALPHA_SWIZ_A_A | -- R500_ALPHA_SWIZ_B_1)); -- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -- R500_ALU_RGBA_R_SWIZ_0 | -- R500_ALU_RGBA_G_SWIZ_0 | -- R500_ALU_RGBA_B_SWIZ_0 | -- R500_ALU_RGBA_A_SWIZ_0)); -- FINISH_ACCEL(); -- } -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ if (pPriv->bicubic_enabled) -+ radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPixmap); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) { -+ ErrorF("Not enough RAM to hw accel xv operation\n"); -+ return; - } -+ } -+#endif - -- BEGIN_ACCEL(6); -- OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); -- OUT_ACCEL_REG(R300_TX_ENABLE, txenable); -+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; - -- OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset); -- OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch); -+#ifdef USE_EXA -+ if (info->useEXA) { -+ dst_pitch = exaGetPixmapPitch(pPixmap); -+ } else -+#endif -+ { -+ dst_pitch = pPixmap->devKind; -+ } - -- blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO; -- /* no need to enable blending */ -- OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl); -+#ifdef COMPOSITE -+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; -+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; -+#else -+ dstxoff = 0; -+ dstyoff = 0; -+#endif - -- OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); -+#ifdef USE_EXA -+ if (info->useEXA) { -+ RADEON_SWITCH_TO_3D(); -+ } else -+#endif -+ { -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -+ /* We must wait for 3d to idle, in case source was just written as a dest. */ -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -+ RADEON_WAIT_HOST_IDLECLEAN | -+ RADEON_WAIT_2D_IDLECLEAN | -+ RADEON_WAIT_3D_IDLECLEAN | -+ RADEON_WAIT_DMA_GUI_IDLE); - FINISH_ACCEL(); - -- } else { -+ if (!info->accel_state->XInited3D) -+ RADEONInit3DEngine(pScrn); -+ } - -- /* Same for R100/R200 */ -- switch (pPixmap->drawable.bitsPerPixel) { -- case 16: -- if (pPixmap->drawable.depth == 15) -- dst_format = RADEON_COLOR_FORMAT_ARGB1555; -- else -- dst_format = RADEON_COLOR_FORMAT_RGB565; -- break; -- case 32: -- dst_format = RADEON_COLOR_FORMAT_ARGB8888; -- break; -- default: -- return; -- } -+ if (pPriv->bicubic_enabled) -+ vtx_count = 6; -+ else -+ vtx_count = 4; - -- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { -- isplanar = TRUE; -- } -+ switch (pPixmap->drawable.bitsPerPixel) { -+ case 16: -+ if (pPixmap->drawable.depth == 15) -+ dst_format = R300_COLORFORMAT_ARGB1555; -+ else -+ dst_format = R300_COLORFORMAT_RGB565; -+ break; -+ case 32: -+ dst_format = R300_COLORFORMAT_ARGB8888; -+ break; -+ default: -+ return; -+ } - -- if (isplanar) { -- txformat = RADEON_TXFORMAT_I8; -- } else { -- if (pPriv->id == FOURCC_UYVY) -- txformat = RADEON_TXFORMAT_YVYU422; -- else -- txformat = RADEON_TXFORMAT_VYUY422; -- } -+ output_fmt = (R300_OUT_FMT_C4_8 | -+ R300_OUT_FMT_C0_SEL_BLUE | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_RED | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ -+ colorpitch = dst_pitch >> pixel_shift; -+ colorpitch |= dst_format; - -- txformat |= RADEON_TXFORMAT_NON_POWER2; -+ if (RADEONTilingEnabled(pScrn, pPixmap)) -+ colorpitch |= R300_COLORTILE; - -- colorpitch = dst_pitch >> pixel_shift; - -- if (RADEONTilingEnabled(pScrn, pPixmap)) -- colorpitch |= RADEON_COLOR_TILE_ENABLE; -+ if (((pPriv->bicubic_state == BICUBIC_OFF)) && -+ (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) -+ isplanar = TRUE; -+ -+ if (isplanar) { -+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; -+ txpitch = pPriv->src_pitch; -+ } else { -+ if (pPriv->id == FOURCC_UYVY) -+ txformat1 = R300_TX_FORMAT_YVYU422; -+ else -+ txformat1 = R300_TX_FORMAT_VYUY422; - -- BEGIN_ACCEL(4); -+ if (pPriv->bicubic_state != BICUBIC_OFF) -+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; - -- OUT_ACCEL_REG(RADEON_RB3D_CNTL, -- dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/); -- OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); -+ /* pitch is in pixels */ -+ txpitch = pPriv->src_pitch / 2; -+ } -+ txpitch -= 1; -+ -+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_MAG_FILTER_LINEAR | -+ R300_TX_MIN_FILTER_LINEAR | -+ (0 << R300_TX_ID_SHIFT)); -+ -+ txoffset = info->cs ? 0 : pPriv->src_offset; -+ -+ BEGIN_ACCEL_RELOC(6, 1); -+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); -+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); -+ if (isplanar) -+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0); -+ else -+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo); -+ FINISH_ACCEL(); - -- OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); -+ txenable = R300_TEX_0_ENABLE; - -- OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, -- RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); -+ if (isplanar) { -+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -+ txpitch -= 1; -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_MIN_FILTER_LINEAR | -+ R300_TX_MAG_FILTER_LINEAR); -+ -+ BEGIN_ACCEL_RELOC(12, 2); -+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); -+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo); -+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); -+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo); -+ FINISH_ACCEL(); -+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; -+ } - -+ if (pPriv->bicubic_enabled) { -+ /* Size is 128x1 */ -+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | -+ (0x0 << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ /* Format is 32-bit floats, 4bpp */ -+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); -+ /* Pitch is 127 (128-1) */ -+ txpitch = 0x7f; -+ /* Tex filter */ -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | -+ R300_TX_MIN_FILTER_NEAREST | -+ R300_TX_MAG_FILTER_NEAREST | -+ (1 << R300_TX_ID_SHIFT)); -+ -+ if (info->cs) -+ bicubic_offset = 0; -+ else -+ bicubic_offset = pPriv->bicubic_src_offset; -+ -+ BEGIN_ACCEL_RELOC(6, 1); -+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); -+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); - FINISH_ACCEL(); - -+ /* Enable tex 1 */ -+ txenable |= R300_TEX_1_ENABLE; -+ } - -- if ((info->ChipFamily == CHIP_FAMILY_RV250) || -- (info->ChipFamily == CHIP_FAMILY_RV280) || -- (info->ChipFamily == CHIP_FAMILY_RS300) || -- (info->ChipFamily == CHIP_FAMILY_R200)) { -- -- info->accel_state->texW[0] = pPriv->w; -- info->accel_state->texH[0] = pPriv->h; -- -- if (isplanar) { -- /* note: in contrast to r300, use input biasing on uv components */ -- float yco = 1.1643; -- float yoff = -0.0625 * yco; -- float uco[3] = {0.0, -0.39173, 2.018}; -- float vco[3] = {1.5958, -0.8129, 0.0}; -- -- /* need 2 texcoord sets (even though they are identical) due -- to denormalization! hw apparently can't premultiply -- same coord set by different texture size */ -- vtx_count = 6; -- -- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | -- (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); -- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -- txpitch -= 32; -- txfilter = R200_MAG_FILTER_LINEAR | -- R200_MIN_FILTER_LINEAR | -- R200_CLAMP_S_CLAMP_LAST | -- R200_CLAMP_T_CLAMP_LAST; -- -- BEGIN_ACCEL(36); -- -- OUT_ACCEL_REG(RADEON_PP_CNTL, -- RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE | -- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | -- RADEON_TEX_BLEND_2_ENABLE); -- -- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); -- OUT_ACCEL_REG(R200_SE_VTX_FMT_1, -- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | -- (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); -- -- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); -- OUT_ACCEL_REG(R200_PP_TXSIZE_0, -- (pPriv->w - 1) | -- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); -- -- OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); -- OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0); -- OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset); -- -- OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0); -- OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0); -- OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset); -- -- /* similar to r300 code. Note the big problem is that hardware constants -- * are 8 bits only, representing 0.0-1.0. We can get that up (using bias -- * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually -- * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but -- * the constants not. To get larger range can use output scale, but for -- * that 2.018 value we need a total scale by 8, which means the constants -- * really have no accuracy whatsoever (5 fractional bits only). -- * The only direct way to get high precision "constants" into the fragment -- * pipe I know of is to use the texcoord interpolator (not color, this one -- * is 8 bit only too), which seems a bit expensive. We're lucky though it -- * seems the values we need seem to fit better than worst case (get about -- * 6 fractional bits for this instead of 5, at least when not correcting for -- * hue/saturation/contrast/brightness, which is the same as for vco - yco and -- * yoff get 8 fractional bits). -- * -- * A higher precision (8 fractional bits) version might just put uco into -- * a texcoord, and calculate a new vcoconst in the shader, like so: -- * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable -- * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0} -- * vcocalc = ADD temp, bias/scale(cohelper), vco -- * would in total use 4 tex units, 4 instructions which seems fairly -- * balanced for this architecture (instead of 3 + 3 for the solution here) -- * -- * temp = MAD(yco, yuv.yyyy, yoff) -- * temp = MAD(uco, yuv.uuuu, temp) -- * result = MAD(vco, yuv.vvvv, temp) -- * -- * note first mad produces actually scalar, hence we transform -- * it into a dp2a to get 8 bit precision of yco instead of 7 - -- * That's assuming hw correctly expands consts to internal precision. -- * (y * 1 + y * (yco - 1) + yoff) -- * temp = DP2A / 2 (yco, yuv.yyyy, yoff) -- * temp = MAD (uco / 4, yuv.uuuu * 2, temp) -- * result = MAD x2 (vco / 2, yuv.vvvv, temp) -- * -- * vco, uco need bias (and hence scale too) -- * -- */ -+ /* setup the VAP */ -+ if (info->accel_state->has_tcl) { -+ if (pPriv->bicubic_enabled) -+ BEGIN_ACCEL(7); -+ else -+ BEGIN_ACCEL(6); -+ } else { -+ if (pPriv->bicubic_enabled) -+ BEGIN_ACCEL(5); -+ else -+ BEGIN_ACCEL(4); -+ } - -- /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_0, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_ARG_B_R0_COLOR | -- R200_TXC_ARG_C_TFACTOR_COLOR | -- R200_TXC_NEG_ARG_C | -- R200_TXC_OP_DOT2_ADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, -- (0 << R200_TXC_TFACTOR_SEL_SHIFT) | -- R200_TXC_SCALE_INV2 | -- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); -- OUT_ACCEL_REG(R200_PP_TXABLEND_0, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_0, -- R200_TXA_OUTPUT_REG_NONE); -- -- /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_1, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_BIAS_ARG_A | -- R200_TXC_SCALE_ARG_A | -- R200_TXC_ARG_B_R1_COLOR | -- R200_TXC_BIAS_ARG_B | -- R200_TXC_SCALE_ARG_B | -- R200_TXC_ARG_C_R0_COLOR | -- R200_TXC_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, -- (1 << R200_TXC_TFACTOR_SEL_SHIFT) | -- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0); -- OUT_ACCEL_REG(R200_PP_TXABLEND_1, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_1, -- R200_TXA_OUTPUT_REG_NONE); -- -- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_2, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_BIAS_ARG_A | -- R200_TXC_SCALE_ARG_A | -- R200_TXC_ARG_B_R2_COLOR | -- R200_TXC_BIAS_ARG_B | -- R200_TXC_ARG_C_R0_COLOR | -- R200_TXC_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, -- (2 << R200_TXC_TFACTOR_SEL_SHIFT) | -- R200_TXC_SCALE_2X | -- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); -- OUT_ACCEL_REG(R200_PP_TXABLEND_2, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_COMP_ARG_C | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_2, -- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); -- -- /* shader constants */ -- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ -- yco - 1.0, -- -yoff, /* range [-1, 0] */ -- 0.0)); -- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ -- uco[1] * 0.125 + 0.5, -- uco[2] * 0.125 + 0.5, -- 0.0)); -- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ -- vco[1] * 0.25 + 0.5, -- vco[2] * 0.25 + 0.5, -- 0.0)); -- -- FINISH_ACCEL(); -- } -- else if (info->ChipFamily == CHIP_FAMILY_RV250) { -- /* fix up broken packed yuv - shader same as above except -- yuv compoents are all in same reg */ -- float yco = 1.1643; -- float yoff = -0.0625 * yco; -- float uco[3] = {0.0, -0.39173, 2.018}; -- float vco[3] = {1.5958, -0.8129, 0.0}; -- -- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | -- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); -- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -- txpitch -= 32; -- txfilter = R200_MAG_FILTER_LINEAR | -- R200_MIN_FILTER_LINEAR | -- R200_CLAMP_S_CLAMP_LAST | -- R200_CLAMP_T_CLAMP_LAST; -- -- BEGIN_ACCEL(24); -- -- OUT_ACCEL_REG(RADEON_PP_CNTL, -- RADEON_TEX_0_ENABLE | -- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE | -- RADEON_TEX_BLEND_2_ENABLE); -- -- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); -- OUT_ACCEL_REG(R200_SE_VTX_FMT_1, -- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); -- -- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); -- OUT_ACCEL_REG(R200_PP_TXSIZE_0, -- (pPriv->w - 1) | -- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); -- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); -- -- /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_0, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_ARG_B_R0_COLOR | -- R200_TXC_ARG_C_TFACTOR_COLOR | -- R200_TXC_NEG_ARG_C | -- R200_TXC_OP_DOT2_ADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, -- (0 << R200_TXC_TFACTOR_SEL_SHIFT) | -- R200_TXC_SCALE_INV2 | -- (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) | -- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); -- OUT_ACCEL_REG(R200_PP_TXABLEND_0, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_0, -- R200_TXA_OUTPUT_REG_NONE); -- -- /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_1, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_BIAS_ARG_A | -- R200_TXC_SCALE_ARG_A | -- R200_TXC_ARG_B_R0_COLOR | -- R200_TXC_BIAS_ARG_B | -- R200_TXC_SCALE_ARG_B | -- R200_TXC_ARG_C_R1_COLOR | -- R200_TXC_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, -- (1 << R200_TXC_TFACTOR_SEL_SHIFT) | -- (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) | -- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1); -- OUT_ACCEL_REG(R200_PP_TXABLEND_1, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_1, -- R200_TXA_OUTPUT_REG_NONE); -- -- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */ -- OUT_ACCEL_REG(R200_PP_TXCBLEND_2, -- R200_TXC_ARG_A_TFACTOR_COLOR | -- R200_TXC_BIAS_ARG_A | -- R200_TXC_SCALE_ARG_A | -- R200_TXC_ARG_B_R0_COLOR | -- R200_TXC_BIAS_ARG_B | -- R200_TXC_ARG_C_R1_COLOR | -- R200_TXC_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, -- (2 << R200_TXC_TFACTOR_SEL_SHIFT) | -- R200_TXC_SCALE_2X | -- (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) | -- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); -- OUT_ACCEL_REG(R200_PP_TXABLEND_2, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_ZERO | -- R200_TXA_COMP_ARG_C | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_2, -- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); -- -- /* shader constants */ -- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ -- yco - 1.0, -- -yoff, /* range [-1, 0] */ -- 0.0)); -- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ -- uco[1] * 0.125 + 0.5, -- uco[2] * 0.125 + 0.5, -- 0.0)); -- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ -- vco[1] * 0.25 + 0.5, -- vco[2] * 0.25 + 0.5, -- 0.0)); -- -- FINISH_ACCEL(); -- } -- else { -- BEGIN_ACCEL(13); -- OUT_ACCEL_REG(RADEON_PP_CNTL, -- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); -- -- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); -- OUT_ACCEL_REG(R200_SE_VTX_FMT_1, -- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); -- -- OUT_ACCEL_REG(R200_PP_TXFILTER_0, -- R200_MAG_FILTER_LINEAR | -- R200_MIN_FILTER_LINEAR | -- R200_CLAMP_S_CLAMP_LAST | -- R200_CLAMP_T_CLAMP_LAST | -- R200_YUV_TO_RGB); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); -- OUT_ACCEL_REG(R200_PP_TXSIZE_0, -- (pPriv->w - 1) | -- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32); -- -- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset); -- -- OUT_ACCEL_REG(R200_PP_TXCBLEND_0, -- R200_TXC_ARG_A_ZERO | -- R200_TXC_ARG_B_ZERO | -- R200_TXC_ARG_C_R0_COLOR | -- R200_TXC_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, -- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); -- OUT_ACCEL_REG(R200_PP_TXABLEND_0, -- R200_TXA_ARG_A_ZERO | -- R200_TXA_ARG_B_ZERO | -- R200_TXA_ARG_C_R0_ALPHA | -- R200_TXA_OP_MADD); -- OUT_ACCEL_REG(R200_PP_TXABLEND2_0, -- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); -- FINISH_ACCEL(); -- } -+ /* These registers define the number, type, and location of data submitted -+ * to the PVS unit of GA input (when PVS is disabled) -+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is -+ * enabled. This memory provides the imputs to the vertex shader program -+ * and ordering is not important. When PVS/TCL is disabled, this field maps -+ * directly to the GA input memory and the order is signifigant. In -+ * PVS_BYPASS mode the order is as follows: -+ * Position -+ * Point Size -+ * Color 0-3 -+ * Textures 0-7 -+ * Fog -+ */ -+ if (pPriv->bicubic_enabled) { -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_SIGNED_1)); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | -+ (0 << R300_SKIP_DWORDS_2_SHIFT) | -+ (7 << R300_DST_VEC_LOC_2_SHIFT) | -+ R300_LAST_VEC_2 | -+ R300_SIGNED_2)); -+ } else { -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_LAST_VEC_1 | -+ R300_SIGNED_1)); -+ } -+ -+ /* load the vertex shader -+ * We pre-load vertex programs in RADEONInit3DEngine(): -+ * - exa -+ * - Xv -+ * - Xv bicubic -+ * Here we select the offset of the vertex program we want to use -+ */ -+ if (info->accel_state->has_tcl) { -+ if (pPriv->bicubic_enabled) { -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -+ ((11 << R300_PVS_FIRST_INST_SHIFT) | -+ (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (13 << R300_PVS_LAST_INST_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -+ (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); - } else { -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -+ ((9 << R300_PVS_FIRST_INST_SHIFT) | -+ (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (10 << R300_PVS_LAST_INST_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -+ (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -+ } -+ } -+ -+ /* Position and one set of 2 texture coordinates */ -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); -+ if (pPriv->bicubic_enabled) -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | -+ (2 << R300_TEX_1_COMP_CNT_SHIFT))); -+ else -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); -+ -+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); -+ FINISH_ACCEL(); -+ -+ /* setup pixel shader */ -+ if (pPriv->bicubic_state != BICUBIC_OFF) { -+ if (pPriv->bicubic_enabled) { -+ BEGIN_ACCEL(79); -+ -+ /* 4 components: 2 for tex0 and 2 for tex1 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); -+ -+ /* Pixel stack frame size. */ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5); -+ -+ /* Indirection levels */ -+ OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | -+ R300_FIRST_TEX)); -+ -+ /* Set nodes. */ -+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(14) | -+ R300_TEX_CODE_OFFSET(0) | -+ R300_TEX_CODE_SIZE(6))); -+ -+ /* Nodes are allocated highest first, but executed lowest first */ -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0))); -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | -+ R300_ALU_SIZE(9) | -+ R300_TEX_START(1) | -+ R300_TEX_SIZE(0))); -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | -+ R300_ALU_SIZE(2) | -+ R300_TEX_START(2) | -+ R300_TEX_SIZE(3) | -+ R300_RGBA_OUT)); -+ -+ /* ** BICUBIC FP ** */ -+ -+ /* texcoord0 => temp0 -+ * texcoord1 => temp1 */ -+ -+ // first node -+ /* TEX temp2, temp1.rrr0, tex1, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(1) | -+ R300_TEX_SRC_ADDR(1) | -+ R300_TEX_DST_ADDR(2))); -+ -+ /* MOV temp1.r, temp1.ggg0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | -+ R300_ALU_RGB_ADDRD(1) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ -+ // second node -+ /* TEX temp1, temp1, tex1, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(1) | -+ R300_TEX_SRC_ADDR(1) | -+ R300_TEX_DST_ADDR(1))); -+ -+ /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | -+ R300_ALU_RGB_ADDRD(3) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ -+ /* MUL temp2.rg, temp2.rrr0, const0.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | -+ R300_ALU_RGB_ADDRD(2) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR2(3) | -+ R300_ALU_RGB_ADDRD(4) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR2(2) | -+ R300_ALU_RGB_ADDRD(5) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR2(3) | -+ R300_ALU_RGB_ADDRD(3) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | -+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR2(2) | -+ R300_ALU_RGB_ADDRD(1) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR2(1) | -+ R300_ALU_RGB_ADDRD(1) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR2(3) | -+ R300_ALU_RGB_ADDRD(2) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR2(5) | -+ R300_ALU_RGB_ADDRD(3) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR2(4) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ -+ -+ // third node -+ /* TEX temp4, temp1.rg--, tex0, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(0) | -+ R300_TEX_SRC_ADDR(1) | -+ R300_TEX_DST_ADDR(4))); -+ -+ /* TEX temp3, temp3.rg--, tex0, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(0) | -+ R300_TEX_SRC_ADDR(3) | -+ R300_TEX_DST_ADDR(3))); -+ -+ /* TEX temp5, temp2.rg--, tex0, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(0) | -+ R300_TEX_SRC_ADDR(2) | -+ R300_TEX_DST_ADDR(5))); -+ -+ /* TEX temp0, temp0.rg--, tex0, 1D */ -+ OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | -+ R300_TEX_ID(0) | -+ R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0))); -+ -+ /* LRP temp3, temp1.bbbb, temp4, temp3 -> -+ * - PRESUB temps, temp4 - temp3 -+ * - MAD temp3, temp1.bbbb, temps, temp3 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | -+ R300_ALU_RGB_ADDR1(4) | -+ R300_ALU_RGB_ADDR2(1) | -+ R300_ALU_RGB_ADDRD(3) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | -+ R300_ALU_ALPHA_ADDR1(4) | -+ R300_ALU_ALPHA_ADDR2(1) | -+ R300_ALU_ALPHA_ADDRD(3) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); -+ -+ /* LRP temp0, temp1.bbbb, temp5, temp0 -> -+ * - PRESUB temps, temp5 - temp0 -+ * - MAD temp0, temp1.bbbb, temps, temp0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | -+ R300_ALU_RGB_INSERT_NOP)); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR1(5) | -+ R300_ALU_RGB_ADDR2(1) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | -+ R300_ALU_ALPHA_ADDR1(5) | -+ R300_ALU_ALPHA_ADDR2(1) | -+ R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); -+ -+ /* LRP output, temp2.bbbb, temp3, temp0 -> -+ * - PRESUB temps, temp3 - temp0 -+ * - MAD output, temp2.bbbb, temps, temp0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR1(3) | -+ R300_ALU_RGB_ADDR2(2) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | -+ R300_ALU_ALPHA_ADDR1(3) | -+ R300_ALU_ALPHA_ADDR2(2) | -+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); -+ -+ /* Shader constants. */ -+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); -+ -+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); - -- info->accel_state->texW[0] = 1; -- info->accel_state->texH[0] = 1; -- -- BEGIN_ACCEL(9); -- -- OUT_ACCEL_REG(RADEON_PP_CNTL, -- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); -- -- OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY | -- RADEON_SE_VTX_FMT_ST0)); -- -- OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, -- RADEON_MAG_FILTER_LINEAR | -- RADEON_MIN_FILTER_LINEAR | -- RADEON_CLAMP_S_CLAMP_LAST | -- RADEON_CLAMP_T_CLAMP_LAST | -- RADEON_YUV_TO_RGB); -- OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat); -- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset); -- OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, -- RADEON_COLOR_ARG_A_ZERO | -- RADEON_COLOR_ARG_B_ZERO | -- RADEON_COLOR_ARG_C_T0_COLOR | -- RADEON_BLEND_CTL_ADD | -- RADEON_CLAMP_TX); -- OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, -- RADEON_ALPHA_ARG_A_ZERO | -- RADEON_ALPHA_ARG_B_ZERO | -- RADEON_ALPHA_ARG_C_T0_ALPHA | -- RADEON_BLEND_CTL_ADD | -- RADEON_CLAMP_TX); -- -- OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, -- (pPriv->w - 1) | -- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT)); -- OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, -- pPriv->src_pitch - 32); -+ FINISH_ACCEL(); -+ } else { -+ BEGIN_ACCEL(11); -+ /* 2 components: 2 for tex0 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ -+ -+ /* Indirection levels */ -+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | -+ R300_FIRST_TEX)); -+ -+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(1) | -+ R300_TEX_CODE_OFFSET(0) | -+ R300_TEX_CODE_SIZE(1))); -+ -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | -+ R300_ALU_SIZE(0) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0) | -+ R300_RGBA_OUT)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0) | -+ R300_TEX_ID(0) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ -+ /* ALU inst */ -+ /* RGB */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(0) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | -+ R300_ALU_RGB_MASK_G | -+ R300_ALU_RGB_MASK_B)) | -+ R300_ALU_RGB_TARGET_A)); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | -+ R300_ALU_RGB_CLAMP)); -+ /* Alpha */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | -+ R300_ALU_ALPHA_ADDR1(0) | -+ R300_ALU_ALPHA_ADDR2(0) | -+ R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | -+ R300_ALU_ALPHA_TARGET_A | -+ R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | -+ R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | -+ R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | -+ R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | -+ R300_ALU_ALPHA_CLAMP)); - FINISH_ACCEL(); - } -+ } else { -+ /* -+ * y' = y - .0625 -+ * u' = u - .5 -+ * v' = v - .5; -+ * -+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' -+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' -+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' -+ * -+ * DP3 might look like the straightforward solution -+ * but we'd need to move the texture yuv values in -+ * the same reg for this to work. Therefore use MADs. -+ * Brightness just adds to the off constant. -+ * Contrast is multiplication of luminance. -+ * Saturation and hue change the u and v coeffs. -+ * Default values (before adjustments - depend on colorspace): -+ * yco = 1.1643 -+ * uco = 0, -0.39173, 2.017 -+ * vco = 1.5958, -0.8129, 0 -+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], -+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], -+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], -+ * -+ * temp = MAD(yco, yuv.yyyy, off) -+ * temp = MAD(uco, yuv.uuuu, temp) -+ * result = MAD(vco, yuv.vvvv, temp) -+ */ -+ /* TODO: don't recalc consts always */ -+ const float Loff = -0.0627; -+ const float Coff = -0.502; -+ float uvcosf, uvsinf; -+ float yco; -+ float uco[3], vco[3], off[3]; -+ float bright, cont, gamma; -+ int ref = pPriv->transform_index; -+ Bool needgamma = FALSE; -+ -+ cont = RTFContrast(pPriv->contrast); -+ bright = RTFBrightness(pPriv->brightness); -+ gamma = (float)pPriv->gamma / 1000.0; -+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); -+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); -+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */ -+ -+ yco = trans[ref].RefLuma * cont; -+ uco[0] = -trans[ref].RefRCr * uvsinf; -+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; -+ uco[2] = trans[ref].RefBCb * uvcosf; -+ vco[0] = trans[ref].RefRCr * uvcosf; -+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; -+ vco[2] = trans[ref].RefBCb * uvsinf; -+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; -+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; -+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; -+ -+ if (gamma != 1.0) { -+ needgamma = TRUE; -+ /* note: gamma correction is out = in ^ gamma; -+ gpu can only do LG2/EX2 therefore we transform into -+ in ^ gamma = 2 ^ (log2(in) * gamma). -+ Lots of scalar ops, unfortunately (better solution?) - -+ without gamma that's 3 inst, with gamma it's 10... -+ could use different gamma factors per channel, -+ if that's of any use. */ -+ } -+ -+ if (isplanar) { -+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33); -+ /* 2 components: same 2 for tex0/1/2 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ -+ -+ /* Indirection levels */ -+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | -+ R300_FIRST_TEX)); -+ -+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | -+ R300_TEX_CODE_OFFSET(0) | -+ R300_TEX_CODE_SIZE(3))); -+ -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | -+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(2) | -+ R300_RGBA_OUT)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(2) | -+ R300_TEX_ID(0) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(1) | -+ R300_TEX_ID(1) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0) | -+ R300_TEX_ID(2) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ -+ /* ALU inst */ -+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | -+ R300_ALU_RGB_ADDR1(2) | -+ R300_ALU_RGB_ADDR2(0) | -+ R300_ALU_RGB_ADDRD(2) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop, but need to set up alpha source for rgb usage */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | -+ R300_ALU_ALPHA_ADDR1(2) | -+ R300_ALU_ALPHA_ADDR2(0) | -+ R300_ALU_ALPHA_ADDRD(2) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR1(1) | -+ R300_ALU_RGB_ADDR2(2) | -+ R300_ALU_RGB_ADDRD(2) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(2) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | -+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | -+ R300_ALU_RGB_CLAMP)); -+ /* write alpha 1 */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | -+ R300_ALU_ALPHA_TARGET_A)); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); -+ -+ if (needgamma) { -+ /* rgb temp0.r = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.r */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb temp0.g = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.g */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb temp0.b = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.b */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MUL const1, temp1, temp0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(0) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop, but set up const1 */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.r = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.r */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.g = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.g */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.b = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.b */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ } -+ } else { -+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33); -+ /* 2 components */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ -+ -+ /* Indirection levels */ -+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | -+ R300_FIRST_TEX)); -+ -+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | -+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | -+ R300_TEX_CODE_OFFSET(0) | -+ R300_TEX_CODE_SIZE(1))); -+ -+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | -+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | -+ R300_TEX_START(0) | -+ R300_TEX_SIZE(0) | -+ R300_RGBA_OUT)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | -+ R300_TEX_DST_ADDR(0) | -+ R300_TEX_ID(0) | -+ R300_TEX_INST(R300_TEX_INST_LD))); -+ -+ /* ALU inst */ -+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(0) | -+ R300_ALU_RGB_ADDRD(1) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop, but need to set up alpha source for rgb usage */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | -+ R300_ALU_ALPHA_ADDR1(0) | -+ R300_ALU_ALPHA_ADDR2(0) | -+ R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(1) | -+ R300_ALU_RGB_ADDRD(1) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(1) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | -+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | -+ R300_ALU_RGB_CLAMP)); -+ /* write alpha 1 */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | -+ R300_ALU_ALPHA_TARGET_A)); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); -+ -+ if (needgamma) { -+ /* rgb temp0.r = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.r */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb temp0.g = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.g */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb temp0.b = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha lg2 temp0, temp0.b */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* MUL const1, temp1, temp0 */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_ADDR1(0) | -+ R300_ALU_RGB_ADDR2(0) | -+ R300_ALU_RGB_ADDRD(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | -+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | -+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | -+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); -+ /* alpha nop, but set up const1 */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.r = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.r */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.g = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.g */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ -+ /* rgb out0.b = op_sop, set up src0 reg */ -+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | -+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | -+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); -+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), -+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | -+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); -+ /* alpha ex2 temp0, temp0.b */ -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | -+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); -+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | -+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | -+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | -+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); -+ } -+ } -+ -+ /* Shader constants. */ -+ /* constant 0: off, yco */ -+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); -+ /* constant 1: uco */ -+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); -+ /* constant 2: vco */ -+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); -+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); -+ -+ FINISH_ACCEL(); - } - -+ BEGIN_ACCEL_RELOC(6, 2); -+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); -+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable); -+ -+ EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); -+ EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); -+ -+ /* no need to enable blending */ -+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); -+ -+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); -+ FINISH_ACCEL(); -+ - if (pPriv->vsync) { - xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, - pPriv->drw_x, -@@ -2014,7 +2315,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - * We render a single, large triangle and use the scissor - * functionality to restrict it to the desired rectangle. - * Due to guardband limits on r3xx/r4xx, we can only use -- * the single triangle up to 2880 pixels; above that we -+ * the single triangle up to 4021 pixels; above that we - * render as a quad. - */ - -@@ -2027,10 +2328,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - dstw = pBox->x2 - pBox->x1; - dsth = pBox->y2 - pBox->y1; - -- srcX = ((pBox->x1 - pPriv->drw_x) * -- pPriv->src_w) / pPriv->dst_w; -- srcY = ((pBox->y1 - pPriv->drw_y) * -- pPriv->src_h) / pPriv->dst_h; -+ srcX = pPriv->src_x; -+ srcX += ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = pPriv->src_y; -+ srcY += ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; - - srcw = (pPriv->src_w * dstw) / pPriv->dst_w; - srch = (pPriv->src_h * dsth) / pPriv->dst_h; -@@ -2040,92 +2343,49 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch); - #endif - -- if (IS_R300_3D || IS_R500_3D) { -- if (IS_R300_3D && ((dstw+dsth) > 2880)) -- use_quad = TRUE; -- /* -- * Set up the scissor area to that of the output size. -- */ -- BEGIN_ACCEL(2); -- if (IS_R300_3D) { -- /* R300 has an offset */ -- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) | -- ((dstY + 1088) << R300_SCISSOR_Y_SHIFT))); -- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) | -- ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT))); -- } else { -- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | -- ((dstY) << R300_SCISSOR_Y_SHIFT))); -- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | -- ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); -- } -- FINISH_ACCEL(); -- } -+ if ((dstw+dsth) > 4021) -+ use_quad = TRUE; -+ /* -+ * Set up the scissor area to that of the output size. -+ */ -+ BEGIN_ACCEL(2); -+ /* R300 has an offset */ -+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) | -+ ((dstY + 1440) << R300_SCISSOR_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) | -+ ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT))); -+ FINISH_ACCEL(); - - #ifdef ACCEL_CP -- if (info->ChipFamily < CHIP_FAMILY_R200) { -- BEGIN_RING(3 * vtx_count + 3); -- OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, -- 3 * vtx_count + 1)); -- OUT_RING(RADEON_CP_VC_FRMT_XY | -- RADEON_CP_VC_FRMT_ST0); -- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | -+ if (use_quad) { -+ BEGIN_RING(4 * vtx_count + 4); -+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, -+ 4 * vtx_count)); -+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | - RADEON_CP_VC_CNTL_PRIM_WALK_RING | -- RADEON_CP_VC_CNTL_MAOS_ENABLE | -- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | -- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); -- } else if (IS_R300_3D || IS_R500_3D) { -- if (use_quad) { -- BEGIN_RING(4 * vtx_count + 4); -- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, -- 4 * vtx_count)); -- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST | -- RADEON_CP_VC_CNTL_PRIM_WALK_RING | -- (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); -- } else { -- BEGIN_RING(3 * vtx_count + 4); -- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, -- 3 * vtx_count)); -- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | -- RADEON_CP_VC_CNTL_PRIM_WALK_RING | -- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); -- } -+ (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); - } else { -- BEGIN_RING(3 * vtx_count + 2); -+ BEGIN_RING(3 * vtx_count + 4); - OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, - 3 * vtx_count)); -- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | -+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | - RADEON_CP_VC_CNTL_PRIM_WALK_RING | - (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); - } - #else /* ACCEL_CP */ -- if (IS_R300_3D || IS_R500_3D) { -- if (use_quad) -- BEGIN_ACCEL(2 + vtx_count * 4); -- else -- BEGIN_ACCEL(2 + vtx_count * 3); -- } else -- BEGIN_ACCEL(1 + vtx_count * 3); -- -- if (info->ChipFamily < CHIP_FAMILY_R200) -- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | -+ if (use_quad) -+ BEGIN_ACCEL(2 + vtx_count * 4); -+ else -+ BEGIN_ACCEL(2 + vtx_count * 3); -+ -+ if (use_quad) -+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | - RADEON_VF_PRIM_WALK_DATA | -- RADEON_VF_RADEON_MODE | -- (3 << RADEON_VF_NUM_VERTICES_SHIFT))); -- else if (IS_R300_3D || IS_R500_3D) { -- if (use_quad) -- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | -- RADEON_VF_PRIM_WALK_DATA | -- (4 << RADEON_VF_NUM_VERTICES_SHIFT))); -- else -- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | -- RADEON_VF_PRIM_WALK_DATA | -- (3 << RADEON_VF_NUM_VERTICES_SHIFT))); -- } else -- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST | -+ (4 << RADEON_VF_NUM_VERTICES_SHIFT))); -+ else -+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | - RADEON_VF_PRIM_WALK_DATA | - (3 << RADEON_VF_NUM_VERTICES_SHIFT))); -- - #endif - if (pPriv->bicubic_enabled) { - /* -@@ -2133,87 +2393,62 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - * have to deal with the legacy handling. - */ - if (use_quad) { -- VTX_OUT_FILTER((float)dstX, (float)dstY, -- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], -- (float)srcX + 0.5, (float)srcY + 0.5); -- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth), -- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], -- (float)srcX + 0.5, (float)(srcY + srch) + 0.5); -- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth), -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], -- (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); -- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY, -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], -- (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); -+ VTX_OUT_6((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h, -+ (float)srcX + 0.5, (float)srcY + 0.5); -+ VTX_OUT_6((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)srcX + 0.5, (float)(srcY + srch) + 0.5); -+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h, -+ (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5); -+ VTX_OUT_6((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h, -+ (float)(srcX + srcw) + 0.5, (float)srcY + 0.5); - } else { -- VTX_OUT_FILTER((float)dstX, (float)dstY, -- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], -- (float)srcX + 0.5, (float)srcY + 0.5); -- VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth), -- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0], -- (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); -- VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY, -- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0], -- (float)srcY / info->accel_state->texH[0], -- (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, -- (float)srcY + 0.5); -+ VTX_OUT_6((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h, -+ (float)srcX + 0.5, (float)srcY + 0.5); -+ VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), -+ (float)srcX / pPriv->w, -+ ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, -+ (float)srcX + 0.5, -+ (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); -+ VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, -+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, -+ (float)srcY / pPriv->h, -+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, -+ (float)srcY + 0.5); - } - } else { -- if (IS_R300_3D || IS_R500_3D) { -- if (use_quad) { -- VTX_OUT((float)dstX, (float)dstY, -- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); -- VTX_OUT((float)dstX, (float)(dstY + dsth), -- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth), -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT((float)(dstX + dstw), (float)dstY, -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); -- } else { -- /* -- * Render a big, scissored triangle. This means -- * increasing the triangle size and adjusting -- * texture coordinates. -- */ -- VTX_OUT((float)dstX, (float)dstY, -- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); -- VTX_OUT((float)dstX, (float)(dstY + dsth + dstw), -- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]); -- -- VTX_OUT((float)(dstX + dstw + dsth), (float)dstY, -- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0], -- (float)srcY / info->accel_state->texH[0]); -- } -- } else if (isplanar) { -- /* -- * Just render a rect (using three coords). -- * Filter is a bit a misnomer, it's just texcoords... -- */ -- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth), -- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], -- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth), -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0], -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY, -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0], -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); -+ if (use_quad) { -+ VTX_OUT_4((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h); -+ VTX_OUT_4((float)dstX, (float)(dstY + dsth), -+ (float)srcX / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth), -+ (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw), (float)dstY, -+ (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h); - } else { - /* -- * Just render a rect (using three coords). -+ * Render a big, scissored triangle. This means -+ * increasing the triangle size and adjusting -+ * texture coordinates. - */ -- VTX_OUT((float)dstX, (float)(dstY + dsth), -- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth), -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]); -- VTX_OUT((float)(dstX + dstw), (float)dstY, -- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]); -+ VTX_OUT_4((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h); -+ VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), -+ (float)srcX / pPriv->w, -+ ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, -+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, -+ (float)srcY / pPriv->h); - } - } - -- if (IS_R300_3D || IS_R500_3D) -- /* flushing is pipelined, free/finish is not */ -- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -+ /* flushing is pipelined, free/finish is not */ -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); - - #ifdef ACCEL_CP - ADVANCE_RING(); -@@ -2224,18 +2459,1529 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv - pBox++; - } - -- if (IS_R300_3D || IS_R500_3D) { -- BEGIN_ACCEL(3); -- OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); -- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); -+ BEGIN_ACCEL(3); -+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); -+ FINISH_ACCEL(); -+ -+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip); -+} -+ -+static void -+FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) -+{ -+ RADEONInfoPtr info = RADEONPTR(pScrn); -+ PixmapPtr pPixmap = pPriv->pPixmap; -+ struct radeon_exa_pixmap_priv *driver_priv; -+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; -+ uint32_t dst_pitch, dst_format; -+ uint32_t txenable, colorpitch, bicubic_offset; -+ uint32_t output_fmt; -+ Bool isplanar = FALSE; -+ int dstxoff, dstyoff, pixel_shift, vtx_count; -+ BoxPtr pBox = REGION_RECTS(&pPriv->clip); -+ int nBox = REGION_NUM_RECTS(&pPriv->clip); -+ ACCEL_PREAMBLE(); -+ -+#ifdef XF86DRM_MODE -+ if (info->cs) { -+ int ret; -+ -+ radeon_cs_space_reset_bos(info->cs); -+ radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ if (pPriv->bicubic_enabled) -+ radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); -+ -+ driver_priv = exaGetPixmapDriverPrivate(pPixmap); -+ radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM); -+ -+ ret = radeon_cs_space_check(info->cs); -+ if (ret) { -+ ErrorF("Not enough RAM to hw accel xv operation\n"); -+ return; -+ } -+ } -+#endif -+ -+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4; -+ -+#ifdef USE_EXA -+ if (info->useEXA) { -+ dst_pitch = exaGetPixmapPitch(pPixmap); - } else -- BEGIN_ACCEL(1); -+#endif -+ { -+ dst_pitch = pPixmap->devKind; -+ } -+ -+#ifdef COMPOSITE -+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; -+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; -+#else -+ dstxoff = 0; -+ dstyoff = 0; -+#endif -+ -+#ifdef USE_EXA -+ if (info->useEXA) { -+ RADEON_SWITCH_TO_3D(); -+ } else -+#endif -+ { -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -+ /* We must wait for 3d to idle, in case source was just written as a dest. */ -+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -+ RADEON_WAIT_HOST_IDLECLEAN | -+ RADEON_WAIT_2D_IDLECLEAN | -+ RADEON_WAIT_3D_IDLECLEAN | -+ RADEON_WAIT_DMA_GUI_IDLE); -+ FINISH_ACCEL(); -+ -+ if (!info->accel_state->XInited3D) -+ RADEONInit3DEngine(pScrn); -+ } -+ -+ if (pPriv->bicubic_enabled) -+ vtx_count = 6; -+ else -+ vtx_count = 4; -+ -+ switch (pPixmap->drawable.bitsPerPixel) { -+ case 16: -+ if (pPixmap->drawable.depth == 15) -+ dst_format = R300_COLORFORMAT_ARGB1555; -+ else -+ dst_format = R300_COLORFORMAT_RGB565; -+ break; -+ case 32: -+ dst_format = R300_COLORFORMAT_ARGB8888; -+ break; -+ default: -+ return; -+ } -+ -+ output_fmt = (R300_OUT_FMT_C4_8 | -+ R300_OUT_FMT_C0_SEL_BLUE | -+ R300_OUT_FMT_C1_SEL_GREEN | -+ R300_OUT_FMT_C2_SEL_RED | -+ R300_OUT_FMT_C3_SEL_ALPHA); -+ -+ colorpitch = dst_pitch >> pixel_shift; -+ colorpitch |= dst_format; -+ -+ if (RADEONTilingEnabled(pScrn, pPixmap)) -+ colorpitch |= R300_COLORTILE; -+ -+ if (((pPriv->bicubic_state == BICUBIC_OFF)) && -+ (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) -+ isplanar = TRUE; -+ -+ if (isplanar) { -+ txformat1 = R300_TX_FORMAT_X8; -+ txpitch = pPriv->src_pitch; -+ } else { -+ if (pPriv->id == FOURCC_UYVY) -+ txformat1 = R300_TX_FORMAT_YVYU422; -+ else -+ txformat1 = R300_TX_FORMAT_VYUY422; -+ -+ if (pPriv->bicubic_state != BICUBIC_OFF) -+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; -+ -+ /* pitch is in pixels */ -+ txpitch = pPriv->src_pitch / 2; -+ } -+ txpitch -= 1; -+ -+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_MAG_FILTER_LINEAR | -+ R300_TX_MIN_FILTER_LINEAR | -+ (0 << R300_TX_ID_SHIFT)); -+ -+ -+ if ((pPriv->w - 1) & 0x800) -+ txpitch |= R500_TXWIDTH_11; -+ -+ if ((pPriv->h - 1) & 0x800) -+ txpitch |= R500_TXHEIGHT_11; -+ -+ txoffset = info->cs ? 0 : pPriv->src_offset; -+ -+ BEGIN_ACCEL_RELOC(6, 1); -+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter); -+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo); -+ FINISH_ACCEL(); -+ -+ txenable = R300_TEX_0_ENABLE; -+ -+ if (isplanar) { -+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) | -+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63; -+ txpitch -= 1; -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) | -+ R300_TX_MIN_FILTER_LINEAR | -+ R300_TX_MAG_FILTER_LINEAR); -+ -+ BEGIN_ACCEL(12); -+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT)); -+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset); -+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT)); -+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch); -+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset); -+ FINISH_ACCEL(); -+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE; -+ } -+ -+ if (pPriv->bicubic_enabled) { -+ /* Size is 128x1 */ -+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) | -+ (0x0 << R300_TXHEIGHT_SHIFT) | -+ R300_TXPITCH_EN); -+ /* Format is 32-bit floats, 4bpp */ -+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); -+ /* Pitch is 127 (128-1) */ -+ txpitch = 0x7f; -+ /* Tex filter */ -+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) | -+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) | -+ R300_TX_MIN_FILTER_NEAREST | -+ R300_TX_MAG_FILTER_NEAREST | -+ (1 << R300_TX_ID_SHIFT)); -+ -+ if (info->cs) -+ bicubic_offset = 0; -+ else -+ bicubic_offset = pPriv->bicubic_src_offset; -+ -+ BEGIN_ACCEL(6); -+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter); -+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0); -+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0); -+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1); -+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch); -+ OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo); -+ FINISH_ACCEL(); -+ -+ /* Enable tex 1 */ -+ txenable |= R300_TEX_1_ENABLE; -+ } -+ -+ /* setup the VAP */ -+ if (info->accel_state->has_tcl) { -+ if (pPriv->bicubic_enabled) -+ BEGIN_ACCEL(7); -+ else -+ BEGIN_ACCEL(6); -+ } else { -+ if (pPriv->bicubic_enabled) -+ BEGIN_ACCEL(5); -+ else -+ BEGIN_ACCEL(4); -+ } -+ -+ /* These registers define the number, type, and location of data submitted -+ * to the PVS unit of GA input (when PVS is disabled) -+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is -+ * enabled. This memory provides the imputs to the vertex shader program -+ * and ordering is not important. When PVS/TCL is disabled, this field maps -+ * directly to the GA input memory and the order is signifigant. In -+ * PVS_BYPASS mode the order is as follows: -+ * Position -+ * Point Size -+ * Color 0-3 -+ * Textures 0-7 -+ * Fog -+ */ -+ if (pPriv->bicubic_enabled) { -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_SIGNED_1)); -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | -+ (0 << R300_SKIP_DWORDS_2_SHIFT) | -+ (7 << R300_DST_VEC_LOC_2_SHIFT) | -+ R300_LAST_VEC_2 | -+ R300_SIGNED_2)); -+ } else { -+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, -+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | -+ (0 << R300_SKIP_DWORDS_0_SHIFT) | -+ (0 << R300_DST_VEC_LOC_0_SHIFT) | -+ R300_SIGNED_0 | -+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | -+ (0 << R300_SKIP_DWORDS_1_SHIFT) | -+ (6 << R300_DST_VEC_LOC_1_SHIFT) | -+ R300_LAST_VEC_1 | -+ R300_SIGNED_1)); -+ } -+ -+ /* load the vertex shader -+ * We pre-load vertex programs in RADEONInit3DEngine(): -+ * - exa -+ * - Xv -+ * - Xv bicubic -+ * Here we select the offset of the vertex program we want to use -+ */ -+ if (info->accel_state->has_tcl) { -+ if (pPriv->bicubic_enabled) { -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -+ ((11 << R300_PVS_FIRST_INST_SHIFT) | -+ (13 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (13 << R300_PVS_LAST_INST_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -+ (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -+ } else { -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0, -+ ((9 << R300_PVS_FIRST_INST_SHIFT) | -+ (10 << R300_PVS_XYZW_VALID_INST_SHIFT) | -+ (10 << R300_PVS_LAST_INST_SHIFT))); -+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1, -+ (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT)); -+ } -+ } -+ -+ /* Position and one set of 2 texture coordinates */ -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); -+ if (pPriv->bicubic_enabled) -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | -+ (2 << R300_TEX_1_COMP_CNT_SHIFT))); -+ else -+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT)); -+ -+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); -+ FINISH_ACCEL(); -+ -+ /* setup pixel shader */ -+ if (pPriv->bicubic_state != BICUBIC_OFF) { -+ if (pPriv->bicubic_enabled) { -+ BEGIN_ACCEL(7); -+ -+ /* 4 components: 2 for tex0 and 2 for tex1 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); -+ -+ /* Pixel stack frame size. */ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5); -+ -+ /* FP length. */ -+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -+ R500_US_CODE_END_ADDR(13))); -+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -+ R500_US_CODE_RANGE_SIZE(13))); -+ -+ /* Prepare for FP emission. */ -+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -+ FINISH_ACCEL(); -+ -+ BEGIN_ACCEL(89); -+ /* Pixel shader. -+ * I've gone ahead and annotated each instruction, since this -+ * thing is MASSIVE. :3 -+ * Note: In order to avoid buggies with temps and multiple -+ * inputs, all temps are offset by 2. temp0 -> register2. */ -+ -+ /* TEX temp2, input1.xxxx, tex1, 1D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | -+ R500_TEX_INST_LD | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_R | -+ R500_TEX_SRC_R_SWIZ_R | -+ R500_TEX_SRC_Q_SWIZ_R | -+ R500_TEX_DST_ADDR(2) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* TEX temp5, input1.yyyy, tex1, 1D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | -+ R500_TEX_SRC_S_SWIZ_G | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_SRC_R_SWIZ_G | -+ R500_TEX_SRC_Q_SWIZ_G | -+ R500_TEX_DST_ADDR(5) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* MUL temp4, const0.x0x0, temp2.yyxx */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_0 | -+ R500_ALU_RGB_B_SWIZ_A_R | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC0 | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SEL_B_SRC1 | -+ R500_ALPHA_SWIZ_B_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_R_SWIZ_0 | -+ R500_ALU_RGBA_G_SWIZ_0 | -+ R500_ALU_RGBA_B_SWIZ_0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ -+ /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(5) | -+ R500_RGB_ADDR2(4))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(5) | -+ R500_ALPHA_ADDR2(4))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_0 | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_0 | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC0 | -+ R500_ALPHA_SWIZ_A_G | -+ R500_ALPHA_SEL_B_SRC1 | -+ R500_ALPHA_SWIZ_B_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_A_SWIZ_A)); -+ -+ /* ADD temp3, temp3, input0.xyxy */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | -+ R500_RGB_ADDR2(0))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | -+ R500_ALPHA_ADDR2(0))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | -+ R500_ALU_RGB_G_SWIZ_A_1 | -+ R500_ALU_RGB_B_SWIZ_A_1 | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SWIZ_A_1 | -+ R500_ALPHA_SEL_B_SRC1 | -+ R500_ALPHA_SWIZ_B_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_R | -+ R500_ALU_RGBA_A_SWIZ_G)); -+ -+ /* TEX temp1, temp3.zwxy, tex0, 2D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | -+ R500_TEX_SRC_S_SWIZ_B | -+ R500_TEX_SRC_T_SWIZ_A | -+ R500_TEX_SRC_R_SWIZ_R | -+ R500_TEX_SRC_Q_SWIZ_G | -+ R500_TEX_DST_ADDR(1) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* TEX temp3, temp3.xyzw, tex0, 2D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_SRC_R_SWIZ_B | -+ R500_TEX_SRC_Q_SWIZ_A | -+ R500_TEX_DST_ADDR(3) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(5) | -+ R500_RGB_ADDR2(4))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(5) | -+ R500_ALPHA_ADDR2(4))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_0 | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_0 | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_G)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC0 | -+ R500_ALPHA_SWIZ_A_G | -+ R500_ALPHA_SEL_B_SRC1 | -+ R500_ALPHA_SWIZ_B_G)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_A_SWIZ_A)); -+ -+ /* ADD temp0, temp4, input0.xyxy */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | -+ R500_RGB_ADDR2(0))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | -+ R500_ALPHA_ADDR2(0))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | -+ R500_ALU_RGB_G_SWIZ_A_1 | -+ R500_ALU_RGB_B_SWIZ_A_1 | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SWIZ_A_1 | -+ R500_ALPHA_SEL_B_SRC1 | -+ R500_ALPHA_SWIZ_B_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_R | -+ R500_ALU_RGBA_A_SWIZ_G)); -+ -+ /* TEX temp4, temp0.zwzw, tex0, 2D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_B | -+ R500_TEX_SRC_T_SWIZ_A | -+ R500_TEX_SRC_R_SWIZ_B | -+ R500_TEX_SRC_Q_SWIZ_A | -+ R500_TEX_DST_ADDR(4) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* TEX temp0, temp0.xyzw, tex0, 2D */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_SRC_R_SWIZ_B | -+ R500_TEX_SRC_Q_SWIZ_A | -+ R500_TEX_DST_ADDR(0) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* LRP temp3, temp2.zzzz, temp1, temp3 -> -+ * - PRESUB temps, temp1 - temp3 -+ * - MAD temp2.zzzz, temps, temp3 */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | -+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -+ R500_RGB_ADDR1(1) | -+ R500_RGB_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | -+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -+ R500_ALPHA_ADDR1(1) | -+ R500_ALPHA_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -+ R500_ALU_RGB_R_SWIZ_A_B | -+ R500_ALU_RGB_G_SWIZ_A_B | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRCP | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC2 | -+ R500_ALPHA_SWIZ_A_B | -+ R500_ALPHA_SEL_B_SRCP | -+ R500_ALPHA_SWIZ_B_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC0 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_A_SWIZ_A)); -+ -+ /* LRP temp0, temp2.zzzz, temp4, temp0 -> -+ * - PRESUB temps, temp4 - temp1 -+ * - MAD temp2.zzzz, temps, temp0 */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -+ R500_RGB_ADDR1(4) | -+ R500_RGB_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -+ R500_ALPHA_ADDR1(4) | -+ R500_ALPHA_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -+ R500_ALU_RGB_R_SWIZ_A_B | -+ R500_ALU_RGB_G_SWIZ_A_B | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRCP | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC2 | -+ R500_ALPHA_SWIZ_A_B | -+ R500_ALPHA_SEL_B_SRCP | -+ R500_ALPHA_SWIZ_B_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC0 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_A_SWIZ_A)); -+ -+ /* LRP output, temp5.zzzz, temp3, temp0 -> -+ * - PRESUB temps, temp3 - temp0 -+ * - MAD temp5.zzzz, temps, temp0 */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -+ R500_INST_LAST | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_OMASK_R | -+ R500_INST_RGB_OMASK_G | -+ R500_INST_RGB_OMASK_B | -+ R500_INST_ALPHA_OMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | -+ R500_RGB_ADDR1(3) | -+ R500_RGB_ADDR2(5))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 | -+ R500_ALPHA_ADDR1(3) | -+ R500_ALPHA_ADDR2(5))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | -+ R500_ALU_RGB_R_SWIZ_A_B | -+ R500_ALU_RGB_G_SWIZ_A_B | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRCP | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | -+ R500_ALPHA_OP_MAD | -+ R500_ALPHA_SEL_A_SRC2 | -+ R500_ALPHA_SWIZ_A_B | -+ R500_ALPHA_SEL_B_SRCP | -+ R500_ALPHA_SWIZ_B_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | -+ R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_SEL_C_SRC0 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_A_SWIZ_A)); -+ -+ /* Shader constants. */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); -+ -+ /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); -+ -+ FINISH_ACCEL(); -+ } else { -+ BEGIN_ACCEL(19); -+ /* 2 components: 2 for tex0 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ /* Pixel stack frame size. */ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ -+ -+ /* FP length. */ -+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -+ R500_US_CODE_END_ADDR(1))); -+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -+ R500_US_CODE_RANGE_SIZE(1))); -+ -+ /* Prepare for FP emission. */ -+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_DST_ADDR(0) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -+ R500_DX_S_SWIZ_R | -+ R500_DX_T_SWIZ_R | -+ R500_DX_R_SWIZ_R | -+ R500_DX_Q_SWIZ_R | -+ R500_DY_ADDR(0) | -+ R500_DY_S_SWIZ_R | -+ R500_DY_T_SWIZ_R | -+ R500_DY_R_SWIZ_R | -+ R500_DY_Q_SWIZ_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* ALU inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_LAST | -+ R500_INST_RGB_OMASK_R | -+ R500_INST_RGB_OMASK_G | -+ R500_INST_RGB_OMASK_B | -+ R500_INST_ALPHA_OMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR1_CONST | -+ R500_RGB_ADDR2(0) | -+ R500_RGB_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR1_CONST | -+ R500_ALPHA_ADDR2(0) | -+ R500_ALPHA_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC0 | -+ R500_ALU_RGB_R_SWIZ_B_1 | -+ R500_ALU_RGB_B_SWIZ_B_1 | -+ R500_ALU_RGB_G_SWIZ_B_1)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_SWIZ_A_A | -+ R500_ALPHA_SWIZ_B_1)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_R_SWIZ_0 | -+ R500_ALU_RGBA_G_SWIZ_0 | -+ R500_ALU_RGBA_B_SWIZ_0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ FINISH_ACCEL(); -+ } -+ } else { -+ /* -+ * y' = y - .0625 -+ * u' = u - .5 -+ * v' = v - .5; -+ * -+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' -+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' -+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' -+ * -+ * DP3 might look like the straightforward solution -+ * but we'd need to move the texture yuv values in -+ * the same reg for this to work. Therefore use MADs. -+ * Brightness just adds to the off constant. -+ * Contrast is multiplication of luminance. -+ * Saturation and hue change the u and v coeffs. -+ * Default values (before adjustments - depend on colorspace): -+ * yco = 1.1643 -+ * uco = 0, -0.39173, 2.017 -+ * vco = 1.5958, -0.8129, 0 -+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], -+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], -+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], -+ * -+ * temp = MAD(yco, yuv.yyyy, off) -+ * temp = MAD(uco, yuv.uuuu, temp) -+ * result = MAD(vco, yuv.vvvv, temp) -+ */ -+ /* TODO: don't recalc consts always */ -+ const float Loff = -0.0627; -+ const float Coff = -0.502; -+ float uvcosf, uvsinf; -+ float yco; -+ float uco[3], vco[3], off[3]; -+ float bright, cont, gamma; -+ int ref = pPriv->transform_index; -+ Bool needgamma = FALSE; -+ -+ cont = RTFContrast(pPriv->contrast); -+ bright = RTFBrightness(pPriv->brightness); -+ gamma = (float)pPriv->gamma / 1000.0; -+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); -+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); -+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */ -+ -+ yco = trans[ref].RefLuma * cont; -+ uco[0] = -trans[ref].RefRCr * uvsinf; -+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; -+ uco[2] = trans[ref].RefBCb * uvcosf; -+ vco[0] = trans[ref].RefRCr * uvcosf; -+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; -+ vco[2] = trans[ref].RefBCb * uvsinf; -+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; -+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; -+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; -+ -+ //XXX gamma -+ -+ if (gamma != 1.0) { -+ needgamma = TRUE; -+ /* note: gamma correction is out = in ^ gamma; -+ gpu can only do LG2/EX2 therefore we transform into -+ in ^ gamma = 2 ^ (log2(in) * gamma). -+ Lots of scalar ops, unfortunately (better solution?) - -+ without gamma that's 3 inst, with gamma it's 10... -+ could use different gamma factors per channel, -+ if that's of any use. */ -+ } -+ -+ if (isplanar) { -+ BEGIN_ACCEL(56); -+ /* 2 components: 2 for tex0 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ /* Pixel stack frame size. */ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ -+ -+ /* FP length. */ -+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -+ R500_US_CODE_END_ADDR(5))); -+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -+ R500_US_CODE_RANGE_SIZE(5))); -+ -+ /* Prepare for FP emission. */ -+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_DST_ADDR(2) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -+ R500_DX_S_SWIZ_R | -+ R500_DX_T_SWIZ_R | -+ R500_DX_R_SWIZ_R | -+ R500_DX_Q_SWIZ_R | -+ R500_DY_ADDR(0) | -+ R500_DY_S_SWIZ_R | -+ R500_DY_T_SWIZ_R | -+ R500_DY_R_SWIZ_R | -+ R500_DY_Q_SWIZ_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | -+ R500_TEX_INST_LD | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_DST_ADDR(1) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -+ R500_DX_S_SWIZ_R | -+ R500_DX_T_SWIZ_R | -+ R500_DX_R_SWIZ_R | -+ R500_DX_Q_SWIZ_R | -+ R500_DY_ADDR(0) | -+ R500_DY_S_SWIZ_R | -+ R500_DY_T_SWIZ_R | -+ R500_DY_R_SWIZ_R | -+ R500_DY_Q_SWIZ_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_DST_ADDR(0) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -+ R500_DX_S_SWIZ_R | -+ R500_DX_T_SWIZ_R | -+ R500_DX_R_SWIZ_R | -+ R500_DX_Q_SWIZ_R | -+ R500_DY_ADDR(0) | -+ R500_DY_S_SWIZ_R | -+ R500_DY_T_SWIZ_R | -+ R500_DY_R_SWIZ_R | -+ R500_DY_Q_SWIZ_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* ALU inst */ -+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(2) | -+ R500_RGB_ADDR2(0) | -+ R500_RGB_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(2) | -+ R500_ALPHA_ADDR2(0) | -+ R500_ALPHA_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_A | -+ R500_ALU_RGB_G_SWIZ_A_A | -+ R500_ALU_RGB_B_SWIZ_A_A | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(2) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(2) | -+ R500_ALU_RGBA_SEL_C_SRC0 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ -+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(1) | -+ R500_RGB_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(1) | -+ R500_ALPHA_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(2) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(2) | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ -+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_LAST | -+ R500_INST_RGB_OMASK_R | -+ R500_INST_RGB_OMASK_G | -+ R500_INST_RGB_OMASK_B | -+ R500_INST_ALPHA_OMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR2(2))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(0) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(0) | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_1)); -+ -+ } else { -+ BEGIN_ACCEL(44); -+ /* 2 components: 2 for tex0/1/2 */ -+ OUT_ACCEL_REG(R300_RS_COUNT, -+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | -+ R300_RS_COUNT_HIRES_EN)); -+ -+ /* R300_INST_COUNT_RS - highest RS instruction used */ -+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); -+ -+ /* Pixel stack frame size. */ -+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ -+ -+ /* FP length. */ -+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | -+ R500_US_CODE_END_ADDR(3))); -+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | -+ R500_US_CODE_RANGE_SIZE(3))); -+ -+ /* Prepare for FP emission. */ -+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); -+ -+ /* tex inst */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | -+ R500_TEX_INST_LD | -+ R500_TEX_SEM_ACQUIRE | -+ R500_TEX_IGNORE_UNCOVERED)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | -+ R500_TEX_SRC_S_SWIZ_R | -+ R500_TEX_SRC_T_SWIZ_G | -+ R500_TEX_DST_ADDR(0) | -+ R500_TEX_DST_R_SWIZ_R | -+ R500_TEX_DST_G_SWIZ_G | -+ R500_TEX_DST_B_SWIZ_B | -+ R500_TEX_DST_A_SWIZ_A)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | -+ R500_DX_S_SWIZ_R | -+ R500_DX_T_SWIZ_R | -+ R500_DX_R_SWIZ_R | -+ R500_DX_Q_SWIZ_R | -+ R500_DY_ADDR(0) | -+ R500_DY_S_SWIZ_R | -+ R500_DY_T_SWIZ_R | -+ R500_DY_R_SWIZ_R | -+ R500_DY_Q_SWIZ_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); -+ -+ /* ALU inst */ -+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR2(0) | -+ R500_RGB_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR2(0) | -+ R500_ALPHA_ADDR2_CONST)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_A | -+ R500_ALU_RGB_G_SWIZ_A_A | -+ R500_ALU_RGB_B_SWIZ_A_A | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_G | -+ R500_ALU_RGB_B_SWIZ_B_G | -+ R500_ALU_RGB_G_SWIZ_B_G)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(1) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(1) | -+ R500_ALU_RGBA_SEL_C_SRC0 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ -+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_RGB_WMASK_R | -+ R500_INST_RGB_WMASK_G | -+ R500_INST_RGB_WMASK_B | -+ R500_INST_ALPHA_WMASK)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR2(1))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR2(1))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_B | -+ R500_ALU_RGB_B_SWIZ_B_B | -+ R500_ALU_RGB_G_SWIZ_B_B)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(1) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(1) | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_0)); -+ -+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | -+ R500_INST_TEX_SEM_WAIT | -+ R500_INST_LAST | -+ R500_INST_RGB_OMASK_R | -+ R500_INST_RGB_OMASK_G | -+ R500_INST_RGB_OMASK_B | -+ R500_INST_ALPHA_OMASK | -+ R500_INST_RGB_CLAMP | -+ R500_INST_ALPHA_CLAMP)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | -+ R500_RGB_ADDR0_CONST | -+ R500_RGB_ADDR1(0) | -+ R500_RGB_ADDR2(1))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | -+ R500_ALPHA_ADDR0_CONST | -+ R500_ALPHA_ADDR1(0) | -+ R500_ALPHA_ADDR2(1))); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | -+ R500_ALU_RGB_R_SWIZ_A_R | -+ R500_ALU_RGB_G_SWIZ_A_G | -+ R500_ALU_RGB_B_SWIZ_A_B | -+ R500_ALU_RGB_SEL_B_SRC1 | -+ R500_ALU_RGB_R_SWIZ_B_R | -+ R500_ALU_RGB_B_SWIZ_B_R | -+ R500_ALU_RGB_G_SWIZ_B_R)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | -+ R500_ALPHA_ADDRD(1) | -+ R500_ALPHA_SWIZ_A_0 | -+ R500_ALPHA_SWIZ_B_0)); -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | -+ R500_ALU_RGBA_ADDRD(1) | -+ R500_ALU_RGBA_SEL_C_SRC2 | -+ R500_ALU_RGBA_R_SWIZ_R | -+ R500_ALU_RGBA_G_SWIZ_G | -+ R500_ALU_RGBA_B_SWIZ_B | -+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | -+ R500_ALU_RGBA_A_SWIZ_1)); -+ } -+ -+ /* Shader constants. */ -+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); -+ -+ /* constant 0: off, yco */ -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); -+ /* constant 1: uco */ -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); -+ /* constant 2: vco */ -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); -+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); -+ -+ FINISH_ACCEL(); -+ } -+ -+ BEGIN_ACCEL_RELOC(6, 2); -+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); -+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable); -+ -+ EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap); -+ EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap); -+ -+ /* no need to enable blending */ -+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); -+ -+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count); -+ FINISH_ACCEL(); -+ -+ if (pPriv->vsync) { -+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, -+ pPriv->drw_x, -+ pPriv->drw_x + pPriv->dst_w, -+ pPriv->drw_y, -+ pPriv->drw_y + pPriv->dst_h); -+ if (crtc) { -+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; -+ -+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, -+ radeon_crtc->crtc_id, -+ pPriv->drw_y - crtc->y, -+ (pPriv->drw_y - crtc->y) + pPriv->dst_h); -+ } -+ } -+ /* -+ * Rendering of the actual polygon is done in two different -+ * ways depending on chip generation: -+ * -+ * < R300: -+ * -+ * These chips can render a rectangle in one pass, so -+ * handling is pretty straight-forward. -+ * -+ * >= R300: -+ * -+ * These chips can accept a quad, but will render it as -+ * two triangles which results in a diagonal tear. Instead -+ * We render a single, large triangle and use the scissor -+ * functionality to restrict it to the desired rectangle. -+ * Due to guardband limits on r3xx/r4xx, we can only use -+ * the single triangle up to 2880 pixels; above that we -+ * render as a quad. -+ */ -+ -+ while (nBox--) { -+ int srcX, srcY, srcw, srch; -+ int dstX, dstY, dstw, dsth; -+ dstX = pBox->x1 + dstxoff; -+ dstY = pBox->y1 + dstyoff; -+ dstw = pBox->x2 - pBox->x1; -+ dsth = pBox->y2 - pBox->y1; -+ -+ srcX = pPriv->src_x; -+ srcX += ((pBox->x1 - pPriv->drw_x) * -+ pPriv->src_w) / pPriv->dst_w; -+ srcY = pPriv->src_y; -+ srcY += ((pBox->y1 - pPriv->drw_y) * -+ pPriv->src_h) / pPriv->dst_h; -+ -+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; -+ srch = (pPriv->src_h * dsth) / pPriv->dst_h; -+ -+ BEGIN_ACCEL(2); -+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) | -+ ((dstY) << R300_SCISSOR_Y_SHIFT))); -+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) | -+ ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT))); -+ FINISH_ACCEL(); -+ -+#ifdef ACCEL_CP -+ BEGIN_RING(3 * vtx_count + 4); -+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, -+ 3 * vtx_count)); -+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST | -+ RADEON_CP_VC_CNTL_PRIM_WALK_RING | -+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT)); -+#else /* ACCEL_CP */ -+ BEGIN_ACCEL(2 + vtx_count * 3); -+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST | -+ RADEON_VF_PRIM_WALK_DATA | -+ (3 << RADEON_VF_NUM_VERTICES_SHIFT))); -+#endif -+ if (pPriv->bicubic_enabled) { -+ VTX_OUT_6((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h, -+ (float)srcX + 0.5, (float)srcY + 0.5); -+ VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth), -+ (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h, -+ (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5); -+ VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY, -+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, -+ (float)srcY / pPriv->h, -+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5, -+ (float)srcY + 0.5); -+ } else { -+ /* -+ * Render a big, scissored triangle. This means -+ * increasing the triangle size and adjusting -+ * texture coordinates. -+ */ -+ VTX_OUT_4((float)dstX, (float)dstY, -+ (float)srcX / pPriv->w, (float)srcY / pPriv->h); -+ VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw), -+ (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h); -+ VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY, -+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w, -+ (float)srcY / pPriv->h); -+ } -+ -+ /* flushing is pipelined, free/finish is not */ -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -+ -+#ifdef ACCEL_CP -+ ADVANCE_RING(); -+#else -+ FINISH_ACCEL(); -+#endif /* !ACCEL_CP */ -+ -+ pBox++; -+ } -+ -+ BEGIN_ACCEL(3); -+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); -+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL); - OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - FINISH_ACCEL(); - - DamageDamageRegion(pPriv->pDraw, &pPriv->clip); - } - --#undef VTX_OUT --#undef VTX_OUT_FILTER -+#undef VTX_OUT_4 -+#undef VTX_OUT_6 - #undef FUNC_NAME -diff --git a/src/radeon_version.h b/src/radeon_version.h -index 5717ead..129046d 100644 ---- a/src/radeon_version.h -+++ b/src/radeon_version.h -@@ -38,6 +38,7 @@ - #define RADEON_DRIVER_NAME "radeon" - #define R200_DRIVER_NAME "r200" - #define R300_DRIVER_NAME "r300" -+#define R600_DRIVER_NAME "r600" - - #define RADEON_VERSION_MAJOR PACKAGE_VERSION_MAJOR - #define RADEON_VERSION_MINOR PACKAGE_VERSION_MINOR -diff --git a/src/radeon_video.c b/src/radeon_video.c -index 6314eb1..f1fe72b 100644 ---- a/src/radeon_video.c -+++ b/src/radeon_video.c -@@ -284,7 +284,7 @@ void RADEONInitVideo(ScreenPtr pScreen) - memcpy(newAdaptors, adaptors, num_adaptors * sizeof(XF86VideoAdaptorPtr)); - adaptors = newAdaptors; - -- if (!IS_AVIVO_VARIANT) { -+ if (!IS_AVIVO_VARIANT && !info->kms_enabled) { - overlayAdaptor = RADEONSetupImageVideo(pScreen); - if (overlayAdaptor != NULL) { - adaptors[num_adaptors++] = overlayAdaptor; -@@ -541,18 +541,6 @@ static XF86ImageRec Images[NUM_IMAGES] = - - #endif - --/* Reference color space transform data */ --typedef struct tagREF_TRANSFORM --{ -- float RefLuma; -- float RefRCb; -- float RefRCr; -- float RefGCb; -- float RefGCr; -- float RefBCb; -- float RefBCr; --} REF_TRANSFORM; -- - /* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces */ - static REF_TRANSFORM trans[2] = - { -@@ -560,7 +548,6 @@ static REF_TRANSFORM trans[2] = - {1.1678, 0.0, 1.7980, -0.2139, -0.5345, 2.1186, 0.0} /* BT.709 */ - }; - -- - /* Gamma curve definition for preset gammas */ - typedef struct tagGAMMA_CURVE_R100 - { -@@ -1661,10 +1648,6 @@ RADEONStopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup) - - if (pPriv->textured) { - if (cleanup) { -- if (pPriv->bicubic_memory != NULL) { -- radeon_legacy_free_memory(pScrn, pPriv->bicubic_memory); -- pPriv->bicubic_memory = NULL; -- } - if (pPriv->video_memory != NULL) { - radeon_legacy_free_memory(pScrn, pPriv->video_memory); - pPriv->video_memory = NULL; -@@ -1717,12 +1700,6 @@ RADEONSetPortAttribute(ScrnInfoPtr pScrn, - - RADEON_SYNC(info, pScrn); - --#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) --#define RTFBrightness(a) (((a)*1.0)/2000.0) --#define RTFIntensity(a) (((a)*1.0)/2000.0) --#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) --#define RTFHue(a) (((a)*3.1416)/1000.0) -- - if(attribute == xvAutopaintColorkey) - { - pPriv->autopaint_colorkey = ClipValue (value, 0, 1); -diff --git a/src/radeon_video.h b/src/radeon_video.h -index 4498002..989942c 100644 ---- a/src/radeon_video.h -+++ b/src/radeon_video.h -@@ -90,14 +90,10 @@ typedef struct { - void *video_memory; - int video_offset; - -- Bool planar_hw; -- Bool planar_state; - int planeu_offset; - int planev_offset; - - /* bicubic filtering */ -- void *bicubic_memory; -- int bicubic_offset; - Bool bicubic_enabled; - uint32_t bicubic_src_offset; - int bicubic_state; -@@ -120,9 +116,30 @@ typedef struct { - int src_w, src_h, dst_w, dst_h; - int w, h; - int drw_x, drw_y; -+ int src_x, src_y; - int vsync; -+ -+ struct radeon_bo *src_bo; - } RADEONPortPrivRec, *RADEONPortPrivPtr; - -+/* Reference color space transform data */ -+typedef struct tagREF_TRANSFORM -+{ -+ float RefLuma; -+ float RefRCb; -+ float RefRCr; -+ float RefGCb; -+ float RefGCr; -+ float RefBCb; -+ float RefBCr; -+} REF_TRANSFORM; -+ -+#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) -+#define RTFBrightness(a) (((a)*1.0)/2000.0) -+#define RTFIntensity(a) (((a)*1.0)/2000.0) -+#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) -+#define RTFHue(a) (((a)*3.1416)/1000.0) -+ - xf86CrtcPtr - radeon_xv_pick_best_crtc(ScrnInfoPtr pScrn, - int x1, int x2, int y1, int y2); diff --git a/sources b/sources index fda057c..cf38d03 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -2bf50461378771497501ca7f678d36f3 xf86-video-ati-6.12.2.tar.bz2 +b2b452333f35258394111e57b5db90b6 xf86-video-ati-20090821.tar.xz diff --git a/xorg-x11-drv-ati.spec b/xorg-x11-drv-ati.spec index 88e9547..11ff938 100644 --- a/xorg-x11-drv-ati.spec +++ b/xorg-x11-drv-ati.spec @@ -1,21 +1,22 @@ %define tarball xf86-video-ati %define moduledir %(pkg-config xorg-server --variable=moduledir ) %define driverdir %{moduledir}/drivers +%define gitdate 20090821 Summary: Xorg X11 ati video driver Name: xorg-x11-drv-ati Version: 6.12.2 -Release: 21%{?dist} +Release: 22%{?dist} URL: http://www.x.org License: MIT Group: User Interface/X Hardware Support BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Source0: http://www.x.org/pub/individual/driver/%{tarball}-%{version}.tar.bz2 +#Source0: http://www.x.org/pub/individual/driver/%{tarball}-%{version}.tar.bz2 +Source0: %{tarball}-%{gitdate}.tar.xz # unlike the other drivers, radeon.xinf is generated Source1: mkxinf -Patch0: radeon-6.12.2-to-git.patch Patch6: radeon-6.9.0-bgnr-enable.patch Patch10: radeon-6.12.2-lvds-default-modes.patch Patch13: fix-default-modes.patch @@ -41,8 +42,7 @@ Obsoletes: xorg-x11-drv-avivo <= 0.0.2 X.Org X11 ati video driver. %prep -%setup -q -n %{tarball}-%{version} -%patch0 -p1 -b .git +%setup -q -n %{tarball}-%{gitdate} %patch6 -p1 -b .bgnr %patch10 -p1 -b .lvds %patch13 -p1 -b .def @@ -79,6 +79,9 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man4/radeon.4* %changelog +* Fri Aug 21 2009 Dave Airlie 6.12.2-22 +- change to a git snapshot + * Tue Aug 04 2009 Dave Airlie 6.12.2-21 - ati: rebase to git master - need to fixup a few patches later