From 0a0aff6a9324e3124e80fdac06ac8f69bfd22aad Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Feb 27 2009 03:57:32 +0000 Subject: - rebase to latest upstream version 6.11.0 - radeon-6.11.0-git.patch: enable R600 acceleration for EXA and Xv. --- diff --git a/.cvsignore b/.cvsignore index cc59dd6..7590a06 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -xf86-video-ati-6.10.0.tar.bz2 +xf86-video-ati-6.11.0.tar.bz2 diff --git a/radeon-6.11.0-git.patch b/radeon-6.11.0-git.patch new file mode 100644 index 0000000..d20da6b --- /dev/null +++ b/radeon-6.11.0-git.patch @@ -0,0 +1,12633 @@ +diff --git a/configure.ac b/configure.ac +index 0523cc0..28207d6 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -22,7 +22,7 @@ + + AC_PREREQ(2.57) + AC_INIT([xf86-video-ati], +- 6.11.0, ++ 6.11.0.99, + [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], + xf86-video-ati) + +@@ -129,7 +129,7 @@ if test "x$EXA" = xyes; then + AC_MSG_RESULT(yes) + + SAVE_CPPFLAGS="$CPPFLAGS" +- CPPFLAGS="$CPPFLAGS $XORG_CFLAGS" ++ CPPFLAGS="$CPPFLAGS $XORG_CFLAGS -DEXA_DRIVER_KNOWN_MAJOR=3" + AC_CHECK_HEADER(exa.h, + [have_exa_h="yes"], [have_exa_h="no"]) + CPPFLAGS="$SAVE_CPPFLAGS" +@@ -138,7 +138,7 @@ else + fi + + SAVE_CPPFLAGS="$CPPFLAGS" +-CPPFLAGS="$CPPFLAGS $XORG_CFLAGS" ++CPPFLAGS="$CPPFLAGS $XORG_CFLAGS -DEXA_DRIVER_KNOWN_MAJOR=3" + if test "x$have_exa_h" = xyes; then + AC_MSG_CHECKING([whether EXA version is at least 2.0.0]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([[ +@@ -153,6 +153,7 @@ if test "x$have_exa_h" = xyes; then + + if test "x$USE_EXA" = xyes; then + AC_DEFINE(USE_EXA, 1, [Build support for Exa]) ++ AC_DEFINE(EXA_DRIVER_KNOWN_MAJOR, 3, [Major version of EXA we know how to handle]) + fi + fi + +diff --git a/src/Makefile.am b/src/Makefile.am +index c15cc30..7ff7d31 100644 +--- a/src/Makefile.am ++++ b/src/Makefile.am +@@ -66,7 +66,7 @@ XMODE_SRCS=\ + modes/xf86DiDGA.c + + if USE_EXA +-RADEON_EXA_SOURCES = radeon_exa.c ++RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c 
r600_textured_videofuncs.c + endif + + AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XMODES_CFLAGS@ -DDISABLE_EASF -DENABLE_ALL_SERVICE_FUNCTIONS -DATOM_BIOS -DATOM_BIOS_PARSER -DDRIVER_PARSER +@@ -128,6 +128,12 @@ EXTRA_DIST = \ + radeon_render.c \ + radeon_accelfuncs.c \ + radeon_textured_videofuncs.c \ ++ r600_reg.h \ ++ r600_reg_auto_r6xx.h \ ++ r600_reg_r6xx.h \ ++ r600_reg_r7xx.h \ ++ r600_shader.h \ ++ r600_state.h \ + ati.h \ + ativersion.h \ + bicubic_table.h \ +diff --git a/src/r600_exa.c b/src/r600_exa.c +new file mode 100644 +index 0000000..17c5567 +--- /dev/null ++++ b/src/r600_exa.c +@@ -0,0 +1,4441 @@ ++/* ++ * Copyright 2008 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ * ++ * Author: Alex Deucher ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "xf86.h" ++ ++#include "exa.h" ++ ++#include "radeon.h" ++#include "radeon_macros.h" ++#include "r600_shader.h" ++#include "r600_reg.h" ++#include "r600_state.h" ++ ++extern PixmapPtr ++RADEONGetDrawablePixmap(DrawablePtr pDrawable); ++ ++//#define SHOW_VERTEXES ++ ++# define RADEON_ROP3_ZERO 0x00000000 ++# define RADEON_ROP3_DSa 0x00880000 ++# define RADEON_ROP3_SDna 0x00440000 ++# define RADEON_ROP3_S 0x00cc0000 ++# define RADEON_ROP3_DSna 0x00220000 ++# define RADEON_ROP3_D 0x00aa0000 ++# define RADEON_ROP3_DSx 0x00660000 ++# define RADEON_ROP3_DSo 0x00ee0000 ++# define RADEON_ROP3_DSon 0x00110000 ++# define RADEON_ROP3_DSxn 0x00990000 ++# define RADEON_ROP3_Dn 0x00550000 ++# define RADEON_ROP3_SDno 0x00dd0000 ++# define RADEON_ROP3_Sn 0x00330000 ++# define RADEON_ROP3_DSno 0x00bb0000 ++# define RADEON_ROP3_DSan 0x00770000 ++# define RADEON_ROP3_ONE 0x00ff0000 ++ ++uint32_t RADEON_ROP[16] = { ++ RADEON_ROP3_ZERO, /* GXclear */ ++ RADEON_ROP3_DSa, /* Gxand */ ++ RADEON_ROP3_SDna, /* GXandReverse */ ++ RADEON_ROP3_S, /* GXcopy */ ++ RADEON_ROP3_DSna, /* GXandInverted */ ++ RADEON_ROP3_D, /* GXnoop */ ++ RADEON_ROP3_DSx, /* GXxor */ ++ RADEON_ROP3_DSo, /* GXor */ ++ RADEON_ROP3_DSon, /* GXnor */ ++ RADEON_ROP3_DSxn, /* GXequiv */ ++ RADEON_ROP3_Dn, /* GXinvert */ ++ RADEON_ROP3_SDno, /* GXorReverse */ ++ RADEON_ROP3_Sn, /* GXcopyInverted */ ++ RADEON_ROP3_DSno, /* GXorInverted */ ++ RADEON_ROP3_DSan, /* GXnand */ ++ RADEON_ROP3_ONE, /* GXset */ ++}; ++ ++static void ++R600DoneSolid(PixmapPtr pPix); ++ ++static void ++R600DoneComposite(PixmapPtr pDst); ++ ++ ++static Bool ++R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ cb_config_t cb_conf; ++ shader_config_t 
vs_conf, ps_conf; ++ int pmask = 0; ++ uint32_t a, r, g, b; ++ float ps_alu_consts[4]; ++ ++ accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; ++ accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; ++ accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); ++ ++ // bad pitch ++ if (accel_state->dst_pitch & 7) ++ return FALSE; ++ ++ // bad offset ++ if (accel_state->dst_mc_addr & 0xff) ++ return FALSE; ++ ++ if (pPix->drawable.bitsPerPixel == 24) ++ return FALSE; ++ ++ CLEAR (cb_conf); ++ CLEAR (vs_conf); ++ CLEAR (ps_conf); ++ ++ //return FALSE; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height, ++ pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); ++#endif ++ ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ ++ /* Init */ ++ start_3d(pScrn, accel_state->ib); ++ ++ //cp_set_surface_sync(pScrn, accel_state->ib); ++ ++ set_default_state(pScrn, accel_state->ib); ++ ++ /* Scissor / viewport */ ++ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); ++ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); ++ ++ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->solid_vs_offset; ++ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->solid_ps_offset; ++ accel_state->vs_size = 512; ++ accel_state->ps_size = 512; ++ ++ /* Shader */ ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->vs_size, accel_state->vs_mc_addr); ++ ++ vs_conf.shader_addr = accel_state->vs_mc_addr; ++ vs_conf.num_gprs = 2; ++ vs_conf.stack_size = 0; ++ vs_setup (pScrn, accel_state->ib, &vs_conf); ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->ps_size, accel_state->ps_mc_addr); ++ ++ ps_conf.shader_addr = 
accel_state->ps_mc_addr; ++ ps_conf.num_gprs = 1; ++ ps_conf.stack_size = 0; ++ ps_conf.uncached_first_inst = 1; ++ ps_conf.clamp_consts = 0; ++ ps_conf.export_mode = 2; ++ ps_setup (pScrn, accel_state->ib, &ps_conf); ++ ++ /* Render setup */ ++ if (pm & 0x000000ff) ++ pmask |= 4; //B ++ if (pm & 0x0000ff00) ++ pmask |= 2; //G ++ if (pm & 0x00ff0000) ++ pmask |= 1; //R ++ if (pm & 0xff000000) ++ pmask |= 8; //A ++ ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); ++ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); ++ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); ++ ++ ++ cb_conf.id = 0; ++ cb_conf.w = accel_state->dst_pitch; ++ cb_conf.h = pPix->drawable.height; ++ cb_conf.base = accel_state->dst_mc_addr; ++ ++ if (pPix->drawable.bitsPerPixel == 8) { ++ cb_conf.format = COLOR_8; ++ cb_conf.comp_swap = 3; //A ++ } else if (pPix->drawable.bitsPerPixel == 16) { ++ cb_conf.format = COLOR_5_6_5; ++ cb_conf.comp_swap = 2; //RGB ++ } else { ++ cb_conf.format = COLOR_8_8_8_8; ++ cb_conf.comp_swap = 1; //ARGB ++ } ++ cb_conf.source_format = 1; ++ cb_conf.blend_clamp = 1; ++ set_render_target(pScrn, accel_state->ib, &cb_conf); ++ ++ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); ++ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ ++ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ ++ ++ /* Interpolator setup */ ++ // one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) ++ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); ++ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); ++ ++ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x ++ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ ++ // no VS exports as PS input (NUM_INTERP is not zero based, no 
minus one) ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift)); ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); ++ // color semantic id 0 -> GPR[0] ++ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | ++ (0x03 << DEFAULT_VAL_shift) | ++ FLAT_SHADE_bit | ++ SEL_CENTROID_bit)); ++ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0); ++ ++ // PS alu constants ++ if (pPix->drawable.bitsPerPixel == 16) { ++ r = (fg >> 11) & 0x1f; ++ g = (fg >> 5) & 0x3f; ++ b = (fg >> 0) & 0x1f; ++ ps_alu_consts[0] = (float)r / 31; //R ++ ps_alu_consts[1] = (float)g / 63; //G ++ ps_alu_consts[2] = (float)b / 31; //B ++ ps_alu_consts[3] = 1.0; //A ++ } else if (pPix->drawable.bitsPerPixel == 8) { ++ a = (fg >> 0) & 0xff; ++ ps_alu_consts[0] = 0.0; //R ++ ps_alu_consts[1] = 0.0; //G ++ ps_alu_consts[2] = 0.0; //B ++ ps_alu_consts[3] = (float)a / 255; //A ++ } else { ++ a = (fg >> 24) & 0xff; ++ r = (fg >> 16) & 0xff; ++ g = (fg >> 8) & 0xff; ++ b = (fg >> 0) & 0xff; ++ ps_alu_consts[0] = (float)r / 255; //R ++ ps_alu_consts[1] = (float)g / 255; //G ++ ps_alu_consts[2] = (float)b / 255; //B ++ ps_alu_consts[3] = (float)a / 255; //A ++ } ++ set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); ++ ++ accel_state->vb_index = 0; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("PM: 0x%08x\n", pm); ++#endif ++ ++ return TRUE; ++} ++ ++ ++static void ++R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ struct r6xx_solid_vertex vertex[3]; ++ struct r6xx_solid_vertex *solid_vb; ++ ++ if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) { ++ R600DoneSolid(pPix); ++ accel_state->vb_index = 0; ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ } ++ ++ solid_vb = 
(pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); ++ ++ vertex[0].x = (float)x1; ++ vertex[0].y = (float)y1; ++ ++ vertex[1].x = (float)x1; ++ vertex[1].y = (float)y2; ++ ++ vertex[2].x = (float)x2; ++ vertex[2].y = (float)y2; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("vertex 0: %f, %f\n", vertex[0].x, vertex[0].y); ++ ErrorF("vertex 1: %f, %f\n", vertex[1].x, vertex[1].y); ++ ErrorF("vertex 2: %f\n", vertex[2].x, vertex[2].y); ++#endif ++ ++ // append to vertex buffer ++ solid_vb[accel_state->vb_index++] = vertex[0]; ++ solid_vb[accel_state->vb_index++] = vertex[1]; ++ solid_vb[accel_state->vb_index++] = vertex[2]; ++} ++ ++static void ++R600DoneSolid(PixmapPtr pPix) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ draw_config_t draw_conf; ++ vtx_resource_t vtx_res; ++ ++ CLEAR (draw_conf); ++ CLEAR (vtx_res); ++ ++ if (accel_state->vb_index == 0) { ++ R600IBDiscard(pScrn, accel_state->ib); ++ return; ++ } ++ ++ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + ++ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); ++ accel_state->vb_size = accel_state->vb_index * 8; ++ ++ /* flush vertex cache */ ++ if ((info->ChipFamily == CHIP_FAMILY_RV610) || ++ (info->ChipFamily == CHIP_FAMILY_RV620) || ++ (info->ChipFamily == CHIP_FAMILY_RS780) || ++ (info->ChipFamily == CHIP_FAMILY_RV710)) ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ else ++ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ ++ /* Vertex buffer setup */ ++ vtx_res.id = SQ_VTX_RESOURCE_vs; ++ vtx_res.vtx_size_dw = 8 / 4; ++ vtx_res.vtx_num_entries = accel_state->vb_size / 4; ++ vtx_res.mem_req_size = 1; ++ vtx_res.vb_addr = accel_state->vb_mc_addr; ++ set_vtx_resource (pScrn, 
accel_state->ib, &vtx_res); ++ ++ /* Draw */ ++ draw_conf.prim_type = DI_PT_RECTLIST; ++ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; ++ draw_conf.num_instances = 1; ++ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; ++ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; ++ ++ draw_auto(pScrn, accel_state->ib, &draw_conf); ++ ++ wait_3d_idle_clean(pScrn, accel_state->ib); ++ ++ /* sync dst surface */ ++ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), ++ accel_state->dst_size, accel_state->dst_mc_addr); ++ ++ R600CPFlushIndirect(pScrn, accel_state->ib); ++} ++ ++static void ++R600DoPrepareCopy(ScrnInfoPtr pScrn, ++ int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, ++ int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp, ++ int rop, Pixel planemask) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ int pmask = 0; ++ cb_config_t cb_conf; ++ tex_resource_t tex_res; ++ tex_sampler_t tex_samp; ++ shader_config_t vs_conf, ps_conf; ++ ++ CLEAR (cb_conf); ++ CLEAR (tex_res); ++ CLEAR (tex_samp); ++ CLEAR (vs_conf); ++ CLEAR (ps_conf); ++ ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ ++ /* Init */ ++ start_3d(pScrn, accel_state->ib); ++ ++ //cp_set_surface_sync(pScrn, accel_state->ib); ++ ++ set_default_state(pScrn, accel_state->ib); ++ ++ /* Scissor / viewport */ ++ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); ++ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); ++ ++ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->copy_vs_offset; ++ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->copy_ps_offset; ++ accel_state->vs_size = 512; ++ accel_state->ps_size = 512; ++ ++ /* Shader */ ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, 
SH_ACTION_ENA_bit, ++ accel_state->vs_size, accel_state->vs_mc_addr); ++ ++ vs_conf.shader_addr = accel_state->vs_mc_addr; ++ vs_conf.num_gprs = 2; ++ vs_conf.stack_size = 0; ++ vs_setup (pScrn, accel_state->ib, &vs_conf); ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->ps_size, accel_state->ps_mc_addr); ++ ++ ps_conf.shader_addr = accel_state->ps_mc_addr; ++ ps_conf.num_gprs = 1; ++ ps_conf.stack_size = 0; ++ ps_conf.uncached_first_inst = 1; ++ ps_conf.clamp_consts = 0; ++ ps_conf.export_mode = 2; ++ ps_setup (pScrn, accel_state->ib, &ps_conf); ++ ++ accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); ++ accel_state->src_mc_addr[0] = src_offset; ++ accel_state->src_pitch[0] = src_pitch; ++ accel_state->src_width[0] = src_width; ++ accel_state->src_height[0] = src_height; ++ accel_state->src_bpp[0] = src_bpp; ++ ++ /* flush texture cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->src_size[0], accel_state->src_mc_addr[0]); ++ ++ /* Texture */ ++ tex_res.id = 0; ++ tex_res.w = src_width; ++ tex_res.h = src_height; ++ tex_res.pitch = accel_state->src_pitch[0]; ++ tex_res.depth = 0; ++ tex_res.dim = SQ_TEX_DIM_2D; ++ tex_res.base = accel_state->src_mc_addr[0]; ++ tex_res.mip_base = accel_state->src_mc_addr[0]; ++ if (src_bpp == 8) { ++ tex_res.format = FMT_8; ++ tex_res.dst_sel_x = SQ_SEL_1; //R ++ tex_res.dst_sel_y = SQ_SEL_1; //G ++ tex_res.dst_sel_z = SQ_SEL_1; //B ++ tex_res.dst_sel_w = SQ_SEL_X; //A ++ } else if (src_bpp == 16) { ++ tex_res.format = FMT_5_6_5; ++ tex_res.dst_sel_x = SQ_SEL_Z; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_X; //B ++ tex_res.dst_sel_w = SQ_SEL_1; //A ++ } else { ++ tex_res.format = FMT_8_8_8_8; ++ tex_res.dst_sel_x = SQ_SEL_Z; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_X; //B ++ tex_res.dst_sel_w = SQ_SEL_W; //A ++ } ++ ++ tex_res.request_size = 1; ++ 
tex_res.base_level = 0; ++ tex_res.last_level = 0; ++ tex_res.perf_modulation = 0; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ tex_samp.id = 0; ++ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_z = SQ_TEX_WRAP; ++ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; ++ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; ++ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; ++ tex_samp.mip_filter = 0; /* no mipmap */ ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ ++ ++ /* Render setup */ ++ if (planemask & 0x000000ff) ++ pmask |= 4; //B ++ if (planemask & 0x0000ff00) ++ pmask |= 2; //G ++ if (planemask & 0x00ff0000) ++ pmask |= 1; //R ++ if (planemask & 0xff000000) ++ pmask |= 8; //A ++ ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); ++ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); ++ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); ++ ++ accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); ++ accel_state->dst_mc_addr = dst_offset; ++ accel_state->dst_pitch = dst_pitch; ++ accel_state->dst_height = dst_height; ++ accel_state->dst_bpp = dst_bpp; ++ ++ cb_conf.id = 0; ++ cb_conf.w = accel_state->dst_pitch; ++ cb_conf.h = dst_height; ++ cb_conf.base = accel_state->dst_mc_addr; ++ if (dst_bpp == 8) { ++ cb_conf.format = COLOR_8; ++ cb_conf.comp_swap = 3; // A ++ } else if (dst_bpp == 16) { ++ cb_conf.format = COLOR_5_6_5; ++ cb_conf.comp_swap = 2; // RGB ++ } else { ++ cb_conf.format = COLOR_8_8_8_8; ++ cb_conf.comp_swap = 1; // ARGB ++ } ++ cb_conf.source_format = 1; ++ cb_conf.blend_clamp = 1; ++ set_render_target(pScrn, accel_state->ib, &cb_conf); ++ ++ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); ++ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ ++ 
DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ ++ ++ /* Interpolator setup */ ++ // export tex coord from VS ++ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); ++ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); ++ ++ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x ++ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ ++ // input tex coord from VS ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); ++ // color semantic id 0 -> GPR[0] ++ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | ++ (0x01 << DEFAULT_VAL_shift) | ++ SEL_CENTROID_bit)); ++ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); ++ ++ accel_state->vb_index = 0; ++ ++} ++ ++static void ++R600DoCopy(ScrnInfoPtr pScrn) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ draw_config_t draw_conf; ++ vtx_resource_t vtx_res; ++ ++ CLEAR (draw_conf); ++ CLEAR (vtx_res); ++ ++ if (accel_state->vb_index == 0) { ++ R600IBDiscard(pScrn, accel_state->ib); ++ return; ++ } ++ ++ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + ++ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); ++ accel_state->vb_size = accel_state->vb_index * 16; ++ ++ /* flush vertex cache */ ++ if ((info->ChipFamily == CHIP_FAMILY_RV610) || ++ (info->ChipFamily == CHIP_FAMILY_RV620) || ++ (info->ChipFamily == CHIP_FAMILY_RS780) || ++ (info->ChipFamily == CHIP_FAMILY_RV710)) ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ else ++ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ ++ /* Vertex buffer setup */ ++ vtx_res.id = SQ_VTX_RESOURCE_vs; ++ vtx_res.vtx_size_dw = 16 / 4; ++ vtx_res.vtx_num_entries = 
accel_state->vb_size / 4; ++ vtx_res.mem_req_size = 1; ++ vtx_res.vb_addr = accel_state->vb_mc_addr; ++ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); ++ ++ draw_conf.prim_type = DI_PT_RECTLIST; ++ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; ++ draw_conf.num_instances = 1; ++ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; ++ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; ++ ++ draw_auto(pScrn, accel_state->ib, &draw_conf); ++ ++ wait_3d_idle_clean(pScrn, accel_state->ib); ++ ++ /* sync dst surface */ ++ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), ++ accel_state->dst_size, accel_state->dst_mc_addr); ++ ++ R600CPFlushIndirect(pScrn, accel_state->ib); ++} ++ ++static void ++R600AppendCopyVertex(ScrnInfoPtr pScrn, ++ int srcX, int srcY, ++ int dstX, int dstY, ++ int w, int h) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ struct r6xx_copy_vertex *copy_vb; ++ struct r6xx_copy_vertex vertex[3]; ++ ++ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { ++ R600DoCopy(pScrn); ++ accel_state->vb_index = 0; ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ } ++ ++ copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); ++ ++ vertex[0].x = (float)dstX; ++ vertex[0].y = (float)dstY; ++ vertex[0].s = (float)srcX; ++ vertex[0].t = (float)srcY; ++ ++ vertex[1].x = (float)dstX; ++ vertex[1].y = (float)(dstY + h); ++ vertex[1].s = (float)srcX; ++ vertex[1].t = (float)(srcY + h); ++ ++ vertex[2].x = (float)(dstX + w); ++ vertex[2].y = (float)(dstY + h); ++ vertex[2].s = (float)(srcX + w); ++ vertex[2].t = (float)(srcY + h); ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("vertex 0: %f, %f, %f, %d\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); ++ ErrorF("vertex 1: %f, %f, %f, %d\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); ++ ErrorF("vertex 2: %f, %f, %f, %d\n", vertex[2].x, 
vertex[2].y, vertex[2].s, vertex[2].t); ++#endif ++ ++ // append to vertex buffer ++ copy_vb[accel_state->vb_index++] = vertex[0]; ++ copy_vb[accel_state->vb_index++] = vertex[1]; ++ copy_vb[accel_state->vb_index++] = vertex[2]; ++ ++} ++ ++static Bool ++R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, ++ int xdir, int ydir, ++ int rop, ++ Pixel planemask) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); ++ ++ accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; ++ accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ ++ accel_state->src_width[0] = pSrc->drawable.width; ++ accel_state->src_height[0] = pSrc->drawable.height; ++ accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel; ++ accel_state->dst_height = pDst->drawable.height; ++ accel_state->dst_bpp = pDst->drawable.bitsPerPixel; ++ ++ // bad pitch ++ if (accel_state->src_pitch[0] & 7) ++ return FALSE; ++ if (accel_state->dst_pitch & 7) ++ return FALSE; ++ ++ // bad offset ++ if (accel_state->src_mc_addr[0] & 0xff) ++ return FALSE; ++ if (accel_state->dst_mc_addr & 0xff) ++ return FALSE; ++ ++ if (pSrc->drawable.bitsPerPixel == 24) ++ return FALSE; ++ if (pDst->drawable.bitsPerPixel == 24) ++ return FALSE; ++ ++ //return FALSE; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height, ++ pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc)); ++ ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height, ++ pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst)); ++#endif ++ ++ accel_state->rop = rop; ++ accel_state->planemask = planemask; ++ ++ if 
(exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { ++ unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; ++ accel_state->same_surface = TRUE; ++ ++ if (accel_state->copy_area) { ++ exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); ++ accel_state->copy_area = NULL; ++ } ++ accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); ++ } else { ++ accel_state->same_surface = FALSE; ++ ++ R600DoPrepareCopy(pScrn, ++ accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, ++ accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, ++ accel_state->dst_pitch, pDst->drawable.height, ++ accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, ++ rop, planemask); ++ ++ } ++ ++ return TRUE; ++} ++ ++static Bool ++is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2) ++{ ++ if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TL x1, y1 ++ ((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TR x2, y1 ++ ((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || // BL x1, y2 ++ ((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2))) // BR x2, y2 ++ return TRUE; ++ else ++ return FALSE; ++} ++ ++static void ++R600OverlapCopy(PixmapPtr pDst, ++ int srcX, int srcY, ++ int dstX, int dstY, ++ int w, int h) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ int i, hchunk, vchunk; ++ ++ if (is_overlap(srcX, srcX + w, srcY, srcY + h, ++ dstX, dstX + w, dstY, dstY + h)) { ++ /* Calculate height/width of non-overlapping area */ ++ hchunk = (srcX < dstX) ? 
(dstX - srcX) : (srcX - dstX); ++ vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); ++ ++ /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only ++ * by copying a part of the non-overlapping portion, then adjusting coordinates ++ * Choose horizontal vs vertical to minimize the total number of copy operations ++ */ ++ if (vchunk != 0 && hchunk != 0) { //diagonal ++ if ((w / hchunk) <= (h / vchunk)) { // reduce to horizontal ++ if (srcY > dstY ) { // diagonal up ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); ++ R600DoCopy(pScrn); ++ ++ srcY = srcY + vchunk; ++ dstY = dstY + vchunk; ++ } else { // diagonal down ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); ++ R600DoCopy(pScrn); ++ } ++ h = h - vchunk; ++ vchunk = 0; ++ } else { //reduce to vertical ++ if (srcX > dstX ) { // diagonal left ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); ++ R600DoCopy(pScrn); ++ ++ srcX = srcX + hchunk; ++ dstX = dstX + hchunk; ++ } else { // diagonal right ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, 
dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); ++ R600DoCopy(pScrn); ++ } ++ w = w - hchunk; ++ hchunk = 0; ++ } ++ } ++ ++ if (vchunk == 0) { // left/right ++ if (srcX < dstX) { // right ++ // copy right to left ++ for (i = w; i > 0; i -= hchunk) { ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); ++ R600DoCopy(pScrn); ++ } ++ } else { //left ++ // copy left to right ++ for (i = 0; i < w; i += hchunk) { ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ ++ R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); ++ R600DoCopy(pScrn); ++ } ++ } ++ } else { //up/down ++ if (srcY > dstY) { // up ++ // copy top to bottom ++ for (i = 0; i < h; i += vchunk) { ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ ++ if (vchunk > h - i) vchunk = h - i; ++ R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk); ++ R600DoCopy(pScrn); ++ } ++ } else { // down ++ // copy bottom to top ++ for (i = h; i > 0; i -= vchunk) { ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ 
accel_state->rop, accel_state->planemask); ++ ++ if (vchunk > i) vchunk = i; ++ R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk); ++ R600DoCopy(pScrn); ++ } ++ } ++ } ++ } else { ++ R600DoPrepareCopy(pScrn, ++ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); ++ R600DoCopy(pScrn); ++ } ++} ++ ++static void ++R600Copy(PixmapPtr pDst, ++ int srcX, int srcY, ++ int dstX, int dstY, ++ int w, int h) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) ++ return; ++ ++ if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { ++ if (accel_state->copy_area) { ++ uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ uint32_t orig_offset, tmp_offset; ++ ++ tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; ++ orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ ++ R600DoPrepareCopy(pScrn, ++ pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, ++ pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); ++ R600DoCopy(pScrn); ++ R600DoPrepareCopy(pScrn, ++ pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, ++ pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, dstX, dstY, dstX, 
dstY, w, h); ++ R600DoCopy(pScrn); ++ } else ++ R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); ++ } else if (accel_state->same_surface) { ++ uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ ++ R600DoPrepareCopy(pScrn, ++ pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, ++ pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, ++ accel_state->rop, accel_state->planemask); ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); ++ R600DoCopy(pScrn); ++ } else { ++ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); ++ } ++ ++} ++ ++static void ++R600DoneCopy(PixmapPtr pDst) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ if (!accel_state->same_surface) ++ R600DoCopy(pScrn); ++ ++ if (accel_state->copy_area) { ++ exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); ++ accel_state->copy_area = NULL; ++ } ++ ++} ++ ++#define RADEON_TRACE_FALL 0 ++#define RADEON_TRACE_DRAW 0 ++ ++#if RADEON_TRACE_FALL ++#define RADEON_FALLBACK(x) \ ++do { \ ++ ErrorF("%s: ", __FUNCTION__); \ ++ ErrorF x; \ ++ return FALSE; \ ++} while (0) ++#else ++#define RADEON_FALLBACK(x) return FALSE ++#endif ++ ++#define xFixedToFloat(f) (((float) (f)) / 65536) ++ ++static inline void transformPoint(PictTransform *transform, xPointFixed *point) ++{ ++ PictVector v; ++ v.vector[0] = point->x; ++ v.vector[1] = point->y; ++ v.vector[2] = xFixed1; ++ PictureTransformPoint(transform, &v); ++ point->x = v.vector[0]; ++ point->y = v.vector[1]; ++} ++ ++struct blendinfo { ++ Bool dst_alpha; ++ Bool src_alpha; ++ uint32_t blend_cntl; ++}; ++ ++static struct blendinfo R600BlendOp[] = { ++ /* Clear */ ++ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << 
COLOR_DESTBLEND_shift)}, ++ /* Src */ ++ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, ++ /* Dst */ ++ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, ++ /* Over */ ++ {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* OverReverse */ ++ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, ++ /* In */ ++ {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, ++ /* InReverse */ ++ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* Out */ ++ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, ++ /* OutReverse */ ++ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* Atop */ ++ {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* AtopReverse */ ++ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* Xor */ ++ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, ++ /* Add */ ++ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, ++}; ++ ++struct formatinfo { ++ unsigned int fmt; ++ uint32_t card_fmt; ++}; ++ ++static struct formatinfo R600TexFormats[] = { ++ {PICT_a8r8g8b8, FMT_8_8_8_8}, ++ {PICT_x8r8g8b8, FMT_8_8_8_8}, ++ {PICT_a8b8g8r8, FMT_8_8_8_8}, ++ {PICT_x8b8g8r8, FMT_8_8_8_8}, ++ {PICT_r5g6b5, FMT_5_6_5}, ++ {PICT_a1r5g5b5, FMT_1_5_5_5}, ++ {PICT_x1r5g5b5, FMT_1_5_5_5}, ++ {PICT_a8, FMT_8}, ++}; ++ ++static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) ++{ ++ uint32_t sblend, dblend; ++ ++ sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; ++ dblend = 
R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; ++ ++ /* If there's no dst alpha channel, adjust the blend op so that we'll treat ++ * it as always 1. ++ */ ++ if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { ++ if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) ++ sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); ++ else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) ++ sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); ++ } ++ ++ /* If the source alpha is being used, then we should only be in a case where ++ * the source blend factor is 0, and the source blend value is the mask ++ * channels multiplied by the source picture's alpha. ++ */ ++ if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { ++ if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { ++ dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); ++ } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { ++ dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); ++ } ++ } ++ ++ return sblend | dblend; ++} ++ ++static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) ++{ ++ switch (pDstPicture->format) { ++ case PICT_a8r8g8b8: ++ case PICT_x8r8g8b8: ++ *dst_format = COLOR_8_8_8_8; ++ break; ++ case PICT_r5g6b5: ++ *dst_format = COLOR_5_6_5; ++ break; ++ case PICT_a1r5g5b5: ++ case PICT_x1r5g5b5: ++ *dst_format = COLOR_1_5_5_5; ++ break; ++ case PICT_a8: ++ *dst_format = COLOR_8; ++ break; ++ default: ++ RADEON_FALLBACK(("Unsupported dest format 0x%x\n", ++ (int)pDstPicture->format)); ++ } ++ return TRUE; ++} ++ ++static Bool R600CheckCompositeTexture(PicturePtr pPict, ++ PicturePtr pDstPict, ++ int op, ++ int unit) ++{ ++ int w = pPict->pDrawable->width; ++ int h = pPict->pDrawable->height; ++ unsigned int i; ++ int max_tex_w, max_tex_h; ++ ++ max_tex_w = 8192; ++ max_tex_h = 8192; ++ ++ if ((w > max_tex_w) || (h > max_tex_h)) ++ RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); ++ ++ for (i 
= 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { ++ if (R600TexFormats[i].fmt == pPict->format) ++ break; ++ } ++ if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) ++ RADEON_FALLBACK(("Unsupported picture format 0x%x\n", ++ (int)pPict->format)); ++ ++ if (pPict->filter != PictFilterNearest && ++ pPict->filter != PictFilterBilinear) ++ RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); ++ ++ /* for REPEAT_NONE, Render semantics are that sampling outside the source ++ * picture results in alpha=0 pixels. We can implement this with a border color ++ * *if* our source texture has an alpha channel, otherwise we need to fall ++ * back. If we're not transformed then we hope that upper layers have clipped ++ * rendering to the bounds of the source drawable, in which case it doesn't ++ * matter. I have not, however, verified that the X server always does such ++ * clipping. ++ */ ++ //FIXME R6xx ++ if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) { ++ if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) ++ RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); ++ } ++ ++ return TRUE; ++} ++ ++static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, ++ int unit) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ int w = pPict->pDrawable->width; ++ int h = pPict->pDrawable->height; ++ unsigned int i; ++ tex_resource_t tex_res; ++ tex_sampler_t tex_samp; ++ ++ CLEAR (tex_res); ++ CLEAR (tex_samp); ++ ++ for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { ++ if (R600TexFormats[i].fmt == pPict->format) ++ break; ++ } ++ ++ accel_state->texW[unit] = w; ++ accel_state->texH[unit] = h; ++ ++ //ErrorF("Tex %d setup %dx%d\n", unit, w, h); ++ ++ accel_state->src_pitch[unit] = 
exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); ++ accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h; ++ accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; ++ /* flush texture cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->src_size[unit], accel_state->src_mc_addr[unit]); ++ ++ /* Texture */ ++ tex_res.id = unit; ++ tex_res.w = w; ++ tex_res.h = h; ++ tex_res.pitch = accel_state->src_pitch[unit]; ++ tex_res.depth = 0; ++ tex_res.dim = SQ_TEX_DIM_2D; ++ tex_res.base = accel_state->src_mc_addr[unit]; ++ tex_res.mip_base = accel_state->src_mc_addr[unit]; ++ tex_res.format = R600TexFormats[i].card_fmt; ++ tex_res.request_size = 1; ++ ++ /* component swizzles */ ++ switch (pPict->format) { ++ case PICT_a1r5g5b5: ++ case PICT_a8r8g8b8: ++ tex_res.dst_sel_x = SQ_SEL_Z; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_X; //B ++ tex_res.dst_sel_w = SQ_SEL_W; //A ++ break; ++ case PICT_a8b8g8r8: ++ tex_res.dst_sel_x = SQ_SEL_X; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_Z; //B ++ tex_res.dst_sel_w = SQ_SEL_W; //A ++ break; ++ case PICT_x8b8g8r8: ++ tex_res.dst_sel_x = SQ_SEL_X; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_Z; //B ++ tex_res.dst_sel_w = SQ_SEL_1; //A ++ break; ++ case PICT_x1r5g5b5: ++ case PICT_x8r8g8b8: ++ tex_res.dst_sel_x = SQ_SEL_Z; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_X; //B ++ tex_res.dst_sel_w = SQ_SEL_1; //A ++ break; ++ case PICT_r5g6b5: ++ tex_res.dst_sel_x = SQ_SEL_Z; //R ++ tex_res.dst_sel_y = SQ_SEL_Y; //G ++ tex_res.dst_sel_z = SQ_SEL_X; //B ++ tex_res.dst_sel_w = SQ_SEL_1; //A ++ break; ++ case PICT_a8: ++ tex_res.dst_sel_x = SQ_SEL_0; //R ++ tex_res.dst_sel_y = SQ_SEL_0; //G ++ tex_res.dst_sel_z = SQ_SEL_0; //B ++ tex_res.dst_sel_w = SQ_SEL_X; //A ++ break; ++ default: ++ RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); ++ 
} ++ ++ tex_res.base_level = 0; ++ tex_res.last_level = 0; ++ tex_res.perf_modulation = 0; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ tex_samp.id = unit; ++ tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; ++ ++ if (pPict->repeat) { ++ switch (pPict->repeatType) { ++ case RepeatNormal: ++ tex_samp.clamp_x = SQ_TEX_WRAP; ++ tex_samp.clamp_y = SQ_TEX_WRAP; ++ break; ++ case RepeatPad: ++ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; ++ break; ++ case RepeatReflect: ++ tex_samp.clamp_x = SQ_TEX_MIRROR; ++ tex_samp.clamp_y = SQ_TEX_MIRROR; ++ break; ++ case RepeatNone: ++ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; ++ break; ++ default: ++ RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType)); ++ } ++ } else { ++ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; ++ } ++ ++ switch (pPict->filter) { ++ case PictFilterNearest: ++ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; ++ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; ++ break; ++ case PictFilterBilinear: ++ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ break; ++ default: ++ RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); ++ } ++ ++ tex_samp.clamp_z = SQ_TEX_WRAP; ++ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; ++ tex_samp.mip_filter = 0; /* no mipmap */ ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ ++ if (pPict->transform != 0) { ++ accel_state->is_transform[unit] = TRUE; ++ accel_state->transform[unit] = pPict->transform; ++ } else ++ accel_state->is_transform[unit] = FALSE; ++ ++ return TRUE; ++} ++ ++static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, ++ PicturePtr pDstPicture) ++{ ++ uint32_t tmp1; ++// ScreenPtr pScreen = pDstPicture->pDrawable->pScreen; ++ PixmapPtr pSrcPixmap, pDstPixmap; ++// ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 
++// RADEONInfoPtr info = RADEONPTR(pScrn); ++ int max_tex_w, max_tex_h, max_dst_w, max_dst_h; ++ ++ /* Check for unsupported compositing operations. */ ++ if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) ++ RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); ++ ++ pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); ++ ++ max_tex_w = 8192; ++ max_tex_h = 8192; ++ max_dst_w = 8192; ++ max_dst_h = 8192; ++ ++ if (pSrcPixmap->drawable.width >= max_tex_w || ++ pSrcPixmap->drawable.height >= max_tex_h) { ++ RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", ++ pSrcPixmap->drawable.width, ++ pSrcPixmap->drawable.height)); ++ } ++ ++ pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); ++ ++ if (pDstPixmap->drawable.width >= max_dst_w || ++ pDstPixmap->drawable.height >= max_dst_h) { ++ RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", ++ pDstPixmap->drawable.width, ++ pDstPixmap->drawable.height)); ++ } ++ ++ if (pMaskPicture) { ++ PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); ++ ++ if (pMaskPixmap->drawable.width >= max_tex_w || ++ pMaskPixmap->drawable.height >= max_tex_h) { ++ RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", ++ pMaskPixmap->drawable.width, ++ pMaskPixmap->drawable.height)); ++ } ++ ++ if (pMaskPicture->componentAlpha) { ++ /* Check if it's component alpha that relies on a source alpha and ++ * on the source value. We can only get one of those into the ++ * single source value that we get to blend with. 
++ */ ++ if (R600BlendOp[op].src_alpha && ++ (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != ++ (BLEND_ZERO << COLOR_SRCBLEND_shift)) { ++ RADEON_FALLBACK(("Component alpha not supported with source " ++ "alpha and source value blending.\n")); ++ } ++ } ++ ++ if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) ++ return FALSE; ++ } ++ ++ if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) ++ return FALSE; ++ ++ if (!R600GetDestFormat(pDstPicture, &tmp1)) ++ return FALSE; ++ ++ return TRUE; ++ ++} ++ ++static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ++ PicturePtr pMaskPicture, PicturePtr pDstPicture, ++ PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ uint32_t blendcntl, dst_format; ++ cb_config_t cb_conf; ++ shader_config_t vs_conf, ps_conf; ++ int i = 0; ++ uint32_t ps[24]; ++ ++ //return FALSE; ++ ++ if (pMask) ++ accel_state->has_mask = TRUE; ++ else ++ accel_state->has_mask = FALSE; ++ ++ accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; ++ ++ accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; ++ accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); ++ accel_state->src_size[0] = exaGetPixmapPitch(pSrc) * pSrc->drawable.height; ++ ++ if (accel_state->dst_pitch & 7) ++ RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch)); ++ ++ if (accel_state->dst_mc_addr & 0xff) ++ RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr)); ++ ++ if (accel_state->src_pitch[0] & 7) ++ RADEON_FALLBACK(("Bad src pitch 0x%x\n", 
(int)accel_state->src_pitch[0])); ++ ++ if (accel_state->src_mc_addr[0] & 0xff) ++ RADEON_FALLBACK(("Bad src offset 0x%x\n", (int)accel_state->src_mc_addr[0])); ++ ++ if (!R600GetDestFormat(pDstPicture, &dst_format)) ++ return FALSE; ++ ++ if (pMask) { ++ int src_a, src_r, src_g, src_b; ++ int mask_a, mask_r, mask_g, mask_b; ++ ++ accel_state->src_mc_addr[1] = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset; ++ accel_state->src_pitch[1] = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); ++ accel_state->src_size[1] = exaGetPixmapPitch(pMask) * pMask->drawable.height; ++ ++ if (accel_state->src_pitch[1] & 7) ++ RADEON_FALLBACK(("Bad mask pitch 0x%x\n", (int)accel_state->src_pitch[1])); ++ ++ if (accel_state->src_mc_addr[1] & 0xff) ++ RADEON_FALLBACK(("Bad mask offset 0x%x\n", (int)accel_state->src_mc_addr[1])); ++ ++ /* setup pixel shader */ ++ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { ++ //src_color = R300_ALU_RGB_0_0; ++ src_r = SQ_SEL_0; ++ src_g = SQ_SEL_0; ++ src_b = SQ_SEL_0; ++ } else { ++ //src_color = R300_ALU_RGB_SRC0_RGB; ++ src_r = SQ_SEL_X; ++ src_g = SQ_SEL_Y; ++ src_b = SQ_SEL_Z; ++ } ++ ++ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { ++ //src_alpha = R300_ALU_ALPHA_1_0; ++ src_a = SQ_SEL_1; ++ } else { ++ //src_alpha = R300_ALU_ALPHA_SRC0_A; ++ src_a = SQ_SEL_W; ++ } ++ ++ if (pMaskPicture->componentAlpha) { ++ if (R600BlendOp[op].src_alpha) { ++ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { ++ //src_color = R300_ALU_RGB_1_0; ++ //src_alpha = R300_ALU_ALPHA_1_0; ++ src_r = SQ_SEL_1; ++ src_g = SQ_SEL_1; ++ src_b = SQ_SEL_1; ++ src_a = SQ_SEL_1; ++ } else { ++ //src_color = R300_ALU_RGB_SRC0_AAA; ++ //src_alpha = R300_ALU_ALPHA_SRC0_A; ++ src_r = SQ_SEL_W; ++ src_g = SQ_SEL_W; ++ src_b = SQ_SEL_W; ++ src_a = SQ_SEL_W; ++ } ++ ++ //mask_color = R300_ALU_RGB_SRC1_RGB; ++ mask_r = SQ_SEL_X; ++ mask_g = SQ_SEL_Y; ++ mask_b = SQ_SEL_Z; ++ ++ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { ++ //mask_alpha = 
R300_ALU_ALPHA_1_0; ++ mask_a = SQ_SEL_1; ++ } else { ++ //mask_alpha = R300_ALU_ALPHA_SRC1_A; ++ mask_a = SQ_SEL_W; ++ } ++ } else { ++ //src_color = R300_ALU_RGB_SRC0_RGB; ++ src_r = SQ_SEL_X; ++ src_g = SQ_SEL_Y; ++ src_b = SQ_SEL_Z; ++ ++ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { ++ //src_alpha = R300_ALU_ALPHA_1_0; ++ src_a = SQ_SEL_1; ++ } else { ++ //src_alpha = R300_ALU_ALPHA_SRC0_A; ++ src_a = SQ_SEL_W; ++ } ++ ++ //mask_color = R300_ALU_RGB_SRC1_RGB; ++ mask_r = SQ_SEL_X; ++ mask_g = SQ_SEL_Y; ++ mask_b = SQ_SEL_Z; ++ ++ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { ++ //mask_alpha = R300_ALU_ALPHA_1_0; ++ mask_a = SQ_SEL_1; ++ } else { ++ //mask_alpha = R300_ALU_ALPHA_SRC1_A; ++ mask_a = SQ_SEL_W; ++ } ++ } ++ } else { ++ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { ++ //mask_color = R300_ALU_RGB_1_0; ++ mask_r = SQ_SEL_1; ++ mask_g = SQ_SEL_1; ++ mask_b = SQ_SEL_1; ++ } else { ++ //mask_color = R300_ALU_RGB_SRC1_AAA; ++ mask_r = SQ_SEL_W; ++ mask_g = SQ_SEL_W; ++ mask_b = SQ_SEL_W; ++ } ++ if (PICT_FORMAT_A(pMaskPicture->format) == 0) { ++ //mask_alpha = R300_ALU_ALPHA_1_0; ++ mask_a = SQ_SEL_1; ++ } else { ++ //mask_alpha = R300_ALU_ALPHA_SRC1_A; ++ mask_a = SQ_SEL_W; ++ } ++ } ++ ++ //0 ++ ps[i++] = CF_DWORD0(ADDR(8)); ++ ps[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(2), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_TEX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ ++ // 1 ++ ps[i++] = CF_ALU_DWORD0(ADDR(3), ++ KCACHE_BANK0(0), ++ KCACHE_BANK1(0), ++ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); ++ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), ++ KCACHE_ADDR0(0), ++ KCACHE_ADDR1(0), ++ I_COUNT(4), ++ USES_WATERFALL(0), ++ CF_INST(SQ_CF_INST_ALU), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ ++ //2 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(2), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(1)); ++ ++ ps[i++] 
= CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ ++ // 3 - alu 0 ++ // MUL gpr[2].x gpr[1].x gpr[0].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MUL), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 4 - alu 1 ++ // MUL gpr[2].y gpr[1].y gpr[0].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MUL), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 5 - alu 2 ++ // MUL gpr[2].z gpr[1].z gpr[0].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ 
OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MUL), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 6 - alu 3 ++ // MUL gpr[2].w gpr[1].w gpr[0].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MUL), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 7 ++ ps[i++] = 0x00000000; ++ ps[i++] = 0x00000000; ++ ++ //8/9 - src ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(src_r), ++ DST_SEL_Y(src_g), ++ DST_SEL_Z(src_b), ++ DST_SEL_W(src_a), ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(0), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ //10/11 - mask ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(1), ++ SRC_GPR(1), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(mask_r), ++ DST_SEL_Y(mask_g), ++ DST_SEL_Z(mask_b), ++ DST_SEL_W(mask_a), ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ 
COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(1), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ } else { ++ int src_a, src_r, src_g, src_b; ++ /* setup pixel shader */ ++ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { ++ //src_color = R300_ALU_RGB_0_0; ++ src_r = SQ_SEL_0; ++ src_g = SQ_SEL_0; ++ src_b = SQ_SEL_0; ++ } else { ++ //src_color = R300_ALU_RGB_SRC0_RGB; ++ src_r = SQ_SEL_X; ++ src_g = SQ_SEL_Y; ++ src_b = SQ_SEL_Z; ++ } ++ ++ if (PICT_FORMAT_A(pSrcPicture->format) == 0) { ++ //src_alpha = R300_ALU_ALPHA_1_0; ++ src_a = SQ_SEL_1; ++ } else { ++ //src_alpha = R300_ALU_ALPHA_SRC0_A; ++ src_a = SQ_SEL_W; ++ } ++ ++ //0 ++ ps[i++] = CF_DWORD0(ADDR(2)); ++ ps[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(1), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_TEX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(1)); ++ ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ ++ ++ //2/3 - src ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(src_r), ++ DST_SEL_Y(src_g), ++ DST_SEL_Z(src_b), ++ DST_SEL_W(src_a), ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ 
COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(0), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ } ++ ++ CLEAR (cb_conf); ++ CLEAR (vs_conf); ++ CLEAR (ps_conf); ++ ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ ++ /* Init */ ++ start_3d(pScrn, accel_state->ib); ++ ++ //cp_set_surface_sync(pScrn, accel_state->ib); ++ ++ set_default_state(pScrn, accel_state->ib); ++ ++ /* Scissor / viewport */ ++ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); ++ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); ++ ++ // fix me if false discard buffer! ++ if (!R600TextureSetup(pSrcPicture, pSrc, 0)) ++ return FALSE; ++ ++ if (pMask != NULL) { ++ // fix me if false discard buffer! ++ if (!R600TextureSetup(pMaskPicture, pMask, 1)) ++ return FALSE; ++ } else { ++ accel_state->is_transform[1] = FALSE; ++ } ++ ++ if (pMask != NULL) ++ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->comp_mask_vs_offset; ++ else ++ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->comp_vs_offset; ++ ++ memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps)); ++ accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart + ++ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2) - 256; ++ ++ accel_state->vs_size = 512; ++ accel_state->ps_size = 512; ++ ++ /* Shader */ ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->vs_size, accel_state->vs_mc_addr); ++ ++ vs_conf.shader_addr = accel_state->vs_mc_addr; ++ vs_conf.num_gprs = 3; ++ vs_conf.stack_size = 0; ++ vs_setup (pScrn, accel_state->ib, &vs_conf); ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 
++ accel_state->ps_size, accel_state->ps_mc_addr); ++ ++ ps_conf.shader_addr = accel_state->ps_mc_addr; ++ ps_conf.num_gprs = 3; ++ ps_conf.stack_size = 0; ++ ps_conf.uncached_first_inst = 1; ++ ps_conf.clamp_consts = 0; ++ ps_conf.export_mode = 2; ++ ps_setup (pScrn, accel_state->ib, &ps_conf); ++ ++ ereg (accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); ++ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); ++ ++ blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); ++ ++ if (info->ChipFamily == CHIP_FAMILY_R600) { ++ // no per-MRT blend on R600 ++ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); ++ ereg (accel_state->ib, CB_BLEND_CONTROL, blendcntl); ++ } else { ++ ereg (accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | ++ (1 << TARGET_BLEND_ENABLE_shift) | ++ PER_MRT_BLEND_bit)); ++ ereg (accel_state->ib, CB_BLEND0_CONTROL, blendcntl); ++ } ++ ++ cb_conf.id = 0; ++ cb_conf.w = accel_state->dst_pitch; ++ cb_conf.h = pDst->drawable.height; ++ cb_conf.base = accel_state->dst_mc_addr; ++ cb_conf.format = dst_format; ++ ++ switch (pDstPicture->format) { ++ case PICT_a8r8g8b8: ++ //ErrorF("dst: PICT_a8r8g8b8\n"); ++ cb_conf.comp_swap = 1; //ARGB ++ break; ++ case PICT_x8r8g8b8: ++ //ErrorF("dst: PICT_x8r8g8b8\n"); ++ cb_conf.comp_swap = 1; //ARGB ++ break; ++ case PICT_r5g6b5: ++ //ErrorF("dst: PICT_r5g6b5\n"); ++ cb_conf.comp_swap = 2; //RGB ++ break; ++ case PICT_a1r5g5b5: ++ //ErrorF("dst: PICT_a1r5g5b5\n"); ++ cb_conf.comp_swap = 1; //ARGB ++ break; ++ case PICT_x1r5g5b5: ++ //ErrorF("dst: PICT_x1r5g5b5\n"); ++ cb_conf.comp_swap = 1; //ARGB ++ break; ++ case PICT_a8: ++ //ErrorF("dst: PICT_a8\n"); ++ cb_conf.comp_swap = 3; //A ++ break; ++ default: ++ cb_conf.comp_swap = 1; ++ break; ++ } ++ cb_conf.source_format = 1; ++ cb_conf.blend_clamp = 1; ++ set_render_target(pScrn, accel_state->ib, &cb_conf); ++ ++ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | ++ 
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); ++ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ ++ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ ++ ++ /* Interpolator setup */ ++ if (pMask) { ++ // export 2 tex coords from VS ++ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); ++ // src = semantic id 0; mask = semantic id 1 ++ ereg (accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | ++ (1 << SEMANTIC_1_shift))); ++ // input 2 tex coords from VS ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift)); ++ } else { ++ // export 1 tex coords from VS ++ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); ++ // src = semantic id 0 ++ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); ++ // input 1 tex coords from VS ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); ++ } ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); ++ // SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 ++ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | ++ (0x01 << DEFAULT_VAL_shift) | ++ SEL_CENTROID_bit)); ++ // SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 ++ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) | ++ (0x01 << DEFAULT_VAL_shift) | ++ SEL_CENTROID_bit)); ++ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); ++ ++ accel_state->vb_index = 0; ++ ++ return TRUE; ++} ++ ++static void R600Composite(PixmapPtr pDst, ++ int srcX, int srcY, ++ int maskX, int maskY, ++ int dstX, int dstY, ++ int w, int h) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; ++ ++ /* 
ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", ++ srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ ++ ++ srcTopLeft.x = IntToxFixed(srcX); ++ srcTopLeft.y = IntToxFixed(srcY); ++ srcTopRight.x = IntToxFixed(srcX + w); ++ srcTopRight.y = IntToxFixed(srcY); ++ srcBottomLeft.x = IntToxFixed(srcX); ++ srcBottomLeft.y = IntToxFixed(srcY + h); ++ srcBottomRight.x = IntToxFixed(srcX + w); ++ srcBottomRight.y = IntToxFixed(srcY + h); ++ ++ //XXX do transform in vertex shader ++ if (accel_state->is_transform[0]) { ++ transformPoint(accel_state->transform[0], &srcTopLeft); ++ transformPoint(accel_state->transform[0], &srcTopRight); ++ transformPoint(accel_state->transform[0], &srcBottomLeft); ++ transformPoint(accel_state->transform[0], &srcBottomRight); ++ } ++ ++ if (accel_state->has_mask) { ++ struct r6xx_comp_mask_vertex *comp_vb; ++ struct r6xx_comp_mask_vertex vertex[3]; ++ xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; ++ ++ if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) { ++ R600DoneComposite(pDst); ++ accel_state->vb_index = 0; ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ } ++ ++ comp_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); ++ ++ maskTopLeft.x = IntToxFixed(maskX); ++ maskTopLeft.y = IntToxFixed(maskY); ++ maskTopRight.x = IntToxFixed(maskX + w); ++ maskTopRight.y = IntToxFixed(maskY); ++ maskBottomLeft.x = IntToxFixed(maskX); ++ maskBottomLeft.y = IntToxFixed(maskY + h); ++ maskBottomRight.x = IntToxFixed(maskX + w); ++ maskBottomRight.y = IntToxFixed(maskY + h); ++ ++ if (accel_state->is_transform[1]) { ++ transformPoint(accel_state->transform[1], &maskTopLeft); ++ transformPoint(accel_state->transform[1], &maskTopRight); ++ transformPoint(accel_state->transform[1], &maskBottomLeft); ++ transformPoint(accel_state->transform[1], &maskBottomRight); ++ } ++ ++ vertex[0].x = (float)dstX; ++ vertex[0].y = (float)dstY; ++ vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / 
accel_state->texW[0]; ++ vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; ++ vertex[0].mask_s = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; ++ vertex[0].mask_t = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; ++ ++ vertex[1].x = (float)dstX; ++ vertex[1].y = (float)(dstY + h); ++ vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; ++ vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; ++ vertex[1].mask_s = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; ++ vertex[1].mask_t = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; ++ ++ vertex[2].x = (float)(dstX + w); ++ vertex[2].y = (float)(dstY + h); ++ vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; ++ vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; ++ vertex[2].mask_s = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; ++ vertex[2].mask_t = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("vertex 0: %d, %d, %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, ++ vertex[0].src_s, vertex[0].src_t, vertex[0].mask_s, vertex[0].mask_t); ++ ErrorF("vertex 1: %d, %d, %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, ++ vertex[1].src_s, vertex[1].src_t, vertex[1].mask_s, vertex[1].mask_t); ++ ErrorF("vertex 2: %d, %d, %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, ++ vertex[2].src_s, vertex[2].src_t, vertex[2].mask_s, vertex[2].mask_t); ++#endif ++ ++ // append to vertex buffer ++ comp_vb[accel_state->vb_index++] = vertex[0]; ++ comp_vb[accel_state->vb_index++] = vertex[1]; ++ comp_vb[accel_state->vb_index++] = vertex[2]; ++ ++ } else { ++ struct r6xx_comp_vertex *comp_vb; ++ struct r6xx_comp_vertex vertex[3]; ++ ++ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { ++ R600DoneComposite(pDst); ++ accel_state->vb_index = 0; ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ } ++ ++ comp_vb = 
(pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); ++ ++ vertex[0].x = (float)dstX; ++ vertex[0].y = (float)dstY; ++ vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; ++ vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; ++ ++ vertex[1].x = (float)dstX; ++ vertex[1].y = (float)(dstY + h); ++ vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; ++ vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; ++ ++ vertex[2].x = (float)(dstX + w); ++ vertex[2].y = (float)(dstY + h); ++ vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; ++ vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; ++ ++ // append to vertex buffer ++ comp_vb[accel_state->vb_index++] = vertex[0]; ++ comp_vb[accel_state->vb_index++] = vertex[1]; ++ comp_vb[accel_state->vb_index++] = vertex[2]; ++ ++#ifdef SHOW_VERTEXES ++ ErrorF("vertex 0: %d, %d, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].src_s, vertex[0].src_t); ++ ErrorF("vertex 1: %d, %d, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].src_s, vertex[1].src_t); ++ ErrorF("vertex 2: %d, %d, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].src_s, vertex[2].src_t); ++#endif ++ } ++ ++ ++} ++ ++static void R600DoneComposite(PixmapPtr pDst) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ draw_config_t draw_conf; ++ vtx_resource_t vtx_res; ++ ++ CLEAR (draw_conf); ++ CLEAR (vtx_res); ++ ++ if (accel_state->vb_index == 0) { ++ R600IBDiscard(pScrn, accel_state->ib); ++ return; ++ } ++ ++ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + ++ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); ++ ++ ++ /* Vertex buffer setup */ ++ if (accel_state->has_mask) { ++ accel_state->vb_size = accel_state->vb_index * 24; ++ vtx_res.id = 
SQ_VTX_RESOURCE_vs; ++ vtx_res.vtx_size_dw = 24 / 4; ++ vtx_res.vtx_num_entries = accel_state->vb_size / 4; ++ vtx_res.mem_req_size = 1; ++ vtx_res.vb_addr = accel_state->vb_mc_addr; ++ } else { ++ accel_state->vb_size = accel_state->vb_index * 16; ++ vtx_res.id = SQ_VTX_RESOURCE_vs; ++ vtx_res.vtx_size_dw = 16 / 4; ++ vtx_res.vtx_num_entries = accel_state->vb_size / 4; ++ vtx_res.mem_req_size = 1; ++ vtx_res.vb_addr = accel_state->vb_mc_addr; ++ } ++ /* flush vertex cache */ ++ if ((info->ChipFamily == CHIP_FAMILY_RV610) || ++ (info->ChipFamily == CHIP_FAMILY_RV620) || ++ (info->ChipFamily == CHIP_FAMILY_RS780) || ++ (info->ChipFamily == CHIP_FAMILY_RV710)) ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ else ++ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ ++ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); ++ ++ draw_conf.prim_type = DI_PT_RECTLIST; ++ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; ++ draw_conf.num_instances = 1; ++ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; ++ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; ++ ++ draw_auto(pScrn, accel_state->ib, &draw_conf); ++ ++ wait_3d_idle_clean(pScrn, accel_state->ib); ++ ++ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), ++ accel_state->dst_size, accel_state->dst_mc_addr); ++ ++ R600CPFlushIndirect(pScrn, accel_state->ib); ++} ++ ++Bool ++R600CopyToVRAM(ScrnInfoPtr pScrn, ++ char *src, int src_pitch, ++ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, ++ int x, int y, int w, int h) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ uint32_t scratch_mc_addr; ++ int wpass = w * (bpp/8); ++ int scratch_pitch_bytes = (wpass + 255) & ~255; ++ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); ++ int scratch_offset = 0, hpass, temph; ++ char *dst; ++ drmBufPtr 
scratch; ++ ++ if (dst_pitch & 7) ++ return FALSE; ++ ++ if (dst_mc_addr & 0xff) ++ return FALSE; ++ ++ scratch = RADEONCPGetBuffer(pScrn); ++ if (scratch == NULL) ++ return FALSE; ++ ++ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); ++ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); ++ dst = (char *)scratch->address; ++ ++ //memcopy from sys to scratch ++ while (temph--) { ++ memcpy (dst, src, wpass); ++ src += src_pitch; ++ dst += scratch_pitch_bytes; ++ } ++ ++ while (h) { ++ uint32_t offset = scratch_mc_addr + scratch_offset; ++ int oldhpass = hpass; ++ h -= oldhpass; ++ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); ++ ++ if (hpass) { ++ scratch_offset = scratch->total/2 - scratch_offset; ++ dst = (char *)scratch->address + scratch_offset; ++ // wait for the engine to be idle ++ RADEONWaitForIdleCP(pScrn); ++ //memcopy from sys to scratch ++ while (temph--) { ++ memcpy (dst, src, wpass); ++ src += src_pitch; ++ dst += scratch_pitch_bytes; ++ } ++ } ++ //blit from scratch to vram ++ R600DoPrepareCopy(pScrn, ++ scratch_pitch, w, oldhpass, offset, bpp, ++ dst_pitch, dst_height, dst_mc_addr, bpp, ++ 3, 0xffffffff); ++ R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); ++ R600DoCopy(pScrn); ++ y += oldhpass; ++ } ++ ++ R600IBDiscard(pScrn, scratch); ++ ++ return TRUE; ++} ++ ++static Bool ++R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, ++ char *src, int src_pitch) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); ++ uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; ++ uint32_t dst_height = pDst->drawable.height; ++ int bpp = pDst->drawable.bitsPerPixel; ++ ++ return R600CopyToVRAM(pScrn, ++ src, src_pitch, ++ dst_pitch, dst_mc_addr, dst_height, bpp, ++ x, y, w, h); ++} ++ ++static Bool 
++R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, ++ char *dst, int dst_pitch) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); ++ uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; ++ uint32_t src_width = pSrc->drawable.width; ++ uint32_t src_height = pSrc->drawable.height; ++ int bpp = pSrc->drawable.bitsPerPixel; ++ uint32_t scratch_mc_addr; ++ int scratch_pitch_bytes = (dst_pitch + 255) & ~255; ++ int scratch_offset = 0, hpass; ++ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); ++ int wpass = w * (bpp/8); ++ drmBufPtr scratch; ++ ++ if (src_pitch & 7) ++ return FALSE; ++ ++ scratch = RADEONCPGetBuffer(pScrn); ++ if (scratch == NULL) ++ return FALSE; ++ ++ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); ++ hpass = min(h, scratch->total/2 / scratch_pitch_bytes); ++ ++ //blit from vram to scratch ++ R600DoPrepareCopy(pScrn, ++ src_pitch, src_width, src_height, src_mc_addr, bpp, ++ scratch_pitch, hpass, scratch_mc_addr, bpp, ++ 3, 0xffffffff); ++ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); ++ R600DoCopy(pScrn); ++ ++ while (h) { ++ char *src = (char *)scratch->address + scratch_offset; ++ int oldhpass = hpass; ++ h -= oldhpass; ++ y += oldhpass; ++ hpass = min(h, scratch->total/2 / scratch_pitch_bytes); ++ ++ if (hpass) { ++ scratch_offset = scratch->total/2 - scratch_offset; ++ //blit from vram to scratch ++ R600DoPrepareCopy(pScrn, ++ src_pitch, src_width, src_height, src_mc_addr, bpp, ++ scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp, ++ 3, 0xffffffff); ++ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); ++ R600DoCopy(pScrn); ++ } ++ ++ // wait for the engine to be idle ++ RADEONWaitForIdleCP(pScrn); ++ //memcopy from scratch to sys ++ while (oldhpass--) { ++ memcpy (dst, src, wpass); 
++ dst += dst_pitch; ++ src += scratch_pitch_bytes; ++ } ++ } ++ ++ R600IBDiscard(pScrn, scratch); ++ ++ return TRUE; ++ ++} ++ ++static int ++R600MarkSync(ScreenPtr pScreen) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ return ++accel_state->exaSyncMarker; ++ ++} ++ ++static void ++R600Sync(ScreenPtr pScreen, int marker) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ if (accel_state->exaMarkerSynced != marker) { ++ RADEONWaitForIdleCP(pScrn); ++ accel_state->exaMarkerSynced = marker; ++ } ++ ++} ++ ++static Bool ++R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ uint32_t *vs; ++ uint32_t *ps; ++ // 512 bytes per shader for now ++ int size = 512 * 11; ++ int i; ++ ++ accel_state->shaders = NULL; ++ ++ accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, ++ TRUE, NULL, NULL); ++ ++ if (accel_state->shaders == NULL) ++ return FALSE; ++ ++ vs = (pointer)((char *)info->FB + accel_state->shaders->offset); ++ ps = (pointer)((char *)info->FB + accel_state->shaders->offset); ++ accel_state->solid_vs_offset = 0; ++ accel_state->solid_ps_offset = 512; ++ accel_state->copy_vs_offset = 1024; ++ accel_state->copy_ps_offset = 1536; ++ accel_state->comp_vs_offset = 2048; ++ accel_state->comp_ps_offset = 2560; ++ accel_state->comp_mask_vs_offset = 3072; ++ accel_state->comp_mask_ps_offset = 3584; ++ accel_state->xv_vs_offset = 4096; ++ accel_state->xv_ps_offset_packed = 4608; ++ accel_state->xv_ps_offset_planar = 5120; ++ ++ // solid vs --------------------------------------- ++ i = accel_state->solid_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ 
COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(1), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(1), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 - always export a param whether it's used or not ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 - padding ++ vs[i++] = 0x00000000; ++ vs[i++] = 0x00000000; ++ //4/5 ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // solid ps --------------------------------------- ++ i = 
accel_state->solid_ps_offset / 4; ++ // 0 ++ ps[i++] = CF_ALU_DWORD0(ADDR(2), ++ KCACHE_BANK0(0), ++ KCACHE_BANK1(0), ++ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); ++ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), ++ KCACHE_ADDR0(0), ++ KCACHE_ADDR1(0), ++ I_COUNT(4), ++ USES_WATERFALL(0), ++ CF_INST(SQ_CF_INST_ALU), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ // 1 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(1)); ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ ++ // 2 ++ ps[i++] = ALU_DWORD0(SRC0_SEL(256), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_AR_X), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MOV), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 3 ++ ps[i++] = ALU_DWORD0(SRC0_SEL(256), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_AR_X), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MOV), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 4 ++ ps[i++] 
= ALU_DWORD0(SRC0_SEL(256), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_AR_X), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MOV), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 5 ++ ps[i++] = ALU_DWORD0(SRC0_SEL(256), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_AR_X), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_MOV), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ ++ // copy vs --------------------------------------- ++ i = accel_state->copy_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(2), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(1), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 ++ vs[i++] = 
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 ++ vs[i++] = 0x00000000; ++ vs[i++] = 0x00000000; ++ //4/5 ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(16)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ //6/7 ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(8), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // copy ps --------------------------------------- ++ 
i = accel_state->copy_ps_offset / 4; ++ // CF INST 0 ++ ps[i++] = CF_DWORD0(ADDR(2)); ++ ps[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(1), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_TEX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ // CF INST 1 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(1)); ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ // TEX INST 0 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(0), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_X), //R ++ DST_SEL_Y(SQ_SEL_Y), //G ++ DST_SEL_Z(SQ_SEL_Z), //B ++ DST_SEL_W(SQ_SEL_W), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_UNNORMALIZED), ++ COORD_TYPE_Y(TEX_UNNORMALIZED), ++ COORD_TYPE_Z(TEX_UNNORMALIZED), ++ COORD_TYPE_W(TEX_UNNORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(0), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ ++ // xv vs --------------------------------------- ++ i = accel_state->xv_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(2), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(1), ++ 
RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 ++ vs[i++] = 0x00000000; ++ vs[i++] = 0x00000000; ++ //4/5 ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(16)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ //6/7 ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ 
DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(8), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // xv ps packed ---------------------------------- ++ i = accel_state->xv_ps_offset_packed / 4; ++ // 0 ++ ps[i++] = CF_DWORD0(ADDR(20)); ++ ps[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(2), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_TEX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ // 1 ++ ps[i++] = CF_ALU_DWORD0(ADDR(3), ++ KCACHE_BANK0(0), ++ KCACHE_BANK1(0), ++ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); ++ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), ++ KCACHE_ADDR0(0), ++ KCACHE_ADDR1(0), ++ I_COUNT(16), ++ USES_WATERFALL(0), ++ CF_INST(SQ_CF_INST_ALU), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ // 2 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(2), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(3)); ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ /* Undo scaling of Y'CbCr values ++ * Y' is scaled from 16:235 ++ * Cb/Cr are scaled from 16:240 ++ */ ++ // 3 - alu 0 ++ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_Y), ++ SRC2_NEG(0), 
++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 4 - alu 1 ++ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_W), ++ SRC2_NEG(0), ++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(0)); ++ // 5 - alu 2 ++ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_W), ++ SRC2_NEG(0), ++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(0)); ++ // 6 - alu 3 ++ // MOV gpr[1].w 0.0 ++ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(SQ_ALU_SRC_0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(0)); ++ // 7 - alu 4 ++ // DP4 gpr[2].x gpr[1].x c[0].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ 
SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 8 - alu 5 ++ // DP4 gpr[2].y gpr[1].y c[0].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 9 - alu 6 ++ // DP4 gpr[2].z gpr[1].z c[0].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 10 - alu 7 ++ // DP4 gpr[2].w gpr[1].w c[0].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ 
INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 11 - alu 8 ++ // DP4 gpr[2].x gpr[1].x c[1].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 12 - alu 9 ++ // DP4 gpr[2].y gpr[1].y c[1].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 13 - alu 10 ++ // DP4 gpr[2].z gpr[1].z c[1].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ 
SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 14 - alu 11 ++ // DP4 gpr[2].w gpr[1].w c[1].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 15 - alu 12 ++ // DP4 gpr[2].x gpr[1].x c[2].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 16 - alu 13 ++ // DP4 gpr[2].y gpr[1].y c[2].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ 
OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 17 - alu 14 ++ // DP4 gpr[2].z gpr[1].z c[2].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 18 - alu 15 ++ // DP4 gpr[2].w gpr[1].w c[2].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 19 - alignment ++ ps[i++] = 0x00000000; ++ ps[i++] = 0x00000000; ++ // 20/21 - tex 0 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_X), //R ++ DST_SEL_Y(SQ_SEL_MASK), //G ++ DST_SEL_Z(SQ_SEL_MASK), //B ++ DST_SEL_W(SQ_SEL_1), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); 
++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(0), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ // 22/23 - tex 1 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(1), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_MASK), //R ++ DST_SEL_Y(SQ_SEL_X), //G ++ DST_SEL_Z(SQ_SEL_Y), //B ++ DST_SEL_W(SQ_SEL_MASK), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(1), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ ++ // xv ps planar ---------------------------------- ++ i = accel_state->xv_ps_offset_planar / 4; ++ // 0 ++ ps[i++] = CF_DWORD0(ADDR(20)); ++ ps[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(3), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_TEX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ // 1 ++ ps[i++] = CF_ALU_DWORD0(ADDR(3), ++ KCACHE_BANK0(0), ++ KCACHE_BANK1(0), ++ KCACHE_MODE0(SQ_CF_KCACHE_NOP)); ++ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), ++ KCACHE_ADDR0(0), ++ KCACHE_ADDR1(0), ++ I_COUNT(16), ++ USES_WATERFALL(0), ++ CF_INST(SQ_CF_INST_ALU), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ // 2 ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), ++ TYPE(SQ_EXPORT_PIXEL), ++ RW_GPR(2), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(3)); ++ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ 
END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ /* Undo scaling of Y'CbCr values ++ * Y' is scaled from 16:235 ++ * Cb/Cr are scaled from 16:240 ++ */ ++ // 3 - alu 0 ++ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_Y), ++ SRC2_NEG(0), ++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 4 - alu 1 ++ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_W), ++ SRC2_NEG(0), ++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(0)); ++ // 5 - alu 2 ++ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(259), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), ++ SRC2_REL(ABSOLUTE), ++ SRC2_ELEM(ELEM_W), ++ SRC2_NEG(0), ++ ALU_INST(SQ_OP3_INST_MULADD), ++ BANK_SWIZZLE(SQ_ALU_VEC_012), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(0)); ++ // 6 - alu 3 ++ // MOV gpr[1].w 0.0 ++ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), ++ SRC0_REL(ABSOLUTE), ++ 
SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(SQ_ALU_SRC_0), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(0)); ++ // 7 - alu 4 ++ // DP4 gpr[2].x gpr[1].x c[0].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 8 - alu 5 ++ // DP4 gpr[2].y gpr[1].y c[0].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 9 - alu 6 ++ // DP4 gpr[2].z gpr[1].z c[0].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ 
INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 10 - alu 7 ++ // DP4 gpr[2].w gpr[1].w c[0].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(256), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 11 - alu 8 ++ // DP4 gpr[2].x gpr[1].x c[1].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 12 - alu 9 ++ // DP4 gpr[2].y gpr[1].y c[1].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ 
SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 13 - alu 10 ++ // DP4 gpr[2].z gpr[1].z c[1].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 14 - alu 11 ++ // DP4 gpr[2].w gpr[1].w c[1].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(257), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 15 - alu 12 ++ // DP4 gpr[2].x gpr[1].x c[2].x ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_X), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_X), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ 
OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_X), ++ CLAMP(1)); ++ // 16 - alu 13 ++ // DP4 gpr[2].y gpr[1].y c[2].y ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Y), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Y), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Y), ++ CLAMP(1)); ++ // 17 - alu 14 ++ // DP4 gpr[2].z gpr[1].z c[2].z ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_Z), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_Z), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(0)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(1), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_102), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ DST_ELEM(ELEM_Z), ++ CLAMP(1)); ++ // 18 - alu 15 ++ // DP4 gpr[2].w gpr[1].w c[2].w ++ ps[i++] = ALU_DWORD0(SRC0_SEL(1), ++ SRC0_REL(ABSOLUTE), ++ SRC0_ELEM(ELEM_W), ++ SRC0_NEG(0), ++ SRC1_SEL(258), ++ SRC1_REL(ABSOLUTE), ++ SRC1_ELEM(ELEM_W), ++ SRC1_NEG(0), ++ INDEX_MODE(SQ_INDEX_LOOP), ++ PRED_SEL(SQ_PRED_SEL_OFF), ++ LAST(1)); ++ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, ++ SRC0_ABS(0), ++ SRC1_ABS(0), ++ UPDATE_EXECUTE_MASK(0), ++ UPDATE_PRED(0), ++ WRITE_MASK(0), ++ FOG_MERGE(0), ++ OMOD(SQ_ALU_OMOD_OFF), ++ ALU_INST(SQ_OP2_INST_DOT4), ++ BANK_SWIZZLE(SQ_ALU_VEC_021), ++ DST_GPR(2), ++ DST_REL(ABSOLUTE), ++ 
DST_ELEM(ELEM_W), ++ CLAMP(1)); ++ // 19 - alignment ++ ps[i++] = 0x00000000; ++ ps[i++] = 0x00000000; ++ // 20/21 - tex 0 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_X), //R ++ DST_SEL_Y(SQ_SEL_MASK), //G ++ DST_SEL_Z(SQ_SEL_MASK), //B ++ DST_SEL_W(SQ_SEL_1), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(0), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ // 22/23 - tex 1 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(1), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_MASK), //R ++ DST_SEL_Y(SQ_SEL_MASK), //G ++ DST_SEL_Z(SQ_SEL_X), //B ++ DST_SEL_W(SQ_SEL_MASK), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), ++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(1), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ // 24/25 - tex 2 ++ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), ++ BC_FRAC_MODE(0), ++ FETCH_WHOLE_QUAD(0), ++ RESOURCE_ID(2), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ R7xx_ALT_CONST(0)); ++ ps[i++] = TEX_DWORD1(DST_GPR(1), ++ DST_REL(ABSOLUTE), ++ DST_SEL_X(SQ_SEL_MASK), //R ++ DST_SEL_Y(SQ_SEL_X), //G ++ DST_SEL_Z(SQ_SEL_MASK), //B ++ DST_SEL_W(SQ_SEL_MASK), //A ++ LOD_BIAS(0), ++ COORD_TYPE_X(TEX_NORMALIZED), 
++ COORD_TYPE_Y(TEX_NORMALIZED), ++ COORD_TYPE_Z(TEX_NORMALIZED), ++ COORD_TYPE_W(TEX_NORMALIZED)); ++ ps[i++] = TEX_DWORD2(OFFSET_X(0), ++ OFFSET_Y(0), ++ OFFSET_Z(0), ++ SAMPLER_ID(2), ++ SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_0), ++ SRC_SEL_W(SQ_SEL_1)); ++ ps[i++] = TEX_DWORD_PAD; ++ ++ // comp mask vs --------------------------------------- ++ i = accel_state->comp_mask_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(3), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 - dst ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(2), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 - src ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(1), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 - mask ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ 
CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //4/5 - dst ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(24)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ //6/7 - src ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(8), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ //8/9 - mask ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ 
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(16), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // comp mask vs --------------------------------------- ++ i = accel_state->comp_mask_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(3), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 - dst ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(2), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 - src ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(1), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 - mask ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(1), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ 
WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //4/5 - dst ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(24)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ //6/7 - src ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(8), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ //8/9 - mask ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ 
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(16), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // comp mask ps --------------------------------------- ++ // not yet ++ ++ // comp vs --------------------------------------- ++ i = accel_state->comp_vs_offset / 4; ++ //0 ++ vs[i++] = CF_DWORD0(ADDR(4)); ++ vs[i++] = CF_DWORD1(POP_COUNT(0), ++ CF_CONST(0), ++ COND(SQ_CF_COND_ACTIVE), ++ I_COUNT(2), ++ CALL_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_VTX), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //1 - dst ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), ++ TYPE(SQ_EXPORT_POS), ++ RW_GPR(1), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(0), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(1)); ++ //2 - src ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), ++ TYPE(SQ_EXPORT_PARAM), ++ RW_GPR(0), ++ RW_REL(ABSOLUTE), ++ INDEX_GPR(0), ++ ELEM_SIZE(0)); ++ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), ++ SRC_SEL_Y(SQ_SEL_Y), ++ SRC_SEL_Z(SQ_SEL_Z), ++ SRC_SEL_W(SQ_SEL_W), ++ R6xx_ELEM_LOOP(0), ++ BURST_COUNT(0), ++ END_OF_PROGRAM(1), ++ VALID_PIXEL_MODE(0), ++ CF_INST(SQ_CF_INST_EXPORT_DONE), ++ WHOLE_QUAD_MODE(0), ++ BARRIER(0)); ++ //3 ++ vs[i++] = 0x00000000; ++ vs[i++] = 0x00000000; ++ //4/5 - dst ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(16)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ 
DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(0), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(1)); ++ vs[i++] = VTX_DWORD_PAD; ++ //6/7 - src ++ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), ++ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), ++ FETCH_WHOLE_QUAD(0), ++ BUFFER_ID(0), ++ SRC_GPR(0), ++ SRC_REL(ABSOLUTE), ++ SRC_SEL_X(SQ_SEL_X), ++ MEGA_FETCH_COUNT(8)); ++ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), ++ DST_REL(0), ++ DST_SEL_X(SQ_SEL_X), ++ DST_SEL_Y(SQ_SEL_Y), ++ DST_SEL_Z(SQ_SEL_0), ++ DST_SEL_W(SQ_SEL_1), ++ USE_CONST_FIELDS(0), ++ DATA_FORMAT(FMT_32_32_FLOAT), //xxx ++ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx ++ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx ++ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); ++ vs[i++] = VTX_DWORD2(OFFSET(8), ++ ENDIAN_SWAP(ENDIAN_NONE), ++ CONST_BUF_NO_STRIDE(0), ++ MEGA_FETCH(0)); ++ vs[i++] = VTX_DWORD_PAD; ++ ++ // comp ps --------------------------------------- ++ // not yet ++ ++ ++ return TRUE; ++} ++ ++static Bool ++R600PrepareAccess(PixmapPtr pPix, int index) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ unsigned char *RADEONMMIO = info->MMIO; ++ ++ //flush HDP read/write caches ++ OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); ++ ++ return TRUE; ++} ++ ++static void ++R600FinishAccess(PixmapPtr pPix, int index) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ unsigned char *RADEONMMIO = info->MMIO; ++ ++ //flush HDP read/write caches ++ OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); ++ ++} ++ ++ ++Bool ++R600DrawInit(ScreenPtr pScreen) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ 
++ if (info->accel_state->exa == NULL) { ++ xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); ++ return FALSE; ++ } ++ ++ info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; ++ info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; ++ ++ info->accel_state->exa->PrepareSolid = R600PrepareSolid; ++ info->accel_state->exa->Solid = R600Solid; ++ info->accel_state->exa->DoneSolid = R600DoneSolid; ++ ++ info->accel_state->exa->PrepareCopy = R600PrepareCopy; ++ info->accel_state->exa->Copy = R600Copy; ++ info->accel_state->exa->DoneCopy = R600DoneCopy; ++ ++ info->accel_state->exa->MarkSync = R600MarkSync; ++ info->accel_state->exa->WaitMarker = R600Sync; ++ ++ info->accel_state->exa->PrepareAccess = R600PrepareAccess; ++ info->accel_state->exa->FinishAccess = R600FinishAccess; ++ ++ info->accel_state->exa->UploadToScreen = R600UploadToScreen; ++ info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; ++ ++ info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; ++ info->accel_state->exa->pixmapOffsetAlign = 256; ++ info->accel_state->exa->pixmapPitchAlign = 256; ++ ++ info->accel_state->exa->CheckComposite = R600CheckComposite; ++ info->accel_state->exa->PrepareComposite = R600PrepareComposite; ++ info->accel_state->exa->Composite = R600Composite; ++ info->accel_state->exa->DoneComposite = R600DoneComposite; ++ ++#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); ++ ++ info->accel_state->exa->maxPitchBytes = 16320; ++ info->accel_state->exa->maxX = 8192; ++#else ++ info->accel_state->exa->maxX = 16320 / 4; ++#endif ++ info->accel_state->exa->maxY = 8192; ++ ++ if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); ++ info->accel_state->vsync = TRUE; ++ } else ++ info->accel_state->vsync = FALSE; ++ ++ if (!exaDriverInit(pScreen, info->accel_state->exa)) { ++ 
xfree(info->accel_state->exa); ++ return FALSE; ++ } ++ ++ if (!info->gartLocation) ++ return FALSE; ++ ++ info->accel_state->XInited3D = FALSE; ++ info->accel_state->copy_area = NULL; ++ ++ if (!R600LoadShaders(pScrn, pScreen)) ++ return FALSE; ++ ++ exaMarkSync(pScreen); ++ ++ return TRUE; ++ ++} ++ +diff --git a/src/r600_reg.h b/src/r600_reg.h +new file mode 100644 +index 0000000..9036e2a +--- /dev/null ++++ b/src/r600_reg.h +@@ -0,0 +1,132 @@ ++/* ++ * RadeonHD R6xx, R7xx Register documentation ++ * ++ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. ++ * Copyright (C) 2008-2009 Matthias Hopf ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included ++ * in all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */
++
++#ifndef _R600_REG_H_
++#define _R600_REG_H_
++
++/*
++ * Register definitions
++ */
++
++#include "r600_reg_auto_r6xx.h"
++#include "r600_reg_r6xx.h"
++#include "r600_reg_r7xx.h"
++
++
++/* SET_*_REG offsets + ends: first (_offset) and last-bound (_end) register address of each window; the window names mirror the IT_SET_* packet opcodes defined further down. From RESOURCE onward every _end equals the next window's _offset, so _end is presumably exclusive - TODO confirm against the register docs. */
++enum {
++    SET_CONFIG_REG_offset = 0x00008000,
++    SET_CONFIG_REG_end = 0x0000ac00,
++    SET_CONTEXT_REG_offset = 0x00028000,
++    SET_CONTEXT_REG_end = 0x00029000,
++    SET_ALU_CONST_offset = 0x00030000,
++    SET_ALU_CONST_end = 0x00032000,
++    SET_RESOURCE_offset = 0x00038000,
++    SET_RESOURCE_end = 0x0003c000,
++    SET_SAMPLER_offset = 0x0003c000,
++    SET_SAMPLER_end = 0x0003cff0,
++    SET_CTL_CONST_offset = 0x0003cff0,
++    SET_CTL_CONST_end = 0x0003e200,
++    SET_LOOP_CONST_offset = 0x0003e200,
++    SET_LOOP_CONST_end = 0x0003e380,
++    SET_BOOL_CONST_offset = 0x0003e380,
++    SET_BOOL_CONST_end = 0x00040000,
++} ;
++
++/* packet3 IT_SURFACE_BASE_UPDATE bits: single-bit flags that can be OR-ed together - bit 0 depth, bits 1-8 color 0-7, bits 9-12 stream-out 0-3, bits 13-14 coher 0-1 */
++enum {
++    DEPTH_BASE = (1 << 0),
++    COLOR0_BASE = (1 << 1),
++    COLOR1_BASE = (1 << 2),
++    COLOR2_BASE = (1 << 3),
++    COLOR3_BASE = (1 << 4),
++    COLOR4_BASE = (1 << 5),
++    COLOR5_BASE = (1 << 6),
++    COLOR6_BASE = (1 << 7),
++    COLOR7_BASE = (1 << 8),
++    STRMOUT_BASE0 = (1 << 9),
++    STRMOUT_BASE1 = (1 << 10),
++    STRMOUT_BASE2 = (1 << 11),
++    STRMOUT_BASE3 = (1 << 12),
++    COHER_BASE0 = (1 << 13),
++    COHER_BASE1 = (1 << 14),
++};
++
++/* Packet3 commands: IT_* opcode values, listed in ascending order except where noted */
++enum {
++    IT_NOP = 0x10,
++    IT_INDIRECT_BUFFER_END = 0x17,
++    IT_SET_PREDICATION = 0x20,
++    IT_REG_RMW = 0x21,
++    IT_COND_EXEC = 0x22,
++    IT_PRED_EXEC = 0x23,
++    IT_START_3D_CMDBUF = 0x24,
++    IT_DRAW_INDEX_2 = 0x27,
++    IT_CONTEXT_CONTROL = 0x28,
++    IT_DRAW_INDEX_IMMD_BE = 0x29,
++    IT_INDEX_TYPE = 0x2A,
++    IT_DRAW_INDEX = 0x2B,
++    IT_DRAW_INDEX_AUTO = 0x2D,
++    IT_DRAW_INDEX_IMMD = 0x2E,
++    IT_NUM_INSTANCES = 0x2F,
++    IT_STRMOUT_BUFFER_UPDATE = 0x34,
++    IT_INDIRECT_BUFFER_MP = 0x38,
++    IT_MEM_SEMAPHORE = 0x39,
++    IT_MPEG_INDEX = 0x3A,
++    IT_WAIT_REG_MEM = 0x3C,
++    IT_MEM_WRITE = 0x3D,
++    IT_INDIRECT_BUFFER = 0x32, /* out of numeric order in this list; the value itself is unique within the enum */
++    IT_CP_INTERRUPT = 0x40,
++    IT_SURFACE_SYNC = 0x43,
++    IT_ME_INITIALIZE = 0x44,
++    IT_COND_WRITE = 0x45,
++    IT_EVENT_WRITE = 0x46,
++    IT_EVENT_WRITE_EOP = 0x47,
++    IT_ONE_REG_WRITE = 0x57,
++    IT_SET_CONFIG_REG = 0x68,
++    IT_SET_CONTEXT_REG = 0x69,
++    IT_SET_ALU_CONST = 0x6A,
++    IT_SET_BOOL_CONST = 0x6B,
++    IT_SET_LOOP_CONST = 0x6C,
++    IT_SET_RESOURCE = 0x6D,
++    IT_SET_SAMPLER = 0x6E,
++    IT_SET_CTL_CONST = 0x6F,
++    IT_SURFACE_BASE_UPDATE = 0x73,
++} ;
++
++/* IT_WAIT_REG_MEM operation encoding: bits 2:0 carry one of the ALWAYS/LT/LE/EQ/NE/GE/GT codes (0-6), bit 4 selects IT_WAIT_REG (0) or IT_WAIT_MEM (1); the two fields are meant to be OR-ed together */
++
++#define IT_WAIT_ALWAYS (0<<0)
++#define IT_WAIT_LT (1<<0)
++#define IT_WAIT_LE (2<<0)
++#define IT_WAIT_EQ (3<<0)
++#define IT_WAIT_NE (4<<0)
++#define IT_WAIT_GE (5<<0)
++#define IT_WAIT_GT (6<<0)
++#define IT_WAIT_REG (0<<4)
++#define IT_WAIT_MEM (1<<4)
++
++#define IT_WAIT_ADDR(x) ((x) >> 2) /* drops the two low bits of x; presumably converts a byte address to a dword address - TODO confirm */
++
++#endif /* _R600_REG_H_ */
+diff --git a/src/r600_reg_auto_r6xx.h b/src/r600_reg_auto_r6xx.h
+new file mode 100644
+index 0000000..9d5aa3c
+--- /dev/null
++++ b/src/r600_reg_auto_r6xx.h
+@@ -0,0 +1,3087 @@
++/*
++ * RadeonHD R6xx, R7xx Register documentation
++ *
++ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
++ * Copyright (C) 2008-2009 Matthias Hopf
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included
++ * in all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef _AUTOREGS ++#define _AUTOREGS ++ ++enum { ++ ++ VGT_VTX_VECT_EJECT_REG = 0x000088b0, ++ PRIM_COUNT_mask = 0x3ff << 0, ++ PRIM_COUNT_shift = 0, ++ VGT_LAST_COPY_STATE = 0x000088c0, ++ SRC_STATE_ID_mask = 0x07 << 0, ++ SRC_STATE_ID_shift = 0, ++ DST_STATE_ID_mask = 0x07 << 16, ++ DST_STATE_ID_shift = 16, ++ VGT_CACHE_INVALIDATION = 0x000088c4, ++ CACHE_INVALIDATION_mask = 0x03 << 0, ++ CACHE_INVALIDATION_shift = 0, ++ VC_ONLY = 0x00, ++ TC_ONLY = 0x01, ++ VC_AND_TC = 0x02, ++ VS_NO_EXTRA_BUFFER_bit = 1 << 5, ++ VGT_GS_PER_ES = 0x000088c8, ++ VGT_ES_PER_GS = 0x000088cc, ++ VGT_GS_VERTEX_REUSE = 0x000088d4, ++ VERT_REUSE_mask = 0x1f << 0, ++ VERT_REUSE_shift = 0, ++ VGT_MC_LAT_CNTL = 0x000088d8, ++ MC_TIME_STAMP_RES_mask = 0x03 << 0, ++ MC_TIME_STAMP_RES_shift = 0, ++ X_0_992_MAX_LATENCY = 0x00, ++ X_0_496_MAX_LATENCY = 0x01, ++ X_0_248_MAX_LATENCY = 0x02, ++ X_0_124_MAX_LATENCY = 0x03, ++ VGT_GS_PER_VS = 0x000088e8, ++ GS_PER_VS_mask = 0x0f << 0, ++ GS_PER_VS_shift = 0, ++ VGT_CNTL_STATUS = 0x000088f0, ++ VGT_OUT_INDX_BUSY_bit = 1 << 0, ++ VGT_OUT_BUSY_bit = 1 << 1, ++ VGT_PT_BUSY_bit = 1 << 2, ++ VGT_TE_BUSY_bit = 1 << 3, ++ VGT_VR_BUSY_bit = 1 << 4, ++ VGT_GRP_BUSY_bit = 1 << 5, ++ VGT_DMA_REQ_BUSY_bit = 1 << 6, ++ VGT_DMA_BUSY_bit = 1 << 7, ++ VGT_GS_BUSY_bit = 1 << 8, ++ VGT_BUSY_bit = 1 << 9, ++ VGT_PRIMITIVE_TYPE = 0x00008958, ++ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, ++ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, ++ DI_PT_NONE = 0x00, ++ DI_PT_POINTLIST = 0x01, ++ DI_PT_LINELIST = 0x02, ++ DI_PT_LINESTRIP = 0x03, ++ DI_PT_TRILIST = 0x04, ++ DI_PT_TRIFAN = 0x05, ++ DI_PT_TRISTRIP = 0x06, ++ DI_PT_UNUSED_0 = 0x07, ++ DI_PT_UNUSED_1 = 0x08, ++ DI_PT_UNUSED_2 = 0x09, ++ 
DI_PT_LINELIST_ADJ = 0x0a, ++ DI_PT_LINESTRIP_ADJ = 0x0b, ++ DI_PT_TRILIST_ADJ = 0x0c, ++ DI_PT_TRISTRIP_ADJ = 0x0d, ++ DI_PT_UNUSED_3 = 0x0e, ++ DI_PT_UNUSED_4 = 0x0f, ++ DI_PT_TRI_WITH_WFLAGS = 0x10, ++ DI_PT_RECTLIST = 0x11, ++ DI_PT_LINELOOP = 0x12, ++ DI_PT_QUADLIST = 0x13, ++ DI_PT_QUADSTRIP = 0x14, ++ DI_PT_POLYGON = 0x15, ++ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, ++ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, ++ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, ++ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, ++ DI_PT_2D_FILL_RECT_LIST = 0x1a, ++ DI_PT_2D_LINE_STRIP = 0x1b, ++ DI_PT_2D_TRI_STRIP = 0x1c, ++ VGT_INDEX_TYPE = 0x0000895c, ++ INDEX_TYPE_mask = 0x03 << 0, ++ INDEX_TYPE_shift = 0, ++ DI_INDEX_SIZE_16_BIT = 0x00, ++ DI_INDEX_SIZE_32_BIT = 0x01, ++ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, ++ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, ++ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, ++ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, ++ VGT_NUM_INDICES = 0x00008970, ++ VGT_NUM_INSTANCES = 0x00008974, ++ PA_CL_CNTL_STATUS = 0x00008a10, ++ CL_BUSY_bit = 1 << 31, ++ PA_CL_ENHANCE = 0x00008a14, ++ CLIP_VTX_REORDER_ENA_bit = 1 << 0, ++ NUM_CLIP_SEQ_mask = 0x03 << 1, ++ NUM_CLIP_SEQ_shift = 1, ++ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, ++ VE_NAN_PROC_DISABLE_bit = 1 << 4, ++ PA_SU_CNTL_STATUS = 0x00008a50, ++ SU_BUSY_bit = 1 << 31, ++ PA_SC_LINE_STIPPLE_STATE = 0x00008b10, ++ CURRENT_PTR_mask = 0x0f << 0, ++ CURRENT_PTR_shift = 0, ++ CURRENT_COUNT_mask = 0xff << 8, ++ CURRENT_COUNT_shift = 8, ++ PA_SC_MULTI_CHIP_CNTL = 0x00008b20, ++ LOG2_NUM_CHIPS_mask = 0x07 << 0, ++ LOG2_NUM_CHIPS_shift = 0, ++ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3, ++ MULTI_CHIP_TILE_SIZE_shift = 3, ++ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00, ++ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01, ++ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02, ++ X_128X128_PIXEL_TILE_PER_CHIP = 0x03, ++ CHIP_TILE_X_LOC_mask = 0x07 << 5, ++ CHIP_TILE_X_LOC_shift = 5, ++ CHIP_TILE_Y_LOC_mask = 0x07 << 8, ++ CHIP_TILE_Y_LOC_shift = 8, ++ CHIP_SUPER_TILE_B_bit = 
1 << 11, ++ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40, ++ S0_X_mask = 0x0f << 0, ++ S0_X_shift = 0, ++ S0_Y_mask = 0x0f << 4, ++ S0_Y_shift = 4, ++ S1_X_mask = 0x0f << 8, ++ S1_X_shift = 8, ++ S1_Y_mask = 0x0f << 12, ++ S1_Y_shift = 12, ++ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44, ++/* S0_X_mask = 0x0f << 0, */ ++/* S0_X_shift = 0, */ ++/* S0_Y_mask = 0x0f << 4, */ ++/* S0_Y_shift = 4, */ ++/* S1_X_mask = 0x0f << 8, */ ++/* S1_X_shift = 8, */ ++/* S1_Y_mask = 0x0f << 12, */ ++/* S1_Y_shift = 12, */ ++ S2_X_mask = 0x0f << 16, ++ S2_X_shift = 16, ++ S2_Y_mask = 0x0f << 20, ++ S2_Y_shift = 20, ++ S3_X_mask = 0x0f << 24, ++ S3_X_shift = 24, ++ S3_Y_mask = 0x0f << 28, ++ S3_Y_shift = 28, ++ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48, ++/* S0_X_mask = 0x0f << 0, */ ++/* S0_X_shift = 0, */ ++/* S0_Y_mask = 0x0f << 4, */ ++/* S0_Y_shift = 4, */ ++/* S1_X_mask = 0x0f << 8, */ ++/* S1_X_shift = 8, */ ++/* S1_Y_mask = 0x0f << 12, */ ++/* S1_Y_shift = 12, */ ++/* S2_X_mask = 0x0f << 16, */ ++/* S2_X_shift = 16, */ ++/* S2_Y_mask = 0x0f << 20, */ ++/* S2_Y_shift = 20, */ ++/* S3_X_mask = 0x0f << 24, */ ++/* S3_X_shift = 24, */ ++/* S3_Y_mask = 0x0f << 28, */ ++/* S3_Y_shift = 28, */ ++ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c, ++ S4_X_mask = 0x0f << 0, ++ S4_X_shift = 0, ++ S4_Y_mask = 0x0f << 4, ++ S4_Y_shift = 4, ++ S5_X_mask = 0x0f << 8, ++ S5_X_shift = 8, ++ S5_Y_mask = 0x0f << 12, ++ S5_Y_shift = 12, ++ S6_X_mask = 0x0f << 16, ++ S6_X_shift = 16, ++ S6_Y_mask = 0x0f << 20, ++ S6_Y_shift = 20, ++ S7_X_mask = 0x0f << 24, ++ S7_X_shift = 24, ++ S7_Y_mask = 0x0f << 28, ++ S7_Y_shift = 28, ++ PA_SC_CNTL_STATUS = 0x00008be0, ++ MPASS_OVERFLOW_bit = 1 << 30, ++ PA_SC_ENHANCE = 0x00008bf0, ++ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0, ++ FORCE_EOV_MAX_CLK_CNT_shift = 0, ++ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12, ++ FORCE_EOV_MAX_TILE_CNT_shift = 12, ++ SQ_CONFIG = 0x00008c00, ++ VC_ENABLE_bit = 1 << 0, ++ EXPORT_SRC_C_bit = 1 << 1, ++ DX9_CONSTS_bit = 1 << 2, ++ ALU_INST_PREFER_VECTOR_bit = 
1 << 3, ++ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4, ++ ALU_PREFER_ONE_WATERFALL_bit = 1 << 5, ++ ALU_MAX_ONE_WATERFALL_bit = 1 << 6, ++ CLAUSE_SEQ_PRIO_mask = 0x03 << 8, ++ CLAUSE_SEQ_PRIO_shift = 8, ++ SQ_CL_PRIO_RND_ROBIN = 0x00, ++ SQ_CL_PRIO_MACRO_SEQ = 0x01, ++ SQ_CL_PRIO_NONE = 0x02, ++ PS_PRIO_mask = 0x03 << 24, ++ PS_PRIO_shift = 24, ++ VS_PRIO_mask = 0x03 << 26, ++ VS_PRIO_shift = 26, ++ GS_PRIO_mask = 0x03 << 28, ++ GS_PRIO_shift = 28, ++ ES_PRIO_mask = 0x03 << 30, ++ ES_PRIO_shift = 30, ++ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, ++ NUM_PS_GPRS_mask = 0xff << 0, ++ NUM_PS_GPRS_shift = 0, ++ NUM_VS_GPRS_mask = 0xff << 16, ++ NUM_VS_GPRS_shift = 16, ++ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, ++ NUM_CLAUSE_TEMP_GPRS_shift = 28, ++ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, ++ NUM_GS_GPRS_mask = 0xff << 0, ++ NUM_GS_GPRS_shift = 0, ++ NUM_ES_GPRS_mask = 0xff << 16, ++ NUM_ES_GPRS_shift = 16, ++ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c, ++ NUM_PS_THREADS_mask = 0xff << 0, ++ NUM_PS_THREADS_shift = 0, ++ NUM_VS_THREADS_mask = 0xff << 8, ++ NUM_VS_THREADS_shift = 8, ++ NUM_GS_THREADS_mask = 0xff << 16, ++ NUM_GS_THREADS_shift = 16, ++ NUM_ES_THREADS_mask = 0xff << 24, ++ NUM_ES_THREADS_shift = 24, ++ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10, ++ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, ++ NUM_PS_STACK_ENTRIES_shift = 0, ++ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, ++ NUM_VS_STACK_ENTRIES_shift = 16, ++ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14, ++ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, ++ NUM_GS_STACK_ENTRIES_shift = 0, ++ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, ++ NUM_ES_STACK_ENTRIES_shift = 16, ++ SQ_ESGS_RING_BASE = 0x00008c40, ++ SQ_ESGS_RING_SIZE = 0x00008c44, ++ SQ_GSVS_RING_BASE = 0x00008c48, ++ SQ_GSVS_RING_SIZE = 0x00008c4c, ++ SQ_ESTMP_RING_BASE = 0x00008c50, ++ SQ_ESTMP_RING_SIZE = 0x00008c54, ++ SQ_GSTMP_RING_BASE = 0x00008c58, ++ SQ_GSTMP_RING_SIZE = 0x00008c5c, ++ SQ_VSTMP_RING_BASE = 0x00008c60, ++ SQ_VSTMP_RING_SIZE = 0x00008c64, ++ SQ_PSTMP_RING_BASE = 0x00008c68, ++ 
SQ_PSTMP_RING_SIZE = 0x00008c6c, ++ SQ_FBUF_RING_BASE = 0x00008c70, ++ SQ_FBUF_RING_SIZE = 0x00008c74, ++ SQ_REDUC_RING_BASE = 0x00008c78, ++ SQ_REDUC_RING_SIZE = 0x00008c7c, ++ SQ_ALU_WORD1_OP3 = 0x00008dfc, ++ SRC2_SEL_mask = 0x1ff << 0, ++ SRC2_SEL_shift = 0, ++ SQ_ALU_SRC_0 = 0xf8, ++ SQ_ALU_SRC_1 = 0xf9, ++ SQ_ALU_SRC_1_INT = 0xfa, ++ SQ_ALU_SRC_M_1_INT = 0xfb, ++ SQ_ALU_SRC_0_5 = 0xfc, ++ SQ_ALU_SRC_LITERAL = 0xfd, ++ SQ_ALU_SRC_PV = 0xfe, ++ SQ_ALU_SRC_PS = 0xff, ++ SRC2_REL_bit = 1 << 9, ++ SRC2_CHAN_mask = 0x03 << 10, ++ SRC2_CHAN_shift = 10, ++ SQ_CHAN_X = 0x00, ++ SQ_CHAN_Y = 0x01, ++ SQ_CHAN_Z = 0x02, ++ SQ_CHAN_W = 0x03, ++ SRC2_NEG_bit = 1 << 12, ++ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, ++ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, ++ SQ_OP3_INST_MUL_LIT = 0x0c, ++ SQ_OP3_INST_MUL_LIT_M2 = 0x0d, ++ SQ_OP3_INST_MUL_LIT_M4 = 0x0e, ++ SQ_OP3_INST_MUL_LIT_D2 = 0x0f, ++ SQ_OP3_INST_MULADD = 0x10, ++ SQ_OP3_INST_MULADD_M2 = 0x11, ++ SQ_OP3_INST_MULADD_M4 = 0x12, ++ SQ_OP3_INST_MULADD_D2 = 0x13, ++ SQ_OP3_INST_MULADD_IEEE = 0x14, ++ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15, ++ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16, ++ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17, ++ SQ_OP3_INST_CNDE = 0x18, ++ SQ_OP3_INST_CNDGT = 0x19, ++ SQ_OP3_INST_CNDGE = 0x1a, ++ SQ_OP3_INST_CNDE_INT = 0x1c, ++ SQ_OP3_INST_CNDGT_INT = 0x1d, ++ SQ_OP3_INST_CNDGE_INT = 0x1e, ++ SQ_TEX_WORD2 = 0x00008dfc, ++ OFFSET_X_mask = 0x1f << 0, ++ OFFSET_X_shift = 0, ++ OFFSET_Y_mask = 0x1f << 5, ++ OFFSET_Y_shift = 5, ++ OFFSET_Z_mask = 0x1f << 10, ++ OFFSET_Z_shift = 10, ++ SAMPLER_ID_mask = 0x1f << 15, ++ SAMPLER_ID_shift = 15, ++ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, ++ SQ_TEX_WORD2__SRC_SEL_X_shift = 20, ++ SQ_SEL_X = 0x00, ++ SQ_SEL_Y = 0x01, ++ SQ_SEL_Z = 0x02, ++ SQ_SEL_W = 0x03, ++ SQ_SEL_0 = 0x04, ++ SQ_SEL_1 = 0x05, ++ SRC_SEL_Y_mask = 0x07 << 23, ++ SRC_SEL_Y_shift = 23, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* 
SQ_SEL_1 = 0x05, */ ++ SRC_SEL_Z_mask = 0x07 << 26, ++ SRC_SEL_Z_shift = 26, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SRC_SEL_W_mask = 0x07 << 29, ++ SRC_SEL_W_shift = 29, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, ++ BURST_COUNT_mask = 0x0f << 17, ++ BURST_COUNT_shift = 17, ++ END_OF_PROGRAM_bit = 1 << 21, ++ VALID_PIXEL_MODE_bit = 1 << 22, ++ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, ++ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23, ++ SQ_CF_INST_MEM_STREAM0 = 0x20, ++ SQ_CF_INST_MEM_STREAM1 = 0x21, ++ SQ_CF_INST_MEM_STREAM2 = 0x22, ++ SQ_CF_INST_MEM_STREAM3 = 0x23, ++ SQ_CF_INST_MEM_SCRATCH = 0x24, ++ SQ_CF_INST_MEM_REDUCTION = 0x25, ++ SQ_CF_INST_MEM_RING = 0x26, ++ SQ_CF_INST_EXPORT = 0x27, ++ SQ_CF_INST_EXPORT_DONE = 0x28, ++ WHOLE_QUAD_MODE_bit = 1 << 30, ++ BARRIER_bit = 1 << 31, ++ SQ_CF_ALU_WORD1 = 0x00008dfc, ++ KCACHE_MODE1_mask = 0x03 << 0, ++ KCACHE_MODE1_shift = 0, ++ SQ_CF_KCACHE_NOP = 0x00, ++ SQ_CF_KCACHE_LOCK_1 = 0x01, ++ SQ_CF_KCACHE_LOCK_2 = 0x02, ++ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, ++ KCACHE_ADDR0_mask = 0xff << 2, ++ KCACHE_ADDR0_shift = 2, ++ KCACHE_ADDR1_mask = 0xff << 10, ++ KCACHE_ADDR1_shift = 10, ++ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, ++ SQ_CF_ALU_WORD1__COUNT_shift = 18, ++ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, ++ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, ++ SQ_CF_ALU_WORD1__CF_INST_shift = 26, ++ SQ_CF_INST_ALU = 0x08, ++ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, ++ SQ_CF_INST_ALU_POP_AFTER = 0x0a, ++ SQ_CF_INST_ALU_POP2_AFTER = 0x0b, ++ SQ_CF_INST_ALU_CONTINUE = 0x0d, ++ SQ_CF_INST_ALU_BREAK = 0x0e, ++ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, ++/* WHOLE_QUAD_MODE_bit = 1 << 30, */ ++/* BARRIER_bit = 1 << 31, */ ++ SQ_TEX_WORD1 = 0x00008dfc, ++ 
SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, ++ SQ_TEX_WORD1__DST_GPR_shift = 0, ++ SQ_TEX_WORD1__DST_REL_bit = 1 << 7, ++ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, ++ SQ_TEX_WORD1__DST_SEL_X_shift = 9, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SQ_SEL_MASK = 0x07, ++ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, ++ SQ_TEX_WORD1__DST_SEL_Y_shift = 12, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, ++ SQ_TEX_WORD1__DST_SEL_Z_shift = 15, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, ++ SQ_TEX_WORD1__DST_SEL_W_shift = 18, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, ++ SQ_TEX_WORD1__LOD_BIAS_shift = 21, ++ COORD_TYPE_X_bit = 1 << 28, ++ COORD_TYPE_Y_bit = 1 << 29, ++ COORD_TYPE_Z_bit = 1 << 30, ++ COORD_TYPE_W_bit = 1 << 31, ++ SQ_VTX_WORD0 = 0x00008dfc, ++ VTX_INST_mask = 0x1f << 0, ++ VTX_INST_shift = 0, ++ SQ_VTX_INST_FETCH = 0x00, ++ SQ_VTX_INST_SEMANTIC = 0x01, ++ FETCH_TYPE_mask = 0x03 << 5, ++ FETCH_TYPE_shift = 5, ++ SQ_VTX_FETCH_VERTEX_DATA = 0x00, ++ SQ_VTX_FETCH_INSTANCE_DATA = 0x01, ++ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, ++ FETCH_WHOLE_QUAD_bit = 1 << 7, ++ BUFFER_ID_mask = 0xff << 8, ++ BUFFER_ID_shift = 8, ++ SRC_GPR_mask = 0x7f << 16, ++ SRC_GPR_shift = 16, ++ SRC_REL_bit = 1 << 23, ++ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, ++ SQ_VTX_WORD0__SRC_SEL_X_shift = 24, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z 
= 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++ MEGA_FETCH_COUNT_mask = 0x3f << 26, ++ MEGA_FETCH_COUNT_shift = 26, ++ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, ++ SEL_X_mask = 0x07 << 0, ++ SEL_X_shift = 0, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SEL_Y_mask = 0x07 << 3, ++ SEL_Y_shift = 3, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SEL_Z_mask = 0x07 << 6, ++ SEL_Z_shift = 6, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SEL_W_mask = 0x07 << 9, ++ SEL_W_shift = 9, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_ALU_WORD1 = 0x00008dfc, ++ ENCODING_mask = 0x07 << 15, ++ ENCODING_shift = 15, ++ BANK_SWIZZLE_mask = 0x07 << 18, ++ BANK_SWIZZLE_shift = 18, ++ SQ_ALU_VEC_012 = 0x00, ++ SQ_ALU_VEC_021 = 0x01, ++ SQ_ALU_VEC_120 = 0x02, ++ SQ_ALU_VEC_102 = 0x03, ++ SQ_ALU_VEC_201 = 0x04, ++ SQ_ALU_VEC_210 = 0x05, ++ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, ++ SQ_ALU_WORD1__DST_GPR_shift = 21, ++ SQ_ALU_WORD1__DST_REL_bit = 1 << 28, ++ DST_CHAN_mask = 0x03 << 29, ++ DST_CHAN_shift = 29, ++ CHAN_X = 0x00, ++ CHAN_Y = 0x01, ++ CHAN_Z = 0x02, ++ CHAN_W = 0x03, ++ SQ_ALU_WORD1__CLAMP_bit = 1 << 31, ++ SQ_CF_ALU_WORD0 = 0x00008dfc, ++ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, ++ SQ_CF_ALU_WORD0__ADDR_shift = 0, ++ KCACHE_BANK0_mask = 0x0f << 22, ++ KCACHE_BANK0_shift = 22, ++ KCACHE_BANK1_mask = 0x0f << 26, ++ KCACHE_BANK1_shift = 26, ++ KCACHE_MODE0_mask = 0x03 << 30, ++ KCACHE_MODE0_shift = 30, ++/* SQ_CF_KCACHE_NOP = 0x00, */ ++/* 
SQ_CF_KCACHE_LOCK_1 = 0x01, */ ++/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ ++/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ ++ SQ_VTX_WORD2 = 0x00008dfc, ++ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, ++ SQ_VTX_WORD2__OFFSET_shift = 0, ++ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, ++ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, ++ SQ_ENDIAN_NONE = 0x00, ++ SQ_ENDIAN_8IN16 = 0x01, ++ SQ_ENDIAN_8IN32 = 0x02, ++ CONST_BUF_NO_STRIDE_bit = 1 << 18, ++ MEGA_FETCH_bit = 1 << 19, ++ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, ++ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, ++ SRC0_ABS_bit = 1 << 0, ++ SRC1_ABS_bit = 1 << 1, ++ UPDATE_EXECUTE_MASK_bit = 1 << 2, ++ UPDATE_PRED_bit = 1 << 3, ++ WRITE_MASK_bit = 1 << 4, ++ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, ++ SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5, ++ SQ_ALU_OMOD_OFF = 0x00, ++ SQ_ALU_OMOD_M2 = 0x01, ++ SQ_ALU_OMOD_M4 = 0x02, ++ SQ_ALU_OMOD_D2 = 0x03, ++ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, ++ SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, ++ SQ_OP2_INST_ADD = 0x00, ++ SQ_OP2_INST_MUL = 0x01, ++ SQ_OP2_INST_MUL_IEEE = 0x02, ++ SQ_OP2_INST_MAX = 0x03, ++ SQ_OP2_INST_MIN = 0x04, ++ SQ_OP2_INST_MAX_DX10 = 0x05, ++ SQ_OP2_INST_MIN_DX10 = 0x06, ++ SQ_OP2_INST_SETE = 0x08, ++ SQ_OP2_INST_SETGT = 0x09, ++ SQ_OP2_INST_SETGE = 0x0a, ++ SQ_OP2_INST_SETNE = 0x0b, ++ SQ_OP2_INST_SETE_DX10 = 0x0c, ++ SQ_OP2_INST_SETGT_DX10 = 0x0d, ++ SQ_OP2_INST_SETGE_DX10 = 0x0e, ++ SQ_OP2_INST_SETNE_DX10 = 0x0f, ++ SQ_OP2_INST_FRACT = 0x10, ++ SQ_OP2_INST_TRUNC = 0x11, ++ SQ_OP2_INST_CEIL = 0x12, ++ SQ_OP2_INST_RNDNE = 0x13, ++ SQ_OP2_INST_FLOOR = 0x14, ++ SQ_OP2_INST_MOVA = 0x15, ++ SQ_OP2_INST_MOVA_FLOOR = 0x16, ++ SQ_OP2_INST_MOVA_INT = 0x18, ++ SQ_OP2_INST_MOV = 0x19, ++ SQ_OP2_INST_NOP = 0x1a, ++ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, ++ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, ++ SQ_OP2_INST_PRED_SETE = 0x20, ++ SQ_OP2_INST_PRED_SETGT = 0x21, ++ SQ_OP2_INST_PRED_SETGE = 0x22, ++ SQ_OP2_INST_PRED_SETNE = 0x23, ++ SQ_OP2_INST_PRED_SET_INV = 0x24, ++ SQ_OP2_INST_PRED_SET_POP = 
0x25, ++ SQ_OP2_INST_PRED_SET_CLR = 0x26, ++ SQ_OP2_INST_PRED_SET_RESTORE = 0x27, ++ SQ_OP2_INST_PRED_SETE_PUSH = 0x28, ++ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, ++ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, ++ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, ++ SQ_OP2_INST_KILLE = 0x2c, ++ SQ_OP2_INST_KILLGT = 0x2d, ++ SQ_OP2_INST_KILLGE = 0x2e, ++ SQ_OP2_INST_KILLNE = 0x2f, ++ SQ_OP2_INST_AND_INT = 0x30, ++ SQ_OP2_INST_OR_INT = 0x31, ++ SQ_OP2_INST_XOR_INT = 0x32, ++ SQ_OP2_INST_NOT_INT = 0x33, ++ SQ_OP2_INST_ADD_INT = 0x34, ++ SQ_OP2_INST_SUB_INT = 0x35, ++ SQ_OP2_INST_MAX_INT = 0x36, ++ SQ_OP2_INST_MIN_INT = 0x37, ++ SQ_OP2_INST_MAX_UINT = 0x38, ++ SQ_OP2_INST_MIN_UINT = 0x39, ++ SQ_OP2_INST_SETE_INT = 0x3a, ++ SQ_OP2_INST_SETGT_INT = 0x3b, ++ SQ_OP2_INST_SETGE_INT = 0x3c, ++ SQ_OP2_INST_SETNE_INT = 0x3d, ++ SQ_OP2_INST_SETGT_UINT = 0x3e, ++ SQ_OP2_INST_SETGE_UINT = 0x3f, ++ SQ_OP2_INST_KILLGT_UINT = 0x40, ++ SQ_OP2_INST_KILLGE_UINT = 0x41, ++ SQ_OP2_INST_PRED_SETE_INT = 0x42, ++ SQ_OP2_INST_PRED_SETGT_INT = 0x43, ++ SQ_OP2_INST_PRED_SETGE_INT = 0x44, ++ SQ_OP2_INST_PRED_SETNE_INT = 0x45, ++ SQ_OP2_INST_KILLE_INT = 0x46, ++ SQ_OP2_INST_KILLGT_INT = 0x47, ++ SQ_OP2_INST_KILLGE_INT = 0x48, ++ SQ_OP2_INST_KILLNE_INT = 0x49, ++ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, ++ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, ++ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, ++ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, ++ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, ++ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, ++ SQ_OP2_INST_DOT4 = 0x50, ++ SQ_OP2_INST_DOT4_IEEE = 0x51, ++ SQ_OP2_INST_CUBE = 0x52, ++ SQ_OP2_INST_MAX4 = 0x53, ++ SQ_OP2_INST_MOVA_GPR_INT = 0x60, ++ SQ_OP2_INST_EXP_IEEE = 0x61, ++ SQ_OP2_INST_LOG_CLAMPED = 0x62, ++ SQ_OP2_INST_LOG_IEEE = 0x63, ++ SQ_OP2_INST_RECIP_CLAMPED = 0x64, ++ SQ_OP2_INST_RECIP_FF = 0x65, ++ SQ_OP2_INST_RECIP_IEEE = 0x66, ++ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, ++ SQ_OP2_INST_RECIPSQRT_FF = 0x68, ++ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, ++ SQ_OP2_INST_SQRT_IEEE = 0x6a, ++ 
SQ_OP2_INST_FLT_TO_INT = 0x6b, ++ SQ_OP2_INST_INT_TO_FLT = 0x6c, ++ SQ_OP2_INST_UINT_TO_FLT = 0x6d, ++ SQ_OP2_INST_SIN = 0x6e, ++ SQ_OP2_INST_COS = 0x6f, ++ SQ_OP2_INST_ASHR_INT = 0x70, ++ SQ_OP2_INST_LSHR_INT = 0x71, ++ SQ_OP2_INST_LSHL_INT = 0x72, ++ SQ_OP2_INST_MULLO_INT = 0x73, ++ SQ_OP2_INST_MULHI_INT = 0x74, ++ SQ_OP2_INST_MULLO_UINT = 0x75, ++ SQ_OP2_INST_MULHI_UINT = 0x76, ++ SQ_OP2_INST_RECIP_INT = 0x77, ++ SQ_OP2_INST_RECIP_UINT = 0x78, ++ SQ_OP2_INST_FLT_TO_UINT = 0x79, ++ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, ++ ARRAY_SIZE_mask = 0xfff << 0, ++ ARRAY_SIZE_shift = 0, ++ COMP_MASK_mask = 0x0f << 12, ++ COMP_MASK_shift = 12, ++ SQ_CF_WORD0 = 0x00008dfc, ++ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, ++ ARRAY_BASE_mask = 0x1fff << 0, ++ ARRAY_BASE_shift = 0, ++ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, ++ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, ++ SQ_EXPORT_PIXEL = 0x00, ++ SQ_EXPORT_POS = 0x01, ++ SQ_EXPORT_PARAM = 0x02, ++ X_UNUSED_FOR_SX_EXPORTS = 0x03, ++ RW_GPR_mask = 0x7f << 15, ++ RW_GPR_shift = 15, ++ RW_REL_bit = 1 << 22, ++ INDEX_GPR_mask = 0x7f << 23, ++ INDEX_GPR_shift = 23, ++ ELEM_SIZE_mask = 0x03 << 30, ++ ELEM_SIZE_shift = 30, ++ SQ_VTX_WORD1 = 0x00008dfc, ++ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, ++ SQ_VTX_WORD1__DST_SEL_X_shift = 9, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, ++ SQ_VTX_WORD1__DST_SEL_Y_shift = 12, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, ++ SQ_VTX_WORD1__DST_SEL_Z_shift = 15, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ 
SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, ++ SQ_VTX_WORD1__DST_SEL_W_shift = 18, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++/* SQ_SEL_MASK = 0x07, */ ++ USE_CONST_FIELDS_bit = 1 << 21, ++ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, ++ SQ_VTX_WORD1__DATA_FORMAT_shift = 22, ++ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, ++ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, ++ SQ_NUM_FORMAT_NORM = 0x00, ++ SQ_NUM_FORMAT_INT = 0x01, ++ SQ_NUM_FORMAT_SCALED = 0x02, ++ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, ++ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, ++ SQ_ALU_WORD1_OP2 = 0x00008dfc, ++/* SRC0_ABS_bit = 1 << 0, */ ++/* SRC1_ABS_bit = 1 << 1, */ ++/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */ ++/* UPDATE_PRED_bit = 1 << 3, */ ++/* WRITE_MASK_bit = 1 << 4, */ ++ FOG_MERGE_bit = 1 << 5, ++ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, ++ SQ_ALU_WORD1_OP2__OMOD_shift = 6, ++/* SQ_ALU_OMOD_OFF = 0x00, */ ++/* SQ_ALU_OMOD_M2 = 0x01, */ ++/* SQ_ALU_OMOD_M4 = 0x02, */ ++/* SQ_ALU_OMOD_D2 = 0x03, */ ++ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, ++ SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, ++/* SQ_OP2_INST_ADD = 0x00, */ ++/* SQ_OP2_INST_MUL = 0x01, */ ++/* SQ_OP2_INST_MUL_IEEE = 0x02, */ ++/* SQ_OP2_INST_MAX = 0x03, */ ++/* SQ_OP2_INST_MIN = 0x04, */ ++/* SQ_OP2_INST_MAX_DX10 = 0x05, */ ++/* SQ_OP2_INST_MIN_DX10 = 0x06, */ ++/* SQ_OP2_INST_SETE = 0x08, */ ++/* SQ_OP2_INST_SETGT = 0x09, */ ++/* SQ_OP2_INST_SETGE = 0x0a, */ ++/* SQ_OP2_INST_SETNE = 0x0b, */ ++/* SQ_OP2_INST_SETE_DX10 = 0x0c, */ ++/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */ ++/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */ ++/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */ ++/* SQ_OP2_INST_FRACT = 0x10, */ ++/* SQ_OP2_INST_TRUNC = 0x11, */ ++/* SQ_OP2_INST_CEIL = 0x12, */ ++/* SQ_OP2_INST_RNDNE = 0x13, */ ++/* SQ_OP2_INST_FLOOR = 0x14, */ ++/* SQ_OP2_INST_MOVA = 0x15, */ ++/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */ ++/* SQ_OP2_INST_MOVA_INT = 0x18, */ 
++/* SQ_OP2_INST_MOV = 0x19, */ ++/* SQ_OP2_INST_NOP = 0x1a, */ ++/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */ ++/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */ ++/* SQ_OP2_INST_PRED_SETE = 0x20, */ ++/* SQ_OP2_INST_PRED_SETGT = 0x21, */ ++/* SQ_OP2_INST_PRED_SETGE = 0x22, */ ++/* SQ_OP2_INST_PRED_SETNE = 0x23, */ ++/* SQ_OP2_INST_PRED_SET_INV = 0x24, */ ++/* SQ_OP2_INST_PRED_SET_POP = 0x25, */ ++/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */ ++/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */ ++/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */ ++/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */ ++/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */ ++/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */ ++/* SQ_OP2_INST_KILLE = 0x2c, */ ++/* SQ_OP2_INST_KILLGT = 0x2d, */ ++/* SQ_OP2_INST_KILLGE = 0x2e, */ ++/* SQ_OP2_INST_KILLNE = 0x2f, */ ++/* SQ_OP2_INST_AND_INT = 0x30, */ ++/* SQ_OP2_INST_OR_INT = 0x31, */ ++/* SQ_OP2_INST_XOR_INT = 0x32, */ ++/* SQ_OP2_INST_NOT_INT = 0x33, */ ++/* SQ_OP2_INST_ADD_INT = 0x34, */ ++/* SQ_OP2_INST_SUB_INT = 0x35, */ ++/* SQ_OP2_INST_MAX_INT = 0x36, */ ++/* SQ_OP2_INST_MIN_INT = 0x37, */ ++/* SQ_OP2_INST_MAX_UINT = 0x38, */ ++/* SQ_OP2_INST_MIN_UINT = 0x39, */ ++/* SQ_OP2_INST_SETE_INT = 0x3a, */ ++/* SQ_OP2_INST_SETGT_INT = 0x3b, */ ++/* SQ_OP2_INST_SETGE_INT = 0x3c, */ ++/* SQ_OP2_INST_SETNE_INT = 0x3d, */ ++/* SQ_OP2_INST_SETGT_UINT = 0x3e, */ ++/* SQ_OP2_INST_SETGE_UINT = 0x3f, */ ++/* SQ_OP2_INST_KILLGT_UINT = 0x40, */ ++/* SQ_OP2_INST_KILLGE_UINT = 0x41, */ ++/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */ ++/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */ ++/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */ ++/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */ ++/* SQ_OP2_INST_KILLE_INT = 0x46, */ ++/* SQ_OP2_INST_KILLGT_INT = 0x47, */ ++/* SQ_OP2_INST_KILLGE_INT = 0x48, */ ++/* SQ_OP2_INST_KILLNE_INT = 0x49, */ ++/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */ ++/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */ ++/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */ ++/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */ ++/* 
SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */ ++/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */ ++/* SQ_OP2_INST_DOT4 = 0x50, */ ++/* SQ_OP2_INST_DOT4_IEEE = 0x51, */ ++/* SQ_OP2_INST_CUBE = 0x52, */ ++/* SQ_OP2_INST_MAX4 = 0x53, */ ++/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */ ++/* SQ_OP2_INST_EXP_IEEE = 0x61, */ ++/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */ ++/* SQ_OP2_INST_LOG_IEEE = 0x63, */ ++/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */ ++/* SQ_OP2_INST_RECIP_FF = 0x65, */ ++/* SQ_OP2_INST_RECIP_IEEE = 0x66, */ ++/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */ ++/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */ ++/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */ ++/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */ ++/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */ ++/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */ ++/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */ ++/* SQ_OP2_INST_SIN = 0x6e, */ ++/* SQ_OP2_INST_COS = 0x6f, */ ++/* SQ_OP2_INST_ASHR_INT = 0x70, */ ++/* SQ_OP2_INST_LSHR_INT = 0x71, */ ++/* SQ_OP2_INST_LSHL_INT = 0x72, */ ++/* SQ_OP2_INST_MULLO_INT = 0x73, */ ++/* SQ_OP2_INST_MULHI_INT = 0x74, */ ++/* SQ_OP2_INST_MULLO_UINT = 0x75, */ ++/* SQ_OP2_INST_MULHI_UINT = 0x76, */ ++/* SQ_OP2_INST_RECIP_INT = 0x77, */ ++/* SQ_OP2_INST_RECIP_UINT = 0x78, */ ++/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */ ++ SQ_CF_WORD1 = 0x00008dfc, ++ POP_COUNT_mask = 0x07 << 0, ++ POP_COUNT_shift = 0, ++ CF_CONST_mask = 0x1f << 3, ++ CF_CONST_shift = 3, ++ COND_mask = 0x03 << 8, ++ COND_shift = 8, ++ SQ_CF_COND_ACTIVE = 0x00, ++ SQ_CF_COND_FALSE = 0x01, ++ SQ_CF_COND_BOOL = 0x02, ++ SQ_CF_COND_NOT_BOOL = 0x03, ++ SQ_CF_WORD1__COUNT_mask = 0x07 << 10, ++ SQ_CF_WORD1__COUNT_shift = 10, ++ CALL_COUNT_mask = 0x3f << 13, ++ CALL_COUNT_shift = 13, ++ COUNT_3_bit = 1 << 19, ++/* END_OF_PROGRAM_bit = 1 << 21, */ ++/* VALID_PIXEL_MODE_bit = 1 << 22, */ ++ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, ++ SQ_CF_WORD1__CF_INST_shift = 23, ++ SQ_CF_INST_NOP = 0x00, ++ SQ_CF_INST_TEX = 0x01, ++ SQ_CF_INST_VTX = 0x02, ++ SQ_CF_INST_VTX_TC = 0x03, ++ SQ_CF_INST_LOOP_START = 0x04, ++ 
SQ_CF_INST_LOOP_END = 0x05, ++ SQ_CF_INST_LOOP_START_DX10 = 0x06, ++ SQ_CF_INST_LOOP_START_NO_AL = 0x07, ++ SQ_CF_INST_LOOP_CONTINUE = 0x08, ++ SQ_CF_INST_LOOP_BREAK = 0x09, ++ SQ_CF_INST_JUMP = 0x0a, ++ SQ_CF_INST_PUSH = 0x0b, ++ SQ_CF_INST_PUSH_ELSE = 0x0c, ++ SQ_CF_INST_ELSE = 0x0d, ++ SQ_CF_INST_POP = 0x0e, ++ SQ_CF_INST_POP_JUMP = 0x0f, ++ SQ_CF_INST_POP_PUSH = 0x10, ++ SQ_CF_INST_POP_PUSH_ELSE = 0x11, ++ SQ_CF_INST_CALL = 0x12, ++ SQ_CF_INST_CALL_FS = 0x13, ++ SQ_CF_INST_RETURN = 0x14, ++ SQ_CF_INST_EMIT_VERTEX = 0x15, ++ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, ++ SQ_CF_INST_CUT_VERTEX = 0x17, ++ SQ_CF_INST_KILL = 0x18, ++/* WHOLE_QUAD_MODE_bit = 1 << 30, */ ++/* BARRIER_bit = 1 << 31, */ ++ SQ_VTX_WORD1_SEM = 0x00008dfc, ++ SEMANTIC_ID_mask = 0xff << 0, ++ SEMANTIC_ID_shift = 0, ++ SQ_TEX_WORD0 = 0x00008dfc, ++ TEX_INST_mask = 0x1f << 0, ++ TEX_INST_shift = 0, ++ SQ_TEX_INST_VTX_FETCH = 0x00, ++ SQ_TEX_INST_VTX_SEMANTIC = 0x01, ++ SQ_TEX_INST_LD = 0x03, ++ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, ++ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, ++ SQ_TEX_INST_GET_LOD = 0x06, ++ SQ_TEX_INST_GET_GRADIENTS_H = 0x07, ++ SQ_TEX_INST_GET_GRADIENTS_V = 0x08, ++ SQ_TEX_INST_GET_LERP = 0x09, ++ SQ_TEX_INST_RESERVED_10 = 0x0a, ++ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, ++ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, ++ SQ_TEX_INST_PASS = 0x0d, ++ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e, ++ SQ_TEX_INST_SAMPLE = 0x10, ++ SQ_TEX_INST_SAMPLE_L = 0x11, ++ SQ_TEX_INST_SAMPLE_LB = 0x12, ++ SQ_TEX_INST_SAMPLE_LZ = 0x13, ++ SQ_TEX_INST_SAMPLE_G = 0x14, ++ SQ_TEX_INST_SAMPLE_G_L = 0x15, ++ SQ_TEX_INST_SAMPLE_G_LB = 0x16, ++ SQ_TEX_INST_SAMPLE_G_LZ = 0x17, ++ SQ_TEX_INST_SAMPLE_C = 0x18, ++ SQ_TEX_INST_SAMPLE_C_L = 0x19, ++ SQ_TEX_INST_SAMPLE_C_LB = 0x1a, ++ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, ++ SQ_TEX_INST_SAMPLE_C_G = 0x1c, ++ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d, ++ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, ++ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f, ++ BC_FRAC_MODE_bit = 1 << 5, ++/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ 
++ RESOURCE_ID_mask = 0xff << 8, ++ RESOURCE_ID_shift = 8, ++/* SRC_GPR_mask = 0x7f << 16, */ ++/* SRC_GPR_shift = 16, */ ++/* SRC_REL_bit = 1 << 23, */ ++ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, ++ SQ_VTX_WORD1_GPR = 0x00008dfc, ++ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, ++ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, ++ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, ++ SQ_ALU_WORD0 = 0x00008dfc, ++ SRC0_SEL_mask = 0x1ff << 0, ++ SRC0_SEL_shift = 0, ++/* SQ_ALU_SRC_0 = 0xf8, */ ++/* SQ_ALU_SRC_1 = 0xf9, */ ++/* SQ_ALU_SRC_1_INT = 0xfa, */ ++/* SQ_ALU_SRC_M_1_INT = 0xfb, */ ++/* SQ_ALU_SRC_0_5 = 0xfc, */ ++/* SQ_ALU_SRC_LITERAL = 0xfd, */ ++/* SQ_ALU_SRC_PV = 0xfe, */ ++/* SQ_ALU_SRC_PS = 0xff, */ ++ SRC0_REL_bit = 1 << 9, ++ SRC0_CHAN_mask = 0x03 << 10, ++ SRC0_CHAN_shift = 10, ++/* SQ_CHAN_X = 0x00, */ ++/* SQ_CHAN_Y = 0x01, */ ++/* SQ_CHAN_Z = 0x02, */ ++/* SQ_CHAN_W = 0x03, */ ++ SRC0_NEG_bit = 1 << 12, ++ SRC1_SEL_mask = 0x1ff << 13, ++ SRC1_SEL_shift = 13, ++/* SQ_ALU_SRC_0 = 0xf8, */ ++/* SQ_ALU_SRC_1 = 0xf9, */ ++/* SQ_ALU_SRC_1_INT = 0xfa, */ ++/* SQ_ALU_SRC_M_1_INT = 0xfb, */ ++/* SQ_ALU_SRC_0_5 = 0xfc, */ ++/* SQ_ALU_SRC_LITERAL = 0xfd, */ ++/* SQ_ALU_SRC_PV = 0xfe, */ ++/* SQ_ALU_SRC_PS = 0xff, */ ++ SRC1_REL_bit = 1 << 22, ++ SRC1_CHAN_mask = 0x03 << 23, ++ SRC1_CHAN_shift = 23, ++/* SQ_CHAN_X = 0x00, */ ++/* SQ_CHAN_Y = 0x01, */ ++/* SQ_CHAN_Z = 0x02, */ ++/* SQ_CHAN_W = 0x03, */ ++ SRC1_NEG_bit = 1 << 25, ++ INDEX_MODE_mask = 0x07 << 26, ++ INDEX_MODE_shift = 26, ++ SQ_INDEX_AR_X = 0x00, ++ SQ_INDEX_AR_Y = 0x01, ++ SQ_INDEX_AR_Z = 0x02, ++ SQ_INDEX_AR_W = 0x03, ++ SQ_INDEX_LOOP = 0x04, ++ PRED_SEL_mask = 0x03 << 29, ++ PRED_SEL_shift = 29, ++ SQ_PRED_SEL_OFF = 0x00, ++ SQ_PRED_SEL_ZERO = 0x02, ++ SQ_PRED_SEL_ONE = 0x03, ++ LAST_bit = 1 << 31, ++ SX_EXPORT_BUFFER_SIZES = 0x0000900c, ++ COLOR_BUFFER_SIZE_mask = 0xff << 0, ++ COLOR_BUFFER_SIZE_shift = 0, ++ POSITION_BUFFER_SIZE_mask = 0xff << 8, ++ POSITION_BUFFER_SIZE_shift = 8, ++ SMX_BUFFER_SIZE_mask = 0xff << 
16, ++ SMX_BUFFER_SIZE_shift = 16, ++ SX_MEMORY_EXPORT_BASE = 0x00009010, ++ SX_MEMORY_EXPORT_SIZE = 0x00009014, ++ SPI_CONFIG_CNTL = 0x00009100, ++ GPR_WRITE_PRIORITY_mask = 0x1f << 0, ++ GPR_WRITE_PRIORITY_shift = 0, ++ X_PRIORITY_ORDER = 0x00, ++ X_PRIORITY_ORDER_VS = 0x01, ++ DISABLE_INTERP_1_bit = 1 << 5, ++ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6, ++ DEBUG_THREAD_TYPE_SEL_shift = 6, ++ DEBUG_GROUP_SEL_mask = 0x1f << 8, ++ DEBUG_GROUP_SEL_shift = 8, ++ DEBUG_GRBM_OVERRIDE_bit = 1 << 13, ++ SPI_CONFIG_CNTL_1 = 0x0000913c, ++ VTX_DONE_DELAY_mask = 0x0f << 0, ++ VTX_DONE_DELAY_shift = 0, ++ X_DELAY_10_CLKS = 0x00, ++ X_DELAY_11_CLKS = 0x01, ++ X_DELAY_12_CLKS = 0x02, ++ X_DELAY_13_CLKS = 0x03, ++ X_DELAY_14_CLKS = 0x04, ++ X_DELAY_15_CLKS = 0x05, ++ X_DELAY_16_CLKS = 0x06, ++ X_DELAY_17_CLKS = 0x07, ++ X_DELAY_2_CLKS = 0x08, ++ X_DELAY_3_CLKS = 0x09, ++ X_DELAY_4_CLKS = 0x0a, ++ X_DELAY_5_CLKS = 0x0b, ++ X_DELAY_6_CLKS = 0x0c, ++ X_DELAY_7_CLKS = 0x0d, ++ X_DELAY_8_CLKS = 0x0e, ++ X_DELAY_9_CLKS = 0x0f, ++ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, ++ TD_FILTER4 = 0x00009400, ++ WEIGHT_1_mask = 0x7ff << 0, ++ WEIGHT_1_shift = 0, ++ WEIGHT_0_mask = 0x7ff << 11, ++ WEIGHT_0_shift = 11, ++ WEIGHT_PAIR_bit = 1 << 22, ++ PHASE_mask = 0x0f << 23, ++ PHASE_shift = 23, ++ DIRECTION_bit = 1 << 27, ++ TD_FILTER4_1 = 0x00009404, ++ TD_FILTER4_1_num = 35, ++/* WEIGHT_1_mask = 0x7ff << 0, */ ++/* WEIGHT_1_shift = 0, */ ++/* WEIGHT_0_mask = 0x7ff << 11, */ ++/* WEIGHT_0_shift = 11, */ ++ TD_CNTL = 0x00009490, ++ SYNC_PHASE_SH_mask = 0x03 << 0, ++ SYNC_PHASE_SH_shift = 0, ++ SYNC_PHASE_VC_SMX_mask = 0x03 << 4, ++ SYNC_PHASE_VC_SMX_shift = 4, ++ TD0_CNTL = 0x00009494, ++ TD0_CNTL_num = 4, ++ ID_OVERRIDE_mask = 0x03 << 28, ++ ID_OVERRIDE_shift = 28, ++ TD0_STATUS = 0x000094a4, ++ TD0_STATUS_num = 4, ++ BUSY_bit = 1 << 31, ++ TA_CNTL = 0x00009504, ++ GRADIENT_CREDIT_mask = 0x1f << 0, ++ GRADIENT_CREDIT_shift = 0, ++ WALKER_CREDIT_mask = 0x1f << 8, ++ WALKER_CREDIT_shift = 8, ++ 
ALIGNER_CREDIT_mask = 0x1f << 16, ++ ALIGNER_CREDIT_shift = 16, ++ TD_FIFO_CREDIT_mask = 0x3ff << 22, ++ TD_FIFO_CREDIT_shift = 22, ++ TA_CNTL_AUX = 0x00009508, ++ DISABLE_CUBE_WRAP_bit = 1 << 0, ++ SYNC_GRADIENT_bit = 1 << 24, ++ SYNC_WALKER_bit = 1 << 25, ++ SYNC_ALIGNER_bit = 1 << 26, ++ BILINEAR_PRECISION_bit = 1 << 31, ++ TA0_CNTL = 0x00009510, ++/* ID_OVERRIDE_mask = 0x03 << 28, */ ++/* ID_OVERRIDE_shift = 28, */ ++ TA1_CNTL = 0x00009514, ++/* ID_OVERRIDE_mask = 0x03 << 28, */ ++/* ID_OVERRIDE_shift = 28, */ ++ TA2_CNTL = 0x00009518, ++/* ID_OVERRIDE_mask = 0x03 << 28, */ ++/* ID_OVERRIDE_shift = 28, */ ++ TA3_CNTL = 0x0000951c, ++/* ID_OVERRIDE_mask = 0x03 << 28, */ ++/* ID_OVERRIDE_shift = 28, */ ++ TA0_STATUS = 0x00009520, ++ FG_PFIFO_EMPTYB_bit = 1 << 12, ++ FG_LFIFO_EMPTYB_bit = 1 << 13, ++ FG_SFIFO_EMPTYB_bit = 1 << 14, ++ FL_PFIFO_EMPTYB_bit = 1 << 16, ++ FL_LFIFO_EMPTYB_bit = 1 << 17, ++ FL_SFIFO_EMPTYB_bit = 1 << 18, ++ FA_PFIFO_EMPTYB_bit = 1 << 20, ++ FA_LFIFO_EMPTYB_bit = 1 << 21, ++ FA_SFIFO_EMPTYB_bit = 1 << 22, ++ IN_BUSY_bit = 1 << 24, ++ FG_BUSY_bit = 1 << 25, ++ FL_BUSY_bit = 1 << 27, ++ TA_BUSY_bit = 1 << 28, ++ FA_BUSY_bit = 1 << 29, ++ AL_BUSY_bit = 1 << 30, ++/* BUSY_bit = 1 << 31, */ ++ TA1_STATUS = 0x00009524, ++/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ ++/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ ++/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ ++/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ ++/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ ++/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ ++/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ ++/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ ++/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ ++/* IN_BUSY_bit = 1 << 24, */ ++/* FG_BUSY_bit = 1 << 25, */ ++/* FL_BUSY_bit = 1 << 27, */ ++/* TA_BUSY_bit = 1 << 28, */ ++/* FA_BUSY_bit = 1 << 29, */ ++/* AL_BUSY_bit = 1 << 30, */ ++/* BUSY_bit = 1 << 31, */ ++ TA2_STATUS = 0x00009528, ++/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ ++/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ ++/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ ++/* FL_PFIFO_EMPTYB_bit = 1 
<< 16, */ ++/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ ++/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ ++/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ ++/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ ++/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ ++/* IN_BUSY_bit = 1 << 24, */ ++/* FG_BUSY_bit = 1 << 25, */ ++/* FL_BUSY_bit = 1 << 27, */ ++/* TA_BUSY_bit = 1 << 28, */ ++/* FA_BUSY_bit = 1 << 29, */ ++/* AL_BUSY_bit = 1 << 30, */ ++/* BUSY_bit = 1 << 31, */ ++ TA3_STATUS = 0x0000952c, ++/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ ++/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ ++/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ ++/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ ++/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ ++/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ ++/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ ++/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ ++/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ ++/* IN_BUSY_bit = 1 << 24, */ ++/* FG_BUSY_bit = 1 << 25, */ ++/* FL_BUSY_bit = 1 << 27, */ ++/* TA_BUSY_bit = 1 << 28, */ ++/* FA_BUSY_bit = 1 << 29, */ ++/* AL_BUSY_bit = 1 << 30, */ ++/* BUSY_bit = 1 << 31, */ ++ TC_STATUS = 0x00009600, ++ TC_BUSY_bit = 1 << 0, ++ TC_INVALIDATE = 0x00009604, ++ START_bit = 1 << 0, ++ TC_CNTL = 0x00009608, ++ FORCE_HIT_bit = 1 << 0, ++ FORCE_MISS_bit = 1 << 1, ++ L2_SIZE_mask = 0x0f << 5, ++ L2_SIZE_shift = 5, ++ _256K = 0x00, ++ _224K = 0x01, ++ _192K = 0x02, ++ _160K = 0x03, ++ _128K = 0x04, ++ _96K = 0x05, ++ _64K = 0x06, ++ _32K = 0x07, ++ L2_DISABLE_LATE_HIT_bit = 1 << 9, ++ DISABLE_VERT_PERF_bit = 1 << 10, ++ DISABLE_INVAL_BUSY_bit = 1 << 11, ++ DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12, ++ PARTITION_MODE_mask = 0x03 << 13, ++ PARTITION_MODE_shift = 13, ++ X_VERTEX = 0x00, ++ MISS_ARB_MODE_bit = 1 << 15, ++ HIT_ARB_MODE_bit = 1 << 16, ++ DISABLE_WRITE_DELAY_bit = 1 << 17, ++ HIT_FIFO_DEPTH_bit = 1 << 18, ++ VC_CNTL = 0x00009700, ++ L2_INVALIDATE_bit = 1 << 0, ++ RESERVED_bit = 1 << 1, ++ CC_FORCE_MISS_bit = 1 << 2, ++ MI_CHAN_SEL_mask = 0x03 << 3, ++ MI_CHAN_SEL_shift = 3, ++ X_MC0_USES_CH_0_1 = 0x00, ++ X_MC0_USES_CH_0_3 = 0x01, ++ 
X_VC_MC0_IS_ACTIVE = 0x02, ++ X_VC_MC1_IS_DISABLED = 0x03, ++ MI_STEER_DISABLE_bit = 1 << 5, ++ MI_CREDIT_CTR_mask = 0x0f << 6, ++ MI_CREDIT_CTR_shift = 6, ++ MI_CREDIT_WE_bit = 1 << 10, ++ MI_REQ_STALL_THLD_mask = 0x07 << 11, ++ MI_REQ_STALL_THLD_shift = 11, ++ X_LATENCY_EXCEEDS_399_CLOCKS = 0x00, ++ X_LATENCY_EXCEEDS_415_CLOCKS = 0x01, ++ X_LATENCY_EXCEEDS_431_CLOCKS = 0x02, ++ X_LATENCY_EXCEEDS_447_CLOCKS = 0x03, ++ X_LATENCY_EXCEEDS_463_CLOCKS = 0x04, ++ X_LATENCY_EXCEEDS_479_CLOCKS = 0x05, ++ X_LATENCY_EXCEEDS_495_CLOCKS = 0x06, ++ X_LATENCY_EXCEEDS_511_CLOCKS = 0x07, ++ VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14, ++ VC_CNTL__MI_TIMESTAMP_RES_shift = 14, ++ X_1X_SYSTEM_CLOCK = 0x00, ++ X_2X_SYSTEM_CLOCK = 0x01, ++ X_4X_SYSTEM_CLOCK = 0x02, ++ X_8X_SYSTEM_CLOCK = 0x03, ++ X_16X_SYSTEM_CLOCK = 0x04, ++ X_32X_SYSTEM_CLOCK = 0x05, ++ X_64X_SYSTEM_CLOCK = 0x06, ++ X_128X_SYSTEM_CLOCK = 0x07, ++ X_256X_SYSTEM_CLOCK = 0x08, ++ X_512X_SYSTEM_CLOCK = 0x09, ++ X_1024X_SYSTEM_CLOCK = 0x0a, ++ X_2048X_SYSTEM_CLOCK = 0x0b, ++ X_4092X_SYSTEM_CLOCK = 0x0c, ++ X_8192X_SYSTEM_CLOCK = 0x0d, ++ X_16384X_SYSTEM_CLOCK = 0x0e, ++ X_32768X_SYSTEM_CLOCK = 0x0f, ++ VC_CNTL_STATUS = 0x00009704, ++ RP_BUSY_bit = 1 << 0, ++ RG_BUSY_bit = 1 << 1, ++ VC_BUSY_bit = 1 << 2, ++ CLAMP_DETECT_bit = 1 << 3, ++ VC_CONFIG = 0x00009718, ++ WRITE_DIS_bit = 1 << 0, ++ GPR_DATA_PHASE_ADJ_mask = 0x07 << 1, ++ GPR_DATA_PHASE_ADJ_shift = 1, ++ X_LATENCY_BASE_0_CYCLES = 0x00, ++ X_LATENCY_BASE_1_CYCLES = 0x01, ++ X_LATENCY_BASE_2_CYCLES = 0x02, ++ X_LATENCY_BASE_3_CYCLES = 0x03, ++ TD_SIMD_SYNC_ADJ_mask = 0x07 << 4, ++ TD_SIMD_SYNC_ADJ_shift = 4, ++ X_0_CYCLES_DELAY = 0x00, ++ X_1_CYCLES_DELAY = 0x01, ++ X_2_CYCLES_DELAY = 0x02, ++ X_3_CYCLES_DELAY = 0x03, ++ X_4_CYCLES_DELAY = 0x04, ++ X_5_CYCLES_DELAY = 0x05, ++ X_6_CYCLES_DELAY = 0x06, ++ X_7_CYCLES_DELAY = 0x07, ++ SMX_DC_CTL0 = 0x0000a020, ++ WR_GATHER_STREAM0_bit = 1 << 0, ++ WR_GATHER_STREAM1_bit = 1 << 1, ++ WR_GATHER_STREAM2_bit = 1 << 2, ++ 
WR_GATHER_STREAM3_bit = 1 << 3, ++ WR_GATHER_SCRATCH_bit = 1 << 4, ++ WR_GATHER_REDUC_BUF_bit = 1 << 5, ++ WR_GATHER_RING_BUF_bit = 1 << 6, ++ WR_GATHER_F_BUF_bit = 1 << 7, ++ DISABLE_CACHES_bit = 1 << 8, ++ AUTO_FLUSH_INVAL_EN_bit = 1 << 10, ++ AUTO_FLUSH_EN_bit = 1 << 11, ++ AUTO_FLUSH_CNT_mask = 0xffff << 12, ++ AUTO_FLUSH_CNT_shift = 12, ++ MC_RD_STALL_FACTOR_mask = 0x03 << 28, ++ MC_RD_STALL_FACTOR_shift = 28, ++ MC_WR_STALL_FACTOR_mask = 0x03 << 30, ++ MC_WR_STALL_FACTOR_shift = 30, ++ SMX_DC_CTL1 = 0x0000a024, ++ OP_FIFO_SKID_mask = 0x7f << 0, ++ OP_FIFO_SKID_shift = 0, ++ CACHE_LINE_SIZE_bit = 1 << 8, ++ MULTI_FLUSH_MODE_bit = 1 << 9, ++ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10, ++ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10, ++ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16, ++ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17, ++ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18, ++ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19, ++ SMX_DC_CTL2 = 0x0000a028, ++ INVALIDATE_CACHES_bit = 1 << 0, ++ CACHES_INVALID_bit = 1 << 1, ++ CACHES_DIRTY_bit = 1 << 2, ++ FLUSH_ALL_bit = 1 << 4, ++ FLUSH_GS_THREADS_bit = 1 << 8, ++ FLUSH_ES_THREADS_bit = 1 << 9, ++ SMX_DC_MC_INTF_CTL = 0x0000a02c, ++ MC_RD_REQ_CRED_mask = 0xff << 0, ++ MC_RD_REQ_CRED_shift = 0, ++ MC_WR_REQ_CRED_mask = 0xff << 16, ++ MC_WR_REQ_CRED_shift = 16, ++ TD_PS_SAMPLER0_BORDER_RED = 0x0000a400, ++ TD_PS_SAMPLER0_BORDER_RED_num = 18, ++ TD_PS_SAMPLER0_BORDER_RED_offset = 16, ++ TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404, ++ TD_PS_SAMPLER0_BORDER_GREEN_num = 18, ++ TD_PS_SAMPLER0_BORDER_GREEN_offset = 16, ++ TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408, ++ TD_PS_SAMPLER0_BORDER_BLUE_num = 18, ++ TD_PS_SAMPLER0_BORDER_BLUE_offset = 16, ++ TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c, ++ TD_PS_SAMPLER0_BORDER_ALPHA_num = 18, ++ TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16, ++ TD_VS_SAMPLER0_BORDER_RED = 0x0000a600, ++ TD_VS_SAMPLER0_BORDER_RED_num = 18, ++ TD_VS_SAMPLER0_BORDER_RED_offset = 16, ++ 
TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604, ++ TD_VS_SAMPLER0_BORDER_GREEN_num = 18, ++ TD_VS_SAMPLER0_BORDER_GREEN_offset = 16, ++ TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608, ++ TD_VS_SAMPLER0_BORDER_BLUE_num = 18, ++ TD_VS_SAMPLER0_BORDER_BLUE_offset = 16, ++ TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c, ++ TD_VS_SAMPLER0_BORDER_ALPHA_num = 18, ++ TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16, ++ TD_GS_SAMPLER0_BORDER_RED = 0x0000a800, ++ TD_GS_SAMPLER0_BORDER_RED_num = 18, ++ TD_GS_SAMPLER0_BORDER_RED_offset = 16, ++ TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804, ++ TD_GS_SAMPLER0_BORDER_GREEN_num = 18, ++ TD_GS_SAMPLER0_BORDER_GREEN_offset = 16, ++ TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808, ++ TD_GS_SAMPLER0_BORDER_BLUE_num = 18, ++ TD_GS_SAMPLER0_BORDER_BLUE_offset = 16, ++ TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c, ++ TD_GS_SAMPLER0_BORDER_ALPHA_num = 18, ++ TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3, ++ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3, ++ DB_DEPTH_SIZE = 0x00028000, ++ PITCH_TILE_MAX_mask = 0x3ff << 0, ++ PITCH_TILE_MAX_shift = 0, ++ SLICE_TILE_MAX_mask = 0xfffff << 10, ++ SLICE_TILE_MAX_shift = 10, ++ DB_DEPTH_VIEW = 0x00028004, ++ SLICE_START_mask = 0x7ff << 0, ++ SLICE_START_shift = 0, ++ SLICE_MAX_mask = 0x7ff << 13, ++ SLICE_MAX_shift = 13, ++ DB_DEPTH_BASE = 0x0002800c, ++ DB_DEPTH_INFO = 0x00028010, ++ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, ++ DB_DEPTH_INFO__FORMAT_shift = 0, ++ DEPTH_INVALID = 0x00, ++ DEPTH_16 = 0x01, ++ DEPTH_X8_24 = 0x02, ++ DEPTH_8_24 = 0x03, ++ DEPTH_X8_24_FLOAT = 0x04, ++ DEPTH_8_24_FLOAT = 0x05, ++ DEPTH_32_FLOAT = 0x06, ++ DEPTH_X24_8_32_FLOAT = 0x07, ++ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, ++ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, ++ 
DB_DEPTH_INFO__ARRAY_MODE_shift = 15, ++ ARRAY_2D_TILED_THIN1 = 0x04, ++ TILE_SURFACE_ENABLE_bit = 1 << 25, ++ TILE_COMPACT_bit = 1 << 26, ++ ZRANGE_PRECISION_bit = 1 << 31, ++ DB_HTILE_DATA_BASE = 0x00028014, ++ DB_STENCIL_CLEAR = 0x00028028, ++ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, ++ DB_STENCIL_CLEAR__CLEAR_shift = 0, ++ MIN_mask = 0xff << 16, ++ MIN_shift = 16, ++ DB_DEPTH_CLEAR = 0x0002802c, ++ PA_SC_SCREEN_SCISSOR_TL = 0x00028030, ++ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0, ++ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, ++ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, ++ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, ++ PA_SC_SCREEN_SCISSOR_BR = 0x00028034, ++ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0, ++ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, ++ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, ++ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, ++ CB_COLOR0_BASE = 0x00028040, ++ CB_COLOR0_BASE_num = 8, ++ CB_COLOR0_SIZE = 0x00028060, ++ CB_COLOR0_SIZE_num = 8, ++/* PITCH_TILE_MAX_mask = 0x3ff << 0, */ ++/* PITCH_TILE_MAX_shift = 0, */ ++/* SLICE_TILE_MAX_mask = 0xfffff << 10, */ ++/* SLICE_TILE_MAX_shift = 10, */ ++ CB_COLOR0_VIEW = 0x00028080, ++ CB_COLOR0_VIEW_num = 8, ++/* SLICE_START_mask = 0x7ff << 0, */ ++/* SLICE_START_shift = 0, */ ++/* SLICE_MAX_mask = 0x7ff << 13, */ ++/* SLICE_MAX_shift = 13, */ ++ CB_COLOR0_INFO = 0x000280a0, ++ CB_COLOR0_INFO_num = 8, ++ ENDIAN_mask = 0x03 << 0, ++ ENDIAN_shift = 0, ++ ENDIAN_NONE = 0x00, ++ ENDIAN_8IN16 = 0x01, ++ ENDIAN_8IN32 = 0x02, ++ ENDIAN_8IN64 = 0x03, ++ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, ++ CB_COLOR0_INFO__FORMAT_shift = 2, ++ COLOR_INVALID = 0x00, ++ COLOR_8 = 0x01, ++ COLOR_4_4 = 0x02, ++ COLOR_3_3_2 = 0x03, ++ COLOR_16 = 0x05, ++ COLOR_16_FLOAT = 0x06, ++ COLOR_8_8 = 0x07, ++ COLOR_5_6_5 = 0x08, ++ COLOR_6_5_5 = 0x09, ++ COLOR_1_5_5_5 = 0x0a, ++ COLOR_4_4_4_4 = 0x0b, ++ COLOR_5_5_5_1 = 0x0c, ++ COLOR_32 = 0x0d, ++ COLOR_32_FLOAT = 0x0e, ++ COLOR_16_16 = 0x0f, ++ COLOR_16_16_FLOAT 
= 0x10, ++ COLOR_8_24 = 0x11, ++ COLOR_8_24_FLOAT = 0x12, ++ COLOR_24_8 = 0x13, ++ COLOR_24_8_FLOAT = 0x14, ++ COLOR_10_11_11 = 0x15, ++ COLOR_10_11_11_FLOAT = 0x16, ++ COLOR_11_11_10 = 0x17, ++ COLOR_11_11_10_FLOAT = 0x18, ++ COLOR_2_10_10_10 = 0x19, ++ COLOR_8_8_8_8 = 0x1a, ++ COLOR_10_10_10_2 = 0x1b, ++ COLOR_X24_8_32_FLOAT = 0x1c, ++ COLOR_32_32 = 0x1d, ++ COLOR_32_32_FLOAT = 0x1e, ++ COLOR_16_16_16_16 = 0x1f, ++ COLOR_16_16_16_16_FLOAT = 0x20, ++ COLOR_32_32_32_32 = 0x22, ++ COLOR_32_32_32_32_FLOAT = 0x23, ++ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, ++ CB_COLOR0_INFO__ARRAY_MODE_shift = 8, ++ ARRAY_LINEAR_GENERAL = 0x00, ++ ARRAY_LINEAR_ALIGNED = 0x01, ++/* ARRAY_2D_TILED_THIN1 = 0x04, */ ++ NUMBER_TYPE_mask = 0x07 << 12, ++ NUMBER_TYPE_shift = 12, ++ NUMBER_UNORM = 0x00, ++ NUMBER_SNORM = 0x01, ++ NUMBER_USCALED = 0x02, ++ NUMBER_SSCALED = 0x03, ++ NUMBER_UINT = 0x04, ++ NUMBER_SINT = 0x05, ++ NUMBER_SRGB = 0x06, ++ NUMBER_FLOAT = 0x07, ++ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15, ++ COMP_SWAP_mask = 0x03 << 16, ++ COMP_SWAP_shift = 16, ++ SWAP_STD = 0x00, ++ SWAP_ALT = 0x01, ++ SWAP_STD_REV = 0x02, ++ SWAP_ALT_REV = 0x03, ++ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18, ++ CB_COLOR0_INFO__TILE_MODE_shift = 18, ++ TILE_DISABLE = 0x00, ++ TILE_CLEAR_ENABLE = 0x01, ++ TILE_FRAG_ENABLE = 0x02, ++ BLEND_CLAMP_bit = 1 << 20, ++ CLEAR_COLOR_bit = 1 << 21, ++ BLEND_BYPASS_bit = 1 << 22, ++ BLEND_FLOAT32_bit = 1 << 23, ++ SIMPLE_FLOAT_bit = 1 << 24, ++ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25, ++/* TILE_COMPACT_bit = 1 << 26, */ ++ SOURCE_FORMAT_bit = 1 << 27, ++ CB_COLOR0_TILE = 0x000280c0, ++ CB_COLOR0_TILE_num = 8, ++ CB_COLOR0_FRAG = 0x000280e0, ++ CB_COLOR0_FRAG_num = 8, ++ CB_COLOR0_MASK = 0x00028100, ++ CB_COLOR0_MASK_num = 8, ++ CMASK_BLOCK_MAX_mask = 0xfff << 0, ++ CMASK_BLOCK_MAX_shift = 0, ++ FMASK_TILE_MAX_mask = 0xfffff << 12, ++ FMASK_TILE_MAX_shift = 12, ++ CB_CLEAR_RED = 0x00028120, ++ CB_CLEAR_GREEN = 0x00028124, ++ CB_CLEAR_BLUE = 0x00028128, ++ 
CB_CLEAR_ALPHA = 0x0002812c, ++ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, ++ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, ++ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, ++ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, ++ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, ++ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, ++ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, ++ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, ++ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, ++ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, ++ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, ++ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, ++ PA_SC_WINDOW_OFFSET = 0x00028200, ++ WINDOW_X_OFFSET_mask = 0x7fff << 0, ++ WINDOW_X_OFFSET_shift = 0, ++ WINDOW_Y_OFFSET_mask = 0x7fff << 16, ++ WINDOW_Y_OFFSET_shift = 16, ++ PA_SC_WINDOW_SCISSOR_TL = 0x00028204, ++ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0, ++ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, ++ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, ++ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, ++ WINDOW_OFFSET_DISABLE_bit = 1 << 31, ++ PA_SC_WINDOW_SCISSOR_BR = 0x00028208, ++ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0, ++ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, ++ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, ++ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, ++ PA_SC_CLIPRECT_RULE = 0x0002820c, ++ CLIP_RULE_mask = 0xffff << 0, ++ CLIP_RULE_shift = 0, ++ PA_SC_CLIPRECT_0_TL = 0x00028210, ++ PA_SC_CLIPRECT_0_TL_num = 4, ++ PA_SC_CLIPRECT_0_TL_offset = 8, ++ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, ++ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, ++ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, ++ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, ++ PA_SC_CLIPRECT_0_BR = 0x00028214, ++ PA_SC_CLIPRECT_0_BR_num = 4, ++ PA_SC_CLIPRECT_0_BR_offset = 8, ++ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, ++ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, ++ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, ++ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, ++ CB_TARGET_MASK = 
0x00028238, ++ TARGET0_ENABLE_mask = 0x0f << 0, ++ TARGET0_ENABLE_shift = 0, ++ TARGET1_ENABLE_mask = 0x0f << 4, ++ TARGET1_ENABLE_shift = 4, ++ TARGET2_ENABLE_mask = 0x0f << 8, ++ TARGET2_ENABLE_shift = 8, ++ TARGET3_ENABLE_mask = 0x0f << 12, ++ TARGET3_ENABLE_shift = 12, ++ TARGET4_ENABLE_mask = 0x0f << 16, ++ TARGET4_ENABLE_shift = 16, ++ TARGET5_ENABLE_mask = 0x0f << 20, ++ TARGET5_ENABLE_shift = 20, ++ TARGET6_ENABLE_mask = 0x0f << 24, ++ TARGET6_ENABLE_shift = 24, ++ TARGET7_ENABLE_mask = 0x0f << 28, ++ TARGET7_ENABLE_shift = 28, ++ CB_SHADER_MASK = 0x0002823c, ++ OUTPUT0_ENABLE_mask = 0x0f << 0, ++ OUTPUT0_ENABLE_shift = 0, ++ OUTPUT1_ENABLE_mask = 0x0f << 4, ++ OUTPUT1_ENABLE_shift = 4, ++ OUTPUT2_ENABLE_mask = 0x0f << 8, ++ OUTPUT2_ENABLE_shift = 8, ++ OUTPUT3_ENABLE_mask = 0x0f << 12, ++ OUTPUT3_ENABLE_shift = 12, ++ OUTPUT4_ENABLE_mask = 0x0f << 16, ++ OUTPUT4_ENABLE_shift = 16, ++ OUTPUT5_ENABLE_mask = 0x0f << 20, ++ OUTPUT5_ENABLE_shift = 20, ++ OUTPUT6_ENABLE_mask = 0x0f << 24, ++ OUTPUT6_ENABLE_shift = 24, ++ OUTPUT7_ENABLE_mask = 0x0f << 28, ++ OUTPUT7_ENABLE_shift = 28, ++ PA_SC_GENERIC_SCISSOR_TL = 0x00028240, ++ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0, ++ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, ++ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, ++ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, ++/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ ++ PA_SC_GENERIC_SCISSOR_BR = 0x00028244, ++ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0, ++ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, ++ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, ++ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, ++ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250, ++ PA_SC_VPORT_SCISSOR_0_TL_num = 16, ++ PA_SC_VPORT_SCISSOR_0_TL_offset = 8, ++ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0, ++ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, ++ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16, ++ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, ++/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ 
++ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, ++ PA_SC_VPORT_SCISSOR_0_BR_num = 16, ++ PA_SC_VPORT_SCISSOR_0_BR_offset = 8, ++ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0, ++ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, ++ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16, ++ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, ++ PA_SC_VPORT_ZMIN_0 = 0x000282d0, ++ PA_SC_VPORT_ZMIN_0_num = 16, ++ PA_SC_VPORT_ZMIN_0_offset = 8, ++ PA_SC_VPORT_ZMAX_0 = 0x000282d4, ++ PA_SC_VPORT_ZMAX_0_num = 16, ++ PA_SC_VPORT_ZMAX_0_offset = 8, ++ SX_MISC = 0x00028350, ++ MULTIPASS_bit = 1 << 0, ++ SQ_VTX_SEMANTIC_0 = 0x00028380, ++ SQ_VTX_SEMANTIC_0_num = 32, ++/* SEMANTIC_ID_mask = 0xff << 0, */ ++/* SEMANTIC_ID_shift = 0, */ ++ VGT_MAX_VTX_INDX = 0x00028400, ++ VGT_MIN_VTX_INDX = 0x00028404, ++ VGT_INDX_OFFSET = 0x00028408, ++ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, ++ SX_ALPHA_TEST_CONTROL = 0x00028410, ++ ALPHA_FUNC_mask = 0x07 << 0, ++ ALPHA_FUNC_shift = 0, ++ REF_NEVER = 0x00, ++ REF_LESS = 0x01, ++ REF_EQUAL = 0x02, ++ REF_LEQUAL = 0x03, ++ REF_GREATER = 0x04, ++ REF_NOTEQUAL = 0x05, ++ REF_GEQUAL = 0x06, ++ REF_ALWAYS = 0x07, ++ ALPHA_TEST_ENABLE_bit = 1 << 3, ++ ALPHA_TEST_BYPASS_bit = 1 << 8, ++ CB_BLEND_RED = 0x00028414, ++ CB_BLEND_GREEN = 0x00028418, ++ CB_BLEND_BLUE = 0x0002841c, ++ CB_BLEND_ALPHA = 0x00028420, ++ CB_FOG_RED = 0x00028424, ++ CB_FOG_GREEN = 0x00028428, ++ CB_FOG_BLUE = 0x0002842c, ++ DB_STENCILREFMASK = 0x00028430, ++ STENCILREF_mask = 0xff << 0, ++ STENCILREF_shift = 0, ++ STENCILMASK_mask = 0xff << 8, ++ STENCILMASK_shift = 8, ++ STENCILWRITEMASK_mask = 0xff << 16, ++ STENCILWRITEMASK_shift = 16, ++ DB_STENCILREFMASK_BF = 0x00028434, ++ STENCILREF_BF_mask = 0xff << 0, ++ STENCILREF_BF_shift = 0, ++ STENCILMASK_BF_mask = 0xff << 8, ++ STENCILMASK_BF_shift = 8, ++ STENCILWRITEMASK_BF_mask = 0xff << 16, ++ STENCILWRITEMASK_BF_shift = 16, ++ SX_ALPHA_REF = 0x00028438, ++ PA_CL_VPORT_XSCALE_0 = 0x0002843c, ++ PA_CL_VPORT_XSCALE_0_num = 16, ++ 
PA_CL_VPORT_XSCALE_0_offset = 24, ++ PA_CL_VPORT_XOFFSET_0 = 0x00028440, ++ PA_CL_VPORT_XOFFSET_0_num = 16, ++ PA_CL_VPORT_XOFFSET_0_offset = 24, ++ PA_CL_VPORT_YSCALE_0 = 0x00028444, ++ PA_CL_VPORT_YSCALE_0_num = 16, ++ PA_CL_VPORT_YSCALE_0_offset = 24, ++ PA_CL_VPORT_YOFFSET_0 = 0x00028448, ++ PA_CL_VPORT_YOFFSET_0_num = 16, ++ PA_CL_VPORT_YOFFSET_0_offset = 24, ++ PA_CL_VPORT_ZSCALE_0 = 0x0002844c, ++ PA_CL_VPORT_ZSCALE_0_num = 16, ++ PA_CL_VPORT_ZSCALE_0_offset = 24, ++ PA_CL_VPORT_ZOFFSET_0 = 0x00028450, ++ PA_CL_VPORT_ZOFFSET_0_num = 16, ++ PA_CL_VPORT_ZOFFSET_0_offset = 24, ++ SPI_VS_OUT_ID_0 = 0x00028614, ++ SPI_VS_OUT_ID_0_num = 10, ++ SEMANTIC_0_mask = 0xff << 0, ++ SEMANTIC_0_shift = 0, ++ SEMANTIC_1_mask = 0xff << 8, ++ SEMANTIC_1_shift = 8, ++ SEMANTIC_2_mask = 0xff << 16, ++ SEMANTIC_2_shift = 16, ++ SEMANTIC_3_mask = 0xff << 24, ++ SEMANTIC_3_shift = 24, ++ SPI_PS_INPUT_CNTL_0 = 0x00028644, ++ SPI_PS_INPUT_CNTL_0_num = 32, ++ SEMANTIC_mask = 0xff << 0, ++ SEMANTIC_shift = 0, ++ DEFAULT_VAL_mask = 0x03 << 8, ++ DEFAULT_VAL_shift = 8, ++ X_0_0F = 0x00, ++ FLAT_SHADE_bit = 1 << 10, ++ SEL_CENTROID_bit = 1 << 11, ++ SEL_LINEAR_bit = 1 << 12, ++ CYL_WRAP_mask = 0x0f << 13, ++ CYL_WRAP_shift = 13, ++ PT_SPRITE_TEX_bit = 1 << 17, ++ SEL_SAMPLE_bit = 1 << 18, ++ SPI_VS_OUT_CONFIG = 0x000286c4, ++ VS_PER_COMPONENT_bit = 1 << 0, ++ VS_EXPORT_COUNT_mask = 0x1f << 1, ++ VS_EXPORT_COUNT_shift = 1, ++ VS_EXPORTS_FOG_bit = 1 << 8, ++ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, ++ VS_OUT_FOG_VEC_ADDR_shift = 9, ++ SPI_PS_IN_CONTROL_0 = 0x000286cc, ++ NUM_INTERP_mask = 0x3f << 0, ++ NUM_INTERP_shift = 0, ++ POSITION_ENA_bit = 1 << 8, ++ POSITION_CENTROID_bit = 1 << 9, ++ POSITION_ADDR_mask = 0x1f << 10, ++ POSITION_ADDR_shift = 10, ++ PARAM_GEN_mask = 0x0f << 15, ++ PARAM_GEN_shift = 15, ++ PARAM_GEN_ADDR_mask = 0x7f << 19, ++ PARAM_GEN_ADDR_shift = 19, ++ BARYC_SAMPLE_CNTL_mask = 0x03 << 26, ++ BARYC_SAMPLE_CNTL_shift = 26, ++ CENTROIDS_ONLY = 0x00, ++ CENTERS_ONLY = 
0x01, ++ CENTROIDS_AND_CENTERS = 0x02, ++ UNDEF = 0x03, ++ PERSP_GRADIENT_ENA_bit = 1 << 28, ++ LINEAR_GRADIENT_ENA_bit = 1 << 29, ++ POSITION_SAMPLE_bit = 1 << 30, ++ BARYC_AT_SAMPLE_ENA_bit = 1 << 31, ++ SPI_PS_IN_CONTROL_1 = 0x000286d0, ++ GEN_INDEX_PIX_bit = 1 << 0, ++ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1, ++ GEN_INDEX_PIX_ADDR_shift = 1, ++ FRONT_FACE_ENA_bit = 1 << 8, ++ FRONT_FACE_CHAN_mask = 0x03 << 9, ++ FRONT_FACE_CHAN_shift = 9, ++ FRONT_FACE_ALL_BITS_bit = 1 << 11, ++ FRONT_FACE_ADDR_mask = 0x1f << 12, ++ FRONT_FACE_ADDR_shift = 12, ++ FOG_ADDR_mask = 0x7f << 17, ++ FOG_ADDR_shift = 17, ++ FIXED_PT_POSITION_ENA_bit = 1 << 24, ++ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, ++ FIXED_PT_POSITION_ADDR_shift = 25, ++ SPI_INTERP_CONTROL_0 = 0x000286d4, ++ FLAT_SHADE_ENA_bit = 1 << 0, ++ PNT_SPRITE_ENA_bit = 1 << 1, ++ PNT_SPRITE_OVRD_X_mask = 0x07 << 2, ++ PNT_SPRITE_OVRD_X_shift = 2, ++ SPI_PNT_SPRITE_SEL_0 = 0x00, ++ SPI_PNT_SPRITE_SEL_1 = 0x01, ++ SPI_PNT_SPRITE_SEL_S = 0x02, ++ SPI_PNT_SPRITE_SEL_T = 0x03, ++ SPI_PNT_SPRITE_SEL_NONE = 0x04, ++ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, ++ PNT_SPRITE_OVRD_Y_shift = 5, ++/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ ++/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ ++/* SPI_PNT_SPRITE_SEL_S = 0x02, */ ++/* SPI_PNT_SPRITE_SEL_T = 0x03, */ ++/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ ++ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, ++ PNT_SPRITE_OVRD_Z_shift = 8, ++/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ ++/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ ++/* SPI_PNT_SPRITE_SEL_S = 0x02, */ ++/* SPI_PNT_SPRITE_SEL_T = 0x03, */ ++/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ ++ PNT_SPRITE_OVRD_W_mask = 0x07 << 11, ++ PNT_SPRITE_OVRD_W_shift = 11, ++/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ ++/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ ++/* SPI_PNT_SPRITE_SEL_S = 0x02, */ ++/* SPI_PNT_SPRITE_SEL_T = 0x03, */ ++/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ ++ PNT_SPRITE_TOP_1_bit = 1 << 14, ++ SPI_INPUT_Z = 0x000286d8, ++ PROVIDE_Z_TO_SPI_bit = 1 << 0, ++ SPI_FOG_CNTL = 0x000286dc, ++ PASS_FOG_THROUGH_PS_bit = 
1 << 0, ++ PIXEL_FOG_FUNC_mask = 0x03 << 1, ++ PIXEL_FOG_FUNC_shift = 1, ++ SPI_FOG_NONE = 0x00, ++ SPI_FOG_EXP = 0x01, ++ SPI_FOG_EXP2 = 0x02, ++ SPI_FOG_LINEAR = 0x03, ++ PIXEL_FOG_SRC_SEL_bit = 1 << 3, ++ VS_FOG_CLAMP_DISABLE_bit = 1 << 4, ++ SPI_FOG_FUNC_SCALE = 0x000286e0, ++ SPI_FOG_FUNC_BIAS = 0x000286e4, ++ CB_BLEND0_CONTROL = 0x00028780, ++ CB_BLEND0_CONTROL_num = 8, ++ COLOR_SRCBLEND_mask = 0x1f << 0, ++ COLOR_SRCBLEND_shift = 0, ++ COLOR_COMB_FCN_mask = 0x07 << 5, ++ COLOR_COMB_FCN_shift = 5, ++ COLOR_DESTBLEND_mask = 0x1f << 8, ++ COLOR_DESTBLEND_shift = 8, ++ OPACITY_WEIGHT_bit = 1 << 13, ++ ALPHA_SRCBLEND_mask = 0x1f << 16, ++ ALPHA_SRCBLEND_shift = 16, ++ ALPHA_COMB_FCN_mask = 0x07 << 21, ++ ALPHA_COMB_FCN_shift = 21, ++ ALPHA_DESTBLEND_mask = 0x1f << 24, ++ ALPHA_DESTBLEND_shift = 24, ++ SEPARATE_ALPHA_BLEND_bit = 1 << 29, ++ VGT_DMA_BASE_HI = 0x000287e4, ++ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, ++ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, ++ VGT_DMA_BASE = 0x000287e8, ++ VGT_DRAW_INITIATOR = 0x000287f0, ++ SOURCE_SELECT_mask = 0x03 << 0, ++ SOURCE_SELECT_shift = 0, ++ DI_SRC_SEL_DMA = 0x00, ++ DI_SRC_SEL_IMMEDIATE = 0x01, ++ DI_SRC_SEL_AUTO_INDEX = 0x02, ++ DI_SRC_SEL_RESERVED = 0x03, ++ MAJOR_MODE_mask = 0x03 << 2, ++ MAJOR_MODE_shift = 2, ++ DI_MAJOR_MODE_0 = 0x00, ++ DI_MAJOR_MODE_1 = 0x01, ++ SPRITE_EN_bit = 1 << 4, ++ NOT_EOP_bit = 1 << 5, ++ USE_OPAQUE_bit = 1 << 6, ++ VGT_IMMED_DATA = 0x000287f4, ++ VGT_EVENT_ADDRESS_REG = 0x000287f8, ++ ADDRESS_LOW_mask = 0xfffffff << 0, ++ ADDRESS_LOW_shift = 0, ++ DB_DEPTH_CONTROL = 0x00028800, ++ STENCIL_ENABLE_bit = 1 << 0, ++ Z_ENABLE_bit = 1 << 1, ++ Z_WRITE_ENABLE_bit = 1 << 2, ++ ZFUNC_mask = 0x07 << 4, ++ ZFUNC_shift = 4, ++ FRAG_NEVER = 0x00, ++ FRAG_LESS = 0x01, ++ FRAG_EQUAL = 0x02, ++ FRAG_LEQUAL = 0x03, ++ FRAG_GREATER = 0x04, ++ FRAG_NOTEQUAL = 0x05, ++ FRAG_GEQUAL = 0x06, ++ FRAG_ALWAYS = 0x07, ++ BACKFACE_ENABLE_bit = 1 << 7, ++ STENCILFUNC_mask = 0x07 << 8, ++ STENCILFUNC_shift = 8, 
++/* REF_NEVER = 0x00, */ ++/* REF_LESS = 0x01, */ ++/* REF_EQUAL = 0x02, */ ++/* REF_LEQUAL = 0x03, */ ++/* REF_GREATER = 0x04, */ ++/* REF_NOTEQUAL = 0x05, */ ++/* REF_GEQUAL = 0x06, */ ++/* REF_ALWAYS = 0x07, */ ++ STENCILFAIL_mask = 0x07 << 11, ++ STENCILFAIL_shift = 11, ++ STENCIL_KEEP = 0x00, ++ STENCIL_ZERO = 0x01, ++ STENCIL_REPLACE = 0x02, ++ STENCIL_INCR_CLAMP = 0x03, ++ STENCIL_DECR_CLAMP = 0x04, ++ STENCIL_INVERT = 0x05, ++ STENCIL_INCR_WRAP = 0x06, ++ STENCIL_DECR_WRAP = 0x07, ++ STENCILZPASS_mask = 0x07 << 14, ++ STENCILZPASS_shift = 14, ++/* STENCIL_KEEP = 0x00, */ ++/* STENCIL_ZERO = 0x01, */ ++/* STENCIL_REPLACE = 0x02, */ ++/* STENCIL_INCR_CLAMP = 0x03, */ ++/* STENCIL_DECR_CLAMP = 0x04, */ ++/* STENCIL_INVERT = 0x05, */ ++/* STENCIL_INCR_WRAP = 0x06, */ ++/* STENCIL_DECR_WRAP = 0x07, */ ++ STENCILZFAIL_mask = 0x07 << 17, ++ STENCILZFAIL_shift = 17, ++/* STENCIL_KEEP = 0x00, */ ++/* STENCIL_ZERO = 0x01, */ ++/* STENCIL_REPLACE = 0x02, */ ++/* STENCIL_INCR_CLAMP = 0x03, */ ++/* STENCIL_DECR_CLAMP = 0x04, */ ++/* STENCIL_INVERT = 0x05, */ ++/* STENCIL_INCR_WRAP = 0x06, */ ++/* STENCIL_DECR_WRAP = 0x07, */ ++ STENCILFUNC_BF_mask = 0x07 << 20, ++ STENCILFUNC_BF_shift = 20, ++/* REF_NEVER = 0x00, */ ++/* REF_LESS = 0x01, */ ++/* REF_EQUAL = 0x02, */ ++/* REF_LEQUAL = 0x03, */ ++/* REF_GREATER = 0x04, */ ++/* REF_NOTEQUAL = 0x05, */ ++/* REF_GEQUAL = 0x06, */ ++/* REF_ALWAYS = 0x07, */ ++ STENCILFAIL_BF_mask = 0x07 << 23, ++ STENCILFAIL_BF_shift = 23, ++/* STENCIL_KEEP = 0x00, */ ++/* STENCIL_ZERO = 0x01, */ ++/* STENCIL_REPLACE = 0x02, */ ++/* STENCIL_INCR_CLAMP = 0x03, */ ++/* STENCIL_DECR_CLAMP = 0x04, */ ++/* STENCIL_INVERT = 0x05, */ ++/* STENCIL_INCR_WRAP = 0x06, */ ++/* STENCIL_DECR_WRAP = 0x07, */ ++ STENCILZPASS_BF_mask = 0x07 << 26, ++ STENCILZPASS_BF_shift = 26, ++/* STENCIL_KEEP = 0x00, */ ++/* STENCIL_ZERO = 0x01, */ ++/* STENCIL_REPLACE = 0x02, */ ++/* STENCIL_INCR_CLAMP = 0x03, */ ++/* STENCIL_DECR_CLAMP = 0x04, */ ++/* STENCIL_INVERT = 
0x05, */ ++/* STENCIL_INCR_WRAP = 0x06, */ ++/* STENCIL_DECR_WRAP = 0x07, */ ++ STENCILZFAIL_BF_mask = 0x07 << 29, ++ STENCILZFAIL_BF_shift = 29, ++/* STENCIL_KEEP = 0x00, */ ++/* STENCIL_ZERO = 0x01, */ ++/* STENCIL_REPLACE = 0x02, */ ++/* STENCIL_INCR_CLAMP = 0x03, */ ++/* STENCIL_DECR_CLAMP = 0x04, */ ++/* STENCIL_INVERT = 0x05, */ ++/* STENCIL_INCR_WRAP = 0x06, */ ++/* STENCIL_DECR_WRAP = 0x07, */ ++ CB_BLEND_CONTROL = 0x00028804, ++/* COLOR_SRCBLEND_mask = 0x1f << 0, */ ++/* COLOR_SRCBLEND_shift = 0, */ ++ BLEND_ZERO = 0x00, ++ BLEND_ONE = 0x01, ++ BLEND_SRC_COLOR = 0x02, ++ BLEND_ONE_MINUS_SRC_COLOR = 0x03, ++ BLEND_SRC_ALPHA = 0x04, ++ BLEND_ONE_MINUS_SRC_ALPHA = 0x05, ++ BLEND_DST_ALPHA = 0x06, ++ BLEND_ONE_MINUS_DST_ALPHA = 0x07, ++ BLEND_DST_COLOR = 0x08, ++ BLEND_ONE_MINUS_DST_COLOR = 0x09, ++ BLEND_SRC_ALPHA_SATURATE = 0x0a, ++ BLEND_BOTH_SRC_ALPHA = 0x0b, ++ BLEND_BOTH_INV_SRC_ALPHA = 0x0c, ++ BLEND_CONSTANT_COLOR = 0x0d, ++ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, ++ BLEND_SRC1_COLOR = 0x0f, ++ BLEND_INV_SRC1_COLOR = 0x10, ++ BLEND_SRC1_ALPHA = 0x11, ++ BLEND_INV_SRC1_ALPHA = 0x12, ++ BLEND_CONSTANT_ALPHA = 0x13, ++ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, ++/* COLOR_COMB_FCN_mask = 0x07 << 5, */ ++/* COLOR_COMB_FCN_shift = 5, */ ++ COMB_DST_PLUS_SRC = 0x00, ++ COMB_SRC_MINUS_DST = 0x01, ++ COMB_MIN_DST_SRC = 0x02, ++ COMB_MAX_DST_SRC = 0x03, ++ COMB_DST_MINUS_SRC = 0x04, ++/* COLOR_DESTBLEND_mask = 0x1f << 8, */ ++/* COLOR_DESTBLEND_shift = 8, */ ++/* BLEND_ZERO = 0x00, */ ++/* BLEND_ONE = 0x01, */ ++/* BLEND_SRC_COLOR = 0x02, */ ++/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ ++/* BLEND_SRC_ALPHA = 0x04, */ ++/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ ++/* BLEND_DST_ALPHA = 0x06, */ ++/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ ++/* BLEND_DST_COLOR = 0x08, */ ++/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ ++/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ ++/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ ++/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ ++/* BLEND_CONSTANT_COLOR = 0x0d, */ ++/* 
BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ ++/* BLEND_SRC1_COLOR = 0x0f, */ ++/* BLEND_INV_SRC1_COLOR = 0x10, */ ++/* BLEND_SRC1_ALPHA = 0x11, */ ++/* BLEND_INV_SRC1_ALPHA = 0x12, */ ++/* BLEND_CONSTANT_ALPHA = 0x13, */ ++/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ ++/* OPACITY_WEIGHT_bit = 1 << 13, */ ++/* ALPHA_SRCBLEND_mask = 0x1f << 16, */ ++/* ALPHA_SRCBLEND_shift = 16, */ ++/* BLEND_ZERO = 0x00, */ ++/* BLEND_ONE = 0x01, */ ++/* BLEND_SRC_COLOR = 0x02, */ ++/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ ++/* BLEND_SRC_ALPHA = 0x04, */ ++/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ ++/* BLEND_DST_ALPHA = 0x06, */ ++/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ ++/* BLEND_DST_COLOR = 0x08, */ ++/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ ++/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ ++/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ ++/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ ++/* BLEND_CONSTANT_COLOR = 0x0d, */ ++/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ ++/* BLEND_SRC1_COLOR = 0x0f, */ ++/* BLEND_INV_SRC1_COLOR = 0x10, */ ++/* BLEND_SRC1_ALPHA = 0x11, */ ++/* BLEND_INV_SRC1_ALPHA = 0x12, */ ++/* BLEND_CONSTANT_ALPHA = 0x13, */ ++/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ ++/* ALPHA_COMB_FCN_mask = 0x07 << 21, */ ++/* ALPHA_COMB_FCN_shift = 21, */ ++/* COMB_DST_PLUS_SRC = 0x00, */ ++/* COMB_SRC_MINUS_DST = 0x01, */ ++/* COMB_MIN_DST_SRC = 0x02, */ ++/* COMB_MAX_DST_SRC = 0x03, */ ++/* COMB_DST_MINUS_SRC = 0x04, */ ++/* ALPHA_DESTBLEND_mask = 0x1f << 24, */ ++/* ALPHA_DESTBLEND_shift = 24, */ ++/* BLEND_ZERO = 0x00, */ ++/* BLEND_ONE = 0x01, */ ++/* BLEND_SRC_COLOR = 0x02, */ ++/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ ++/* BLEND_SRC_ALPHA = 0x04, */ ++/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ ++/* BLEND_DST_ALPHA = 0x06, */ ++/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ ++/* BLEND_DST_COLOR = 0x08, */ ++/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ ++/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ ++/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ ++/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ ++/* BLEND_CONSTANT_COLOR = 0x0d, */ ++/* 
BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ ++/* BLEND_SRC1_COLOR = 0x0f, */ ++/* BLEND_INV_SRC1_COLOR = 0x10, */ ++/* BLEND_SRC1_ALPHA = 0x11, */ ++/* BLEND_INV_SRC1_ALPHA = 0x12, */ ++/* BLEND_CONSTANT_ALPHA = 0x13, */ ++/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ ++/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */ ++ CB_COLOR_CONTROL = 0x00028808, ++ FOG_ENABLE_bit = 1 << 0, ++ MULTIWRITE_ENABLE_bit = 1 << 1, ++ DITHER_ENABLE_bit = 1 << 2, ++ DEGAMMA_ENABLE_bit = 1 << 3, ++ SPECIAL_OP_mask = 0x07 << 4, ++ SPECIAL_OP_shift = 4, ++ SPECIAL_NORMAL = 0x00, ++ SPECIAL_DISABLE = 0x01, ++ SPECIAL_FAST_CLEAR = 0x02, ++ SPECIAL_FORCE_CLEAR = 0x03, ++ SPECIAL_EXPAND_COLOR = 0x04, ++ SPECIAL_EXPAND_TEXTURE = 0x05, ++ SPECIAL_EXPAND_SAMPLES = 0x06, ++ SPECIAL_RESOLVE_BOX = 0x07, ++ PER_MRT_BLEND_bit = 1 << 7, ++ TARGET_BLEND_ENABLE_mask = 0xff << 8, ++ TARGET_BLEND_ENABLE_shift = 8, ++ ROP3_mask = 0xff << 16, ++ ROP3_shift = 16, ++ DB_SHADER_CONTROL = 0x0002880c, ++ Z_EXPORT_ENABLE_bit = 1 << 0, ++ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, ++ Z_ORDER_mask = 0x03 << 4, ++ Z_ORDER_shift = 4, ++ LATE_Z = 0x00, ++ EARLY_Z_THEN_LATE_Z = 0x01, ++ RE_Z = 0x02, ++ EARLY_Z_THEN_RE_Z = 0x03, ++ KILL_ENABLE_bit = 1 << 6, ++ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, ++ MASK_EXPORT_ENABLE_bit = 1 << 8, ++ DUAL_EXPORT_ENABLE_bit = 1 << 9, ++ EXEC_ON_HIER_FAIL_bit = 1 << 10, ++ EXEC_ON_NOOP_bit = 1 << 11, ++ PA_CL_CLIP_CNTL = 0x00028810, ++ UCP_ENA_0_bit = 1 << 0, ++ UCP_ENA_1_bit = 1 << 1, ++ UCP_ENA_2_bit = 1 << 2, ++ UCP_ENA_3_bit = 1 << 3, ++ UCP_ENA_4_bit = 1 << 4, ++ UCP_ENA_5_bit = 1 << 5, ++ PS_UCP_Y_SCALE_NEG_bit = 1 << 13, ++ PS_UCP_MODE_mask = 0x03 << 14, ++ PS_UCP_MODE_shift = 14, ++ CLIP_DISABLE_bit = 1 << 16, ++ UCP_CULL_ONLY_ENA_bit = 1 << 17, ++ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, ++ DX_CLIP_SPACE_DEF_bit = 1 << 19, ++ DIS_CLIP_ERR_DETECT_bit = 1 << 20, ++ VTX_KILL_OR_bit = 1 << 21, ++ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, ++ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, ++ 
ZCLIP_NEAR_DISABLE_bit = 1 << 26, ++ ZCLIP_FAR_DISABLE_bit = 1 << 27, ++ PA_SU_SC_MODE_CNTL = 0x00028814, ++ CULL_FRONT_bit = 1 << 0, ++ CULL_BACK_bit = 1 << 1, ++ FACE_bit = 1 << 2, ++ POLY_MODE_mask = 0x03 << 3, ++ POLY_MODE_shift = 3, ++ X_DISABLE_POLY_MODE = 0x00, ++ X_DUAL_MODE = 0x01, ++ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, ++ POLYMODE_FRONT_PTYPE_shift = 5, ++ X_DRAW_POINTS = 0x00, ++ X_DRAW_LINES = 0x01, ++ X_DRAW_TRIANGLES = 0x02, ++ POLYMODE_BACK_PTYPE_mask = 0x07 << 8, ++ POLYMODE_BACK_PTYPE_shift = 8, ++/* X_DRAW_POINTS = 0x00, */ ++/* X_DRAW_LINES = 0x01, */ ++/* X_DRAW_TRIANGLES = 0x02, */ ++ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, ++ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, ++ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, ++ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, ++ PROVOKING_VTX_LAST_bit = 1 << 19, ++ PERSP_CORR_DIS_bit = 1 << 20, ++ MULTI_PRIM_IB_ENA_bit = 1 << 21, ++ PA_CL_VTE_CNTL = 0x00028818, ++ VPORT_X_SCALE_ENA_bit = 1 << 0, ++ VPORT_X_OFFSET_ENA_bit = 1 << 1, ++ VPORT_Y_SCALE_ENA_bit = 1 << 2, ++ VPORT_Y_OFFSET_ENA_bit = 1 << 3, ++ VPORT_Z_SCALE_ENA_bit = 1 << 4, ++ VPORT_Z_OFFSET_ENA_bit = 1 << 5, ++ VTX_XY_FMT_bit = 1 << 8, ++ VTX_Z_FMT_bit = 1 << 9, ++ VTX_W0_FMT_bit = 1 << 10, ++ PERFCOUNTER_REF_bit = 1 << 11, ++ PA_CL_VS_OUT_CNTL = 0x0002881c, ++ CLIP_DIST_ENA_0_bit = 1 << 0, ++ CLIP_DIST_ENA_1_bit = 1 << 1, ++ CLIP_DIST_ENA_2_bit = 1 << 2, ++ CLIP_DIST_ENA_3_bit = 1 << 3, ++ CLIP_DIST_ENA_4_bit = 1 << 4, ++ CLIP_DIST_ENA_5_bit = 1 << 5, ++ CLIP_DIST_ENA_6_bit = 1 << 6, ++ CLIP_DIST_ENA_7_bit = 1 << 7, ++ CULL_DIST_ENA_0_bit = 1 << 8, ++ CULL_DIST_ENA_1_bit = 1 << 9, ++ CULL_DIST_ENA_2_bit = 1 << 10, ++ CULL_DIST_ENA_3_bit = 1 << 11, ++ CULL_DIST_ENA_4_bit = 1 << 12, ++ CULL_DIST_ENA_5_bit = 1 << 13, ++ CULL_DIST_ENA_6_bit = 1 << 14, ++ CULL_DIST_ENA_7_bit = 1 << 15, ++ USE_VTX_POINT_SIZE_bit = 1 << 16, ++ USE_VTX_EDGE_FLAG_bit = 1 << 17, ++ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, ++ USE_VTX_VIEWPORT_INDX_bit = 1 << 19, ++ 
USE_VTX_KILL_FLAG_bit = 1 << 20, ++ VS_OUT_MISC_VEC_ENA_bit = 1 << 21, ++ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, ++ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, ++ PA_CL_NANINF_CNTL = 0x00028820, ++ VTE_XY_INF_DISCARD_bit = 1 << 0, ++ VTE_Z_INF_DISCARD_bit = 1 << 1, ++ VTE_W_INF_DISCARD_bit = 1 << 2, ++ VTE_0XNANINF_IS_0_bit = 1 << 3, ++ VTE_XY_NAN_RETAIN_bit = 1 << 4, ++ VTE_Z_NAN_RETAIN_bit = 1 << 5, ++ VTE_W_NAN_RETAIN_bit = 1 << 6, ++ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, ++ VS_XY_NAN_TO_INF_bit = 1 << 8, ++ VS_XY_INF_RETAIN_bit = 1 << 9, ++ VS_Z_NAN_TO_INF_bit = 1 << 10, ++ VS_Z_INF_RETAIN_bit = 1 << 11, ++ VS_W_NAN_TO_INF_bit = 1 << 12, ++ VS_W_INF_RETAIN_bit = 1 << 13, ++ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, ++ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, ++ SQ_PGM_START_PS = 0x00028840, ++ SQ_PGM_RESOURCES_PS = 0x00028850, ++ NUM_GPRS_mask = 0xff << 0, ++ NUM_GPRS_shift = 0, ++ STACK_SIZE_mask = 0xff << 8, ++ STACK_SIZE_shift = 8, ++ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, ++ FETCH_CACHE_LINES_mask = 0x07 << 24, ++ FETCH_CACHE_LINES_shift = 24, ++ UNCACHED_FIRST_INST_bit = 1 << 28, ++ CLAMP_CONSTS_bit = 1 << 31, ++ SQ_PGM_EXPORTS_PS = 0x00028854, ++ EXPORT_MODE_mask = 0x1f << 0, ++ EXPORT_MODE_shift = 0, ++ SQ_PGM_START_VS = 0x00028858, ++ SQ_PGM_RESOURCES_VS = 0x00028868, ++/* NUM_GPRS_mask = 0xff << 0, */ ++/* NUM_GPRS_shift = 0, */ ++/* STACK_SIZE_mask = 0xff << 8, */ ++/* STACK_SIZE_shift = 8, */ ++ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21, ++/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ ++/* FETCH_CACHE_LINES_shift = 24, */ ++/* UNCACHED_FIRST_INST_bit = 1 << 28, */ ++ SQ_PGM_START_GS = 0x0002886c, ++ SQ_PGM_RESOURCES_GS = 0x0002887c, ++/* NUM_GPRS_mask = 0xff << 0, */ ++/* NUM_GPRS_shift = 0, */ ++/* STACK_SIZE_mask = 0xff << 8, */ ++/* STACK_SIZE_shift = 8, */ ++ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21, ++/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ ++/* FETCH_CACHE_LINES_shift = 24, */ ++/* UNCACHED_FIRST_INST_bit = 1 << 28, */ ++ SQ_PGM_START_ES = 
0x00028880, ++ SQ_PGM_RESOURCES_ES = 0x00028890, ++/* NUM_GPRS_mask = 0xff << 0, */ ++/* NUM_GPRS_shift = 0, */ ++/* STACK_SIZE_mask = 0xff << 8, */ ++/* STACK_SIZE_shift = 8, */ ++ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21, ++/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ ++/* FETCH_CACHE_LINES_shift = 24, */ ++/* UNCACHED_FIRST_INST_bit = 1 << 28, */ ++ SQ_PGM_START_FS = 0x00028894, ++ SQ_PGM_RESOURCES_FS = 0x000288a4, ++/* NUM_GPRS_mask = 0xff << 0, */ ++/* NUM_GPRS_shift = 0, */ ++/* STACK_SIZE_mask = 0xff << 8, */ ++/* STACK_SIZE_shift = 8, */ ++ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21, ++ SQ_ESGS_RING_ITEMSIZE = 0x000288a8, ++ ITEMSIZE_mask = 0x7fff << 0, ++ ITEMSIZE_shift = 0, ++ SQ_GSVS_RING_ITEMSIZE = 0x000288ac, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_FBUF_RING_ITEMSIZE = 0x000288c0, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_REDUC_RING_ITEMSIZE = 0x000288c4, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_GS_VERT_ITEMSIZE = 0x000288c8, ++/* ITEMSIZE_mask = 0x7fff << 0, */ ++/* ITEMSIZE_shift = 0, */ ++ SQ_PGM_CF_OFFSET_PS = 0x000288cc, ++ PGM_CF_OFFSET_mask = 0xfffff << 0, ++ PGM_CF_OFFSET_shift = 0, ++ SQ_PGM_CF_OFFSET_VS = 0x000288d0, ++/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ ++/* PGM_CF_OFFSET_shift = 0, */ ++ SQ_PGM_CF_OFFSET_GS = 0x000288d4, ++/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ ++/* PGM_CF_OFFSET_shift = 0, */ ++ SQ_PGM_CF_OFFSET_ES = 0x000288d8, ++/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ ++/* PGM_CF_OFFSET_shift = 0, */ ++ SQ_PGM_CF_OFFSET_FS 
= 0x000288dc, ++/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ ++/* PGM_CF_OFFSET_shift = 0, */ ++ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0, ++ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, ++ SQ_ALU_CONST_CACHE_PS_0_num = 16, ++ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, ++ SQ_ALU_CONST_CACHE_VS_0_num = 16, ++ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, ++ SQ_ALU_CONST_CACHE_GS_0_num = 16, ++ PA_SU_POINT_SIZE = 0x00028a00, ++ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0, ++ PA_SU_POINT_SIZE__HEIGHT_shift = 0, ++ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, ++ PA_SU_POINT_SIZE__WIDTH_shift = 16, ++ PA_SU_POINT_MINMAX = 0x00028a04, ++ MIN_SIZE_mask = 0xffff << 0, ++ MIN_SIZE_shift = 0, ++ MAX_SIZE_mask = 0xffff << 16, ++ MAX_SIZE_shift = 16, ++ PA_SU_LINE_CNTL = 0x00028a08, ++ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, ++ PA_SU_LINE_CNTL__WIDTH_shift = 0, ++ PA_SC_LINE_STIPPLE = 0x00028a0c, ++ LINE_PATTERN_mask = 0xffff << 0, ++ LINE_PATTERN_shift = 0, ++ REPEAT_COUNT_mask = 0xff << 16, ++ REPEAT_COUNT_shift = 16, ++ PATTERN_BIT_ORDER_bit = 1 << 28, ++ AUTO_RESET_CNTL_mask = 0x03 << 29, ++ AUTO_RESET_CNTL_shift = 29, ++ VGT_OUTPUT_PATH_CNTL = 0x00028a10, ++ PATH_SELECT_mask = 0x03 << 0, ++ PATH_SELECT_shift = 0, ++ VGT_OUTPATH_VTX_REUSE = 0x00, ++ VGT_OUTPATH_TESS_EN = 0x01, ++ VGT_OUTPATH_PASSTHRU = 0x02, ++ VGT_OUTPATH_GS_BLOCK = 0x03, ++ VGT_HOS_CNTL = 0x00028a14, ++ TESS_MODE_mask = 0x03 << 0, ++ TESS_MODE_shift = 0, ++ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, ++ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, ++ VGT_HOS_REUSE_DEPTH = 0x00028a20, ++ REUSE_DEPTH_mask = 0xff << 0, ++ REUSE_DEPTH_shift = 0, ++ VGT_GROUP_PRIM_TYPE = 0x00028a24, ++ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, ++ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, ++ VGT_GRP_3D_POINT = 0x00, ++ VGT_GRP_3D_LINE = 0x01, ++ VGT_GRP_3D_TRI = 0x02, ++ VGT_GRP_3D_RECT = 0x03, ++ VGT_GRP_3D_QUAD = 0x04, ++ VGT_GRP_2D_COPY_RECT_V0 = 0x05, ++ VGT_GRP_2D_COPY_RECT_V1 = 0x06, ++ VGT_GRP_2D_COPY_RECT_V2 = 0x07, ++ VGT_GRP_2D_COPY_RECT_V3 = 0x08, ++ 
VGT_GRP_2D_FILL_RECT = 0x09, ++ VGT_GRP_2D_LINE = 0x0a, ++ VGT_GRP_2D_TRI = 0x0b, ++ VGT_GRP_PRIM_INDEX_LINE = 0x0c, ++ VGT_GRP_PRIM_INDEX_TRI = 0x0d, ++ VGT_GRP_PRIM_INDEX_QUAD = 0x0e, ++ VGT_GRP_3D_LINE_ADJ = 0x0f, ++ VGT_GRP_3D_TRI_ADJ = 0x10, ++ RETAIN_ORDER_bit = 1 << 14, ++ RETAIN_QUADS_bit = 1 << 15, ++ PRIM_ORDER_mask = 0x07 << 16, ++ PRIM_ORDER_shift = 16, ++ VGT_GRP_LIST = 0x00, ++ VGT_GRP_STRIP = 0x01, ++ VGT_GRP_FAN = 0x02, ++ VGT_GRP_LOOP = 0x03, ++ VGT_GRP_POLYGON = 0x04, ++ VGT_GROUP_FIRST_DECR = 0x00028a28, ++ FIRST_DECR_mask = 0x0f << 0, ++ FIRST_DECR_shift = 0, ++ VGT_GROUP_DECR = 0x00028a2c, ++ DECR_mask = 0x0f << 0, ++ DECR_shift = 0, ++ VGT_GROUP_VECT_0_CNTL = 0x00028a30, ++ COMP_X_EN_bit = 1 << 0, ++ COMP_Y_EN_bit = 1 << 1, ++ COMP_Z_EN_bit = 1 << 2, ++ COMP_W_EN_bit = 1 << 3, ++ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, ++ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, ++ SHIFT_mask = 0xff << 16, ++ SHIFT_shift = 16, ++ VGT_GROUP_VECT_1_CNTL = 0x00028a34, ++/* COMP_X_EN_bit = 1 << 0, */ ++/* COMP_Y_EN_bit = 1 << 1, */ ++/* COMP_Z_EN_bit = 1 << 2, */ ++/* COMP_W_EN_bit = 1 << 3, */ ++ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, ++ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, ++/* SHIFT_mask = 0xff << 16, */ ++/* SHIFT_shift = 16, */ ++ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, ++ X_CONV_mask = 0x0f << 0, ++ X_CONV_shift = 0, ++ VGT_GRP_INDEX_16 = 0x00, ++ VGT_GRP_INDEX_32 = 0x01, ++ VGT_GRP_UINT_16 = 0x02, ++ VGT_GRP_UINT_32 = 0x03, ++ VGT_GRP_SINT_16 = 0x04, ++ VGT_GRP_SINT_32 = 0x05, ++ VGT_GRP_FLOAT_32 = 0x06, ++ VGT_GRP_AUTO_PRIM = 0x07, ++ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, ++ X_OFFSET_mask = 0x0f << 4, ++ X_OFFSET_shift = 4, ++ Y_CONV_mask = 0x0f << 8, ++ Y_CONV_shift = 8, ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* 
VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++ Y_OFFSET_mask = 0x0f << 12, ++ Y_OFFSET_shift = 12, ++ Z_CONV_mask = 0x0f << 16, ++ Z_CONV_shift = 16, ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++ Z_OFFSET_mask = 0x0f << 20, ++ Z_OFFSET_shift = 20, ++ W_CONV_mask = 0x0f << 24, ++ W_CONV_shift = 24, ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++ W_OFFSET_mask = 0x0f << 28, ++ W_OFFSET_shift = 28, ++ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, ++/* X_CONV_mask = 0x0f << 0, */ ++/* X_CONV_shift = 0, */ ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++/* X_OFFSET_mask = 0x0f << 4, */ ++/* X_OFFSET_shift = 4, */ ++/* Y_CONV_mask = 0x0f << 8, */ ++/* Y_CONV_shift = 8, */ ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++/* Y_OFFSET_mask = 0x0f << 12, */ ++/* Y_OFFSET_shift = 12, */ ++/* Z_CONV_mask = 0x0f << 16, */ ++/* Z_CONV_shift = 16, */ ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ 
++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++/* Z_OFFSET_mask = 0x0f << 20, */ ++/* Z_OFFSET_shift = 20, */ ++/* W_CONV_mask = 0x0f << 24, */ ++/* W_CONV_shift = 24, */ ++/* VGT_GRP_INDEX_16 = 0x00, */ ++/* VGT_GRP_INDEX_32 = 0x01, */ ++/* VGT_GRP_UINT_16 = 0x02, */ ++/* VGT_GRP_UINT_32 = 0x03, */ ++/* VGT_GRP_SINT_16 = 0x04, */ ++/* VGT_GRP_SINT_32 = 0x05, */ ++/* VGT_GRP_FLOAT_32 = 0x06, */ ++/* VGT_GRP_AUTO_PRIM = 0x07, */ ++/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ ++/* W_OFFSET_mask = 0x0f << 28, */ ++/* W_OFFSET_shift = 28, */ ++ VGT_GS_MODE = 0x00028a40, ++ MODE_mask = 0x03 << 0, ++ MODE_shift = 0, ++ GS_OFF = 0x00, ++ GS_SCENARIO_A = 0x01, ++ GS_SCENARIO_B = 0x02, ++ GS_SCENARIO_G = 0x03, ++ ES_PASSTHRU_bit = 1 << 2, ++ CUT_MODE_mask = 0x03 << 3, ++ CUT_MODE_shift = 3, ++ GS_CUT_1024 = 0x00, ++ GS_CUT_512 = 0x01, ++ GS_CUT_256 = 0x02, ++ GS_CUT_128 = 0x03, ++ PA_SC_MPASS_PS_CNTL = 0x00028a48, ++ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0, ++ MPASS_PIX_VEC_PER_PASS_shift = 0, ++ MPASS_PS_ENA_bit = 1 << 31, ++ PA_SC_MODE_CNTL = 0x00028a4c, ++ MSAA_ENABLE_bit = 1 << 0, ++ CLIPRECT_ENABLE_bit = 1 << 1, ++ LINE_STIPPLE_ENABLE_bit = 1 << 2, ++ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3, ++ WALK_ORDER_ENABLE_bit = 1 << 4, ++ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5, ++ WALK_SIZE_bit = 1 << 6, ++ WALK_ALIGNMENT_bit = 1 << 7, ++ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8, ++ TILE_COVER_NO_SCISSOR_bit = 1 << 9, ++ KILL_PIX_POST_HI_Z_bit = 1 << 10, ++ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11, ++ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12, ++ TILE_COVER_DISABLE_bit = 1 << 13, ++ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14, ++ FORCE_EOV_TILE_ENABLE_bit = 1 << 15, ++ FORCE_EOV_REZ_ENABLE_bit = 1 << 16, ++ PS_ITER_SAMPLE_bit = 1 << 17, ++ VGT_ENHANCE = 0x00028a50, ++ VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0, ++ VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0, ++ 
X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00, ++ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01, ++ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02, ++ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03, ++ MISC_mask = 0x3fffffff << 2, ++ MISC_shift = 2, ++ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, ++ OUTPRIM_TYPE_mask = 0x3f << 0, ++ OUTPRIM_TYPE_shift = 0, ++ POINTLIST = 0x00, ++ LINESTRIP = 0x01, ++ TRISTRIP = 0x02, ++ VGT_DMA_SIZE = 0x00028a74, ++ VGT_DMA_INDEX_TYPE = 0x00028a7c, ++/* INDEX_TYPE_mask = 0x03 << 0, */ ++/* INDEX_TYPE_shift = 0, */ ++ VGT_INDEX_16 = 0x00, ++ VGT_INDEX_32 = 0x01, ++ SWAP_MODE_mask = 0x03 << 2, ++ SWAP_MODE_shift = 2, ++ VGT_DMA_SWAP_NONE = 0x00, ++ VGT_DMA_SWAP_16_BIT = 0x01, ++ VGT_DMA_SWAP_32_BIT = 0x02, ++ VGT_DMA_SWAP_WORD = 0x03, ++ VGT_PRIMITIVEID_EN = 0x00028a84, ++ PRIMITIVEID_EN_bit = 1 << 0, ++ VGT_DMA_NUM_INSTANCES = 0x00028a88, ++ VGT_EVENT_INITIATOR = 0x00028a90, ++ EVENT_TYPE_mask = 0x3f << 0, ++ EVENT_TYPE_shift = 0, ++ CACHE_FLUSH_TS = 0x04, ++ CONTEXT_DONE = 0x05, ++ CACHE_FLUSH = 0x06, ++ VIZQUERY_START = 0x07, ++ VIZQUERY_END = 0x08, ++ SC_WAIT_WC = 0x09, ++ MPASS_PS_CP_REFETCH = 0x0a, ++ MPASS_PS_RST_START = 0x0b, ++ MPASS_PS_INCR_START = 0x0c, ++ RST_PIX_CNT = 0x0d, ++ RST_VTX_CNT = 0x0e, ++ VS_PARTIAL_FLUSH = 0x0f, ++ PS_PARTIAL_FLUSH = 0x10, ++ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, ++ ZPASS_DONE = 0x15, ++ CACHE_FLUSH_AND_INV_EVENT = 0x16, ++ PERFCOUNTER_START = 0x17, ++ PERFCOUNTER_STOP = 0x18, ++ PIPELINESTAT_START = 0x19, ++ PIPELINESTAT_STOP = 0x1a, ++ PERFCOUNTER_SAMPLE = 0x1b, ++ FLUSH_ES_OUTPUT = 0x1c, ++ FLUSH_GS_OUTPUT = 0x1d, ++ SAMPLE_PIPELINESTAT = 0x1e, ++ SO_VGTSTREAMOUT_FLUSH = 0x1f, ++ SAMPLE_STREAMOUTSTATS = 0x20, ++ RESET_VTX_CNT = 0x21, ++ BLOCK_CONTEXT_DONE = 0x22, ++ CR_CONTEXT_DONE = 0x23, ++ VGT_FLUSH = 0x24, ++ CR_DONE_TS = 0x25, ++ SQ_NON_EVENT = 0x26, ++ SC_SEND_DB_VPZ = 0x27, ++ BOTTOM_OF_PIPE_TS = 0x28, ++ DB_CACHE_FLUSH_AND_INV = 0x2a, ++ ADDRESS_HI_mask = 0xff << 19, ++ 
ADDRESS_HI_shift = 19, ++ EXTENDED_EVENT_bit = 1 << 27, ++ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, ++ RESET_EN_bit = 1 << 0, ++ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, ++ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, ++ VGT_STRMOUT_EN = 0x00028ab0, ++ STREAMOUT_bit = 1 << 0, ++ VGT_REUSE_OFF = 0x00028ab4, ++ REUSE_OFF_bit = 1 << 0, ++ VGT_VTX_CNT_EN = 0x00028ab8, ++ VTX_CNT_EN_bit = 1 << 0, ++ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, ++ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, ++ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, ++ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, ++ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, ++ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, ++ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, ++ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, ++ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, ++ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, ++ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, ++ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, ++ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, ++ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, ++ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, ++ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, ++ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, ++ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, ++ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, ++ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, ++ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, ++ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, ++ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, ++ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, ++ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, ++ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, ++ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, ++ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, ++ VGT_STRMOUT_BUFFER_EN = 0x00028b20, ++ BUFFER_0_EN_bit = 1 << 0, ++ BUFFER_1_EN_bit = 1 << 1, ++ BUFFER_2_EN_bit = 1 << 2, ++ BUFFER_3_EN_bit = 1 << 3, ++ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, ++ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, ++ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, ++ 
VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, ++ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, ++ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, ++ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50, ++ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, ++ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, ++ PA_SC_LINE_CNTL = 0x00028c00, ++ BRES_CNTL_mask = 0xff << 0, ++ BRES_CNTL_shift = 0, ++ USE_BRES_CNTL_bit = 1 << 8, ++ EXPAND_LINE_WIDTH_bit = 1 << 9, ++ LAST_PIXEL_bit = 1 << 10, ++ PA_SC_AA_CONFIG = 0x00028c04, ++ MSAA_NUM_SAMPLES_mask = 0x03 << 0, ++ MSAA_NUM_SAMPLES_shift = 0, ++ AA_MASK_CENTROID_DTMN_bit = 1 << 4, ++ MAX_SAMPLE_DIST_mask = 0x0f << 13, ++ MAX_SAMPLE_DIST_shift = 13, ++ PA_SU_VTX_CNTL = 0x00028c08, ++ PIX_CENTER_bit = 1 << 0, ++ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, ++ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, ++ X_TRUNCATE = 0x00, ++ X_ROUND = 0x01, ++ X_ROUND_TO_EVEN = 0x02, ++ X_ROUND_TO_ODD = 0x03, ++ QUANT_MODE_mask = 0x07 << 3, ++ QUANT_MODE_shift = 3, ++ X_1_16TH = 0x00, ++ X_1_8TH = 0x01, ++ X_1_4TH = 0x02, ++ X_1_2 = 0x03, ++ X_1 = 0x04, ++ X_1_256TH = 0x05, ++ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, ++ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, ++ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, ++ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18, ++ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c, ++/* S0_X_mask = 0x0f << 0, */ ++/* S0_X_shift = 0, */ ++/* S0_Y_mask = 0x0f << 4, */ ++/* S0_Y_shift = 4, */ ++/* S1_X_mask = 0x0f << 8, */ ++/* S1_X_shift = 8, */ ++/* S1_Y_mask = 0x0f << 12, */ ++/* S1_Y_shift = 12, */ ++/* S2_X_mask = 0x0f << 16, */ ++/* S2_X_shift = 16, */ ++/* S2_Y_mask = 0x0f << 20, */ ++/* S2_Y_shift = 20, */ 
++/* S3_X_mask = 0x0f << 24, */ ++/* S3_X_shift = 24, */ ++/* S3_Y_mask = 0x0f << 28, */ ++/* S3_Y_shift = 28, */ ++ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20, ++/* S4_X_mask = 0x0f << 0, */ ++/* S4_X_shift = 0, */ ++/* S4_Y_mask = 0x0f << 4, */ ++/* S4_Y_shift = 4, */ ++/* S5_X_mask = 0x0f << 8, */ ++/* S5_X_shift = 8, */ ++/* S5_Y_mask = 0x0f << 12, */ ++/* S5_Y_shift = 12, */ ++/* S6_X_mask = 0x0f << 16, */ ++/* S6_X_shift = 16, */ ++/* S6_Y_mask = 0x0f << 20, */ ++/* S6_Y_shift = 20, */ ++/* S7_X_mask = 0x0f << 24, */ ++/* S7_X_shift = 24, */ ++/* S7_Y_mask = 0x0f << 28, */ ++/* S7_Y_shift = 28, */ ++ CB_CLRCMP_CONTROL = 0x00028c30, ++ CLRCMP_FCN_SRC_mask = 0x07 << 0, ++ CLRCMP_FCN_SRC_shift = 0, ++ CLRCMP_DRAW_ALWAYS = 0x00, ++ CLRCMP_DRAW_NEVER = 0x01, ++ CLRCMP_DRAW_ON_NEQ = 0x04, ++ CLRCMP_DRAW_ON_EQ = 0x05, ++ CLRCMP_FCN_DST_mask = 0x07 << 8, ++ CLRCMP_FCN_DST_shift = 8, ++/* CLRCMP_DRAW_ALWAYS = 0x00, */ ++/* CLRCMP_DRAW_NEVER = 0x01, */ ++/* CLRCMP_DRAW_ON_NEQ = 0x04, */ ++/* CLRCMP_DRAW_ON_EQ = 0x05, */ ++ CLRCMP_FCN_SEL_mask = 0x03 << 24, ++ CLRCMP_FCN_SEL_shift = 24, ++ CLRCMP_SEL_DST = 0x00, ++ CLRCMP_SEL_SRC = 0x01, ++ CLRCMP_SEL_AND = 0x02, ++ CB_CLRCMP_SRC = 0x00028c34, ++ CB_CLRCMP_DST = 0x00028c38, ++ CB_CLRCMP_MSK = 0x00028c3c, ++ PA_SC_AA_MASK = 0x00028c48, ++ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, ++ VTX_REUSE_DEPTH_mask = 0xff << 0, ++ VTX_REUSE_DEPTH_shift = 0, ++ VGT_OUT_DEALLOC_CNTL = 0x00028c5c, ++ DEALLOC_DIST_mask = 0x7f << 0, ++ DEALLOC_DIST_shift = 0, ++ DB_RENDER_CONTROL = 0x00028d0c, ++ DEPTH_CLEAR_ENABLE_bit = 1 << 0, ++ STENCIL_CLEAR_ENABLE_bit = 1 << 1, ++ DEPTH_COPY_bit = 1 << 2, ++ STENCIL_COPY_bit = 1 << 3, ++ RESUMMARIZE_ENABLE_bit = 1 << 4, ++ STENCIL_COMPRESS_DISABLE_bit = 1 << 5, ++ DEPTH_COMPRESS_DISABLE_bit = 1 << 6, ++ COPY_CENTROID_bit = 1 << 7, ++ COPY_SAMPLE_mask = 0x07 << 8, ++ COPY_SAMPLE_shift = 8, ++ ZPASS_INCREMENT_DISABLE_bit = 1 << 11, ++ DB_RENDER_OVERRIDE = 0x00028d10, ++ FORCE_HIZ_ENABLE_mask = 0x03 
<< 0, ++ FORCE_HIZ_ENABLE_shift = 0, ++ FORCE_OFF = 0x00, ++ FORCE_ENABLE = 0x01, ++ FORCE_DISABLE = 0x02, ++ FORCE_RESERVED = 0x03, ++ FORCE_HIS_ENABLE0_mask = 0x03 << 2, ++ FORCE_HIS_ENABLE0_shift = 2, ++/* FORCE_OFF = 0x00, */ ++/* FORCE_ENABLE = 0x01, */ ++/* FORCE_DISABLE = 0x02, */ ++/* FORCE_RESERVED = 0x03, */ ++ FORCE_HIS_ENABLE1_mask = 0x03 << 4, ++ FORCE_HIS_ENABLE1_shift = 4, ++/* FORCE_OFF = 0x00, */ ++/* FORCE_ENABLE = 0x01, */ ++/* FORCE_DISABLE = 0x02, */ ++/* FORCE_RESERVED = 0x03, */ ++ FORCE_SHADER_Z_ORDER_bit = 1 << 6, ++ FAST_Z_DISABLE_bit = 1 << 7, ++ FAST_STENCIL_DISABLE_bit = 1 << 8, ++ NOOP_CULL_DISABLE_bit = 1 << 9, ++ FORCE_COLOR_KILL_bit = 1 << 10, ++ FORCE_Z_READ_bit = 1 << 11, ++ FORCE_STENCIL_READ_bit = 1 << 12, ++ FORCE_FULL_Z_RANGE_mask = 0x03 << 13, ++ FORCE_FULL_Z_RANGE_shift = 13, ++/* FORCE_OFF = 0x00, */ ++/* FORCE_ENABLE = 0x01, */ ++/* FORCE_DISABLE = 0x02, */ ++/* FORCE_RESERVED = 0x03, */ ++ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, ++ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, ++ IGNORE_SC_ZRANGE_bit = 1 << 17, ++ DB_HTILE_SURFACE = 0x00028d24, ++ HTILE_WIDTH_bit = 1 << 0, ++ HTILE_HEIGHT_bit = 1 << 1, ++ LINEAR_bit = 1 << 2, ++ FULL_CACHE_bit = 1 << 3, ++ HTILE_USES_PRELOAD_WIN_bit = 1 << 4, ++ PRELOAD_bit = 1 << 5, ++ PREFETCH_WIDTH_mask = 0x3f << 6, ++ PREFETCH_WIDTH_shift = 6, ++ PREFETCH_HEIGHT_mask = 0x3f << 12, ++ PREFETCH_HEIGHT_shift = 12, ++ DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, ++ COMPAREFUNC1_mask = 0x07 << 0, ++ COMPAREFUNC1_shift = 0, ++/* REF_NEVER = 0x00, */ ++/* REF_LESS = 0x01, */ ++/* REF_EQUAL = 0x02, */ ++/* REF_LEQUAL = 0x03, */ ++/* REF_GREATER = 0x04, */ ++/* REF_NOTEQUAL = 0x05, */ ++/* REF_GEQUAL = 0x06, */ ++/* REF_ALWAYS = 0x07, */ ++ COMPAREVALUE1_mask = 0xff << 4, ++ COMPAREVALUE1_shift = 4, ++ COMPAREMASK1_mask = 0xff << 12, ++ COMPAREMASK1_shift = 12, ++ ENABLE1_bit = 1 << 24, ++ DB_PRELOAD_CONTROL = 0x00028d30, ++ START_X_mask = 0xff << 0, ++ START_X_shift = 0, ++ START_Y_mask = 0xff << 8, ++ 
START_Y_shift = 8, ++ MAX_X_mask = 0xff << 16, ++ MAX_X_shift = 16, ++ MAX_Y_mask = 0xff << 24, ++ MAX_Y_shift = 24, ++ DB_PREFETCH_LIMIT = 0x00028d34, ++ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0, ++ DEPTH_HEIGHT_TILE_MAX_shift = 0, ++ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8, ++ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0, ++ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, ++ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, ++ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc, ++ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00, ++ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04, ++ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08, ++ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c, ++ PA_CL_POINT_X_RAD = 0x00028e10, ++ PA_CL_POINT_Y_RAD = 0x00028e14, ++ PA_CL_POINT_SIZE = 0x00028e18, ++ PA_CL_POINT_CULL_RAD = 0x00028e1c, ++ PA_CL_UCP_0_X = 0x00028e20, ++ PA_CL_UCP_0_X_num = 6, ++ PA_CL_UCP_0_X_offset = 16, ++ PA_CL_UCP_0_Y = 0x00028e24, ++ PA_CL_UCP_0_Y_num = 6, ++ PA_CL_UCP_0_Y_offset = 16, ++ PA_CL_UCP_0_Z = 0x00028e28, ++ PA_CL_UCP_0_Z_num = 6, ++ PA_CL_UCP_0_Z_offset = 16, ++ SQ_ALU_CONSTANT0_0 = 0x00030000, ++ SQ_ALU_CONSTANT1_0 = 0x00030004, ++ SQ_ALU_CONSTANT2_0 = 0x00030008, ++ SQ_ALU_CONSTANT3_0 = 0x0003000c, ++ SQ_VTX_CONSTANT_WORD0_0 = 0x00038000, ++ SQ_TEX_RESOURCE_WORD0_0 = 0x00038000, ++ DIM_mask = 0x07 << 0, ++ DIM_shift = 0, ++ SQ_TEX_DIM_1D = 0x00, ++ SQ_TEX_DIM_2D = 0x01, ++ SQ_TEX_DIM_3D = 0x02, ++ SQ_TEX_DIM_CUBEMAP = 0x03, ++ SQ_TEX_DIM_1D_ARRAY = 0x04, ++ SQ_TEX_DIM_2D_ARRAY = 0x05, ++ SQ_TEX_DIM_2D_MSAA = 0x06, ++ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, ++ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3, ++ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3, ++ TILE_TYPE_bit = 1 << 7, ++ PITCH_mask = 0x7ff << 8, ++ PITCH_shift = 8, ++ TEX_WIDTH_mask = 0x1fff << 19, ++ TEX_WIDTH_shift = 19, ++ SQ_VTX_CONSTANT_WORD1_0 = 0x00038004, ++ SQ_TEX_RESOURCE_WORD1_0 = 0x00038004, ++ TEX_HEIGHT_mask = 0x1fff << 0, ++ TEX_HEIGHT_shift = 0, ++ TEX_DEPTH_mask = 0x1fff << 13, ++ TEX_DEPTH_shift = 13, ++ 
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26, ++ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26, ++ SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, ++ BASE_ADDRESS_HI_mask = 0xff << 0, ++ BASE_ADDRESS_HI_shift = 0, ++ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, ++ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, ++ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, ++ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, ++ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, ++ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, ++ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, ++/* SQ_NUM_FORMAT_NORM = 0x00, */ ++/* SQ_NUM_FORMAT_INT = 0x01, */ ++/* SQ_NUM_FORMAT_SCALED = 0x02, */ ++ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, ++ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, ++ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, ++ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, ++/* SQ_ENDIAN_NONE = 0x00, */ ++/* SQ_ENDIAN_8IN16 = 0x01, */ ++/* SQ_ENDIAN_8IN32 = 0x02, */ ++ SQ_TEX_RESOURCE_WORD2_0 = 0x00038008, ++ SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c, ++ MEM_REQUEST_SIZE_mask = 0x03 << 0, ++ MEM_REQUEST_SIZE_shift = 0, ++ SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c, ++ SQ_TEX_RESOURCE_WORD4_0 = 0x00038010, ++ FORMAT_COMP_X_mask = 0x03 << 0, ++ FORMAT_COMP_X_shift = 0, ++ SQ_FORMAT_COMP_UNSIGNED = 0x00, ++ SQ_FORMAT_COMP_SIGNED = 0x01, ++ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, ++ FORMAT_COMP_Y_mask = 0x03 << 2, ++ FORMAT_COMP_Y_shift = 2, ++/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ ++/* SQ_FORMAT_COMP_SIGNED = 0x01, */ ++/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ ++ FORMAT_COMP_Z_mask = 0x03 << 4, ++ FORMAT_COMP_Z_shift = 4, ++/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ ++/* SQ_FORMAT_COMP_SIGNED = 0x01, */ ++/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ ++ FORMAT_COMP_W_mask = 0x03 << 6, ++ FORMAT_COMP_W_shift = 6, ++/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ ++/* SQ_FORMAT_COMP_SIGNED = 0x01, */ ++/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ ++ 
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, ++ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, ++/* SQ_NUM_FORMAT_NORM = 0x00, */ ++/* SQ_NUM_FORMAT_INT = 0x01, */ ++/* SQ_NUM_FORMAT_SCALED = 0x02, */ ++ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, ++ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, ++ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, ++ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, ++/* SQ_ENDIAN_NONE = 0x00, */ ++/* SQ_ENDIAN_8IN16 = 0x01, */ ++/* SQ_ENDIAN_8IN32 = 0x02, */ ++ REQUEST_SIZE_mask = 0x03 << 14, ++ REQUEST_SIZE_shift = 14, ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, ++ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, ++/* SQ_SEL_X = 0x00, */ ++/* SQ_SEL_Y = 0x01, */ ++/* SQ_SEL_Z = 0x02, */ ++/* SQ_SEL_W = 0x03, */ ++/* SQ_SEL_0 = 0x04, */ ++/* SQ_SEL_1 = 0x05, */ ++ BASE_LEVEL_mask = 0x0f << 28, ++ BASE_LEVEL_shift = 28, ++ SQ_TEX_RESOURCE_WORD5_0 = 0x00038014, ++ LAST_LEVEL_mask = 0x0f << 0, ++ LAST_LEVEL_shift = 0, ++ BASE_ARRAY_mask = 0x1fff << 4, ++ BASE_ARRAY_shift = 4, ++ LAST_ARRAY_mask = 0x1fff << 17, ++ LAST_ARRAY_shift = 17, ++ SQ_TEX_RESOURCE_WORD6_0 = 0x00038018, ++ MPEG_CLAMP_mask = 0x03 << 0, ++ MPEG_CLAMP_shift = 
0, ++ SQ_TEX_MPEG_CLAMP_OFF = 0x00, ++ SQ_TEX_MPEG_9 = 0x01, ++ SQ_TEX_MPEG_10 = 0x02, ++ PERF_MODULATION_mask = 0x07 << 5, ++ PERF_MODULATION_shift = 5, ++ INTERLACED_bit = 1 << 8, ++ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30, ++ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30, ++ SQ_TEX_VTX_INVALID_TEXTURE = 0x00, ++ SQ_TEX_VTX_INVALID_BUFFER = 0x01, ++ SQ_TEX_VTX_VALID_TEXTURE = 0x02, ++ SQ_TEX_VTX_VALID_BUFFER = 0x03, ++ SQ_VTX_CONSTANT_WORD6_0 = 0x00038018, ++ SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30, ++ SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30, ++/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ ++/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ ++/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ ++/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ ++ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, ++ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, ++ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, ++ SQ_TEX_WRAP = 0x00, ++ SQ_TEX_MIRROR = 0x01, ++ SQ_TEX_CLAMP_LAST_TEXEL = 0x02, ++ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, ++ SQ_TEX_CLAMP_HALF_BORDER = 0x04, ++ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, ++ SQ_TEX_CLAMP_BORDER = 0x06, ++ SQ_TEX_MIRROR_ONCE_BORDER = 0x07, ++ CLAMP_Y_mask = 0x07 << 3, ++ CLAMP_Y_shift = 3, ++/* SQ_TEX_WRAP = 0x00, */ ++/* SQ_TEX_MIRROR = 0x01, */ ++/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ ++/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ ++/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ ++/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ ++/* SQ_TEX_CLAMP_BORDER = 0x06, */ ++/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ ++ CLAMP_Z_mask = 0x07 << 6, ++ CLAMP_Z_shift = 6, ++/* SQ_TEX_WRAP = 0x00, */ ++/* SQ_TEX_MIRROR = 0x01, */ ++/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ ++/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ ++/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ ++/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ ++/* SQ_TEX_CLAMP_BORDER = 0x06, */ ++/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ ++ XY_MAG_FILTER_mask = 0x07 << 9, ++ XY_MAG_FILTER_shift = 9, ++ SQ_TEX_XY_FILTER_POINT = 0x00, ++ SQ_TEX_XY_FILTER_BILINEAR = 0x01, ++ 
SQ_TEX_XY_FILTER_BICUBIC = 0x02, ++ XY_MIN_FILTER_mask = 0x07 << 12, ++ XY_MIN_FILTER_shift = 12, ++/* SQ_TEX_XY_FILTER_POINT = 0x00, */ ++/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ ++/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */ ++ Z_FILTER_mask = 0x03 << 15, ++ Z_FILTER_shift = 15, ++ SQ_TEX_Z_FILTER_NONE = 0x00, ++ SQ_TEX_Z_FILTER_POINT = 0x01, ++ SQ_TEX_Z_FILTER_LINEAR = 0x02, ++ MIP_FILTER_mask = 0x03 << 17, ++ MIP_FILTER_shift = 17, ++/* SQ_TEX_Z_FILTER_NONE = 0x00, */ ++/* SQ_TEX_Z_FILTER_POINT = 0x01, */ ++/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ ++ BORDER_COLOR_TYPE_mask = 0x03 << 22, ++ BORDER_COLOR_TYPE_shift = 22, ++ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, ++ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, ++ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, ++ SQ_TEX_BORDER_COLOR_REGISTER = 0x03, ++ POINT_SAMPLING_CLAMP_bit = 1 << 24, ++ TEX_ARRAY_OVERRIDE_bit = 1 << 25, ++ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26, ++ DEPTH_COMPARE_FUNCTION_shift = 26, ++ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, ++ SQ_TEX_DEPTH_COMPARE_LESS = 0x01, ++ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, ++ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, ++ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, ++ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, ++ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, ++ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, ++ CHROMA_KEY_mask = 0x03 << 29, ++ CHROMA_KEY_shift = 29, ++ SQ_TEX_CHROMA_KEY_DISABLED = 0x00, ++ SQ_TEX_CHROMA_KEY_KILL = 0x01, ++ SQ_TEX_CHROMA_KEY_BLEND = 0x02, ++ LOD_USES_MINOR_AXIS_bit = 1 << 31, ++ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, ++ MIN_LOD_mask = 0x3ff << 0, ++ MIN_LOD_shift = 0, ++ MAX_LOD_mask = 0x3ff << 10, ++ MAX_LOD_shift = 10, ++ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20, ++ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20, ++ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, ++ LOD_BIAS_SEC_mask = 0xfff << 0, ++ LOD_BIAS_SEC_shift = 0, ++ MC_COORD_TRUNCATE_bit = 1 << 12, ++ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13, ++ HIGH_PRECISION_FILTER_bit = 1 << 14, ++ PERF_MIP_mask = 0x07 << 15, ++ 
PERF_MIP_shift = 15, ++ PERF_Z_mask = 0x03 << 18, ++ PERF_Z_shift = 18, ++ FETCH_4_bit = 1 << 26, ++ SAMPLE_IS_PCF_bit = 1 << 27, ++ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, ++ SQ_VTX_BASE_VTX_LOC = 0x0003cff0, ++ SQ_VTX_START_INST_LOC = 0x0003cff4, ++ SQ_LOOP_CONST_DX10_0 = 0x0003e200, ++ SQ_LOOP_CONST_0 = 0x0003e200, ++ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, ++ SQ_LOOP_CONST_0__COUNT_shift = 0, ++ INIT_mask = 0xfff << 12, ++ INIT_shift = 12, ++ INC_mask = 0xff << 24, ++ INC_shift = 24, ++ SQ_BOOL_CONST_0 = 0x0003e380, ++ SQ_BOOL_CONST_0_num = 3, ++ ++} ; ++ ++#endif /* _AUTOREGS */ ++ +diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h +new file mode 100644 +index 0000000..2e7dfa9 +--- /dev/null ++++ b/src/r600_reg_r6xx.h +@@ -0,0 +1,494 @@ ++/* ++ * RadeonHD R6xx, R7xx Register documentation ++ * ++ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. ++ * Copyright (C) 2008-2009 Matthias Hopf ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included ++ * in all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef _R600_REG_R6xx_H_ ++#define _R600_REG_R6xx_H_ ++ ++/* ++ * Registers for R6xx chips that are not documented yet ++ */ ++ ++enum { ++ ++ MM_INDEX = 0x0000, ++ MM_DATA = 0x0004, ++ ++ SRBM_STATUS = 0x0e50, ++ RLC_RQ_PENDING_bit = 1 << 3, ++ RCU_RQ_PENDING_bit = 1 << 4, ++ GRBM_RQ_PENDING_bit = 1 << 5, ++ HI_RQ_PENDING_bit = 1 << 6, ++ IO_EXTERN_SIGNAL_bit = 1 << 7, ++ VMC_BUSY_bit = 1 << 8, ++ MCB_BUSY_bit = 1 << 9, ++ MCDZ_BUSY_bit = 1 << 10, ++ MCDY_BUSY_bit = 1 << 11, ++ MCDX_BUSY_bit = 1 << 12, ++ MCDW_BUSY_bit = 1 << 13, ++ SEM_BUSY_bit = 1 << 14, ++ SRBM_STATUS__RLC_BUSY_bit = 1 << 15, ++ PDMA_BUSY_bit = 1 << 16, ++ IH_BUSY_bit = 1 << 17, ++ CSC_BUSY_bit = 1 << 20, ++ CMC7_BUSY_bit = 1 << 21, ++ CMC6_BUSY_bit = 1 << 22, ++ CMC5_BUSY_bit = 1 << 23, ++ CMC4_BUSY_bit = 1 << 24, ++ CMC3_BUSY_bit = 1 << 25, ++ CMC2_BUSY_bit = 1 << 26, ++ CMC1_BUSY_bit = 1 << 27, ++ CMC0_BUSY_bit = 1 << 28, ++ BIF_BUSY_bit = 1 << 29, ++ IDCT_BUSY_bit = 1 << 30, ++ ++ SRBM_READ_ERROR = 0x0e98, ++ READ_ADDRESS_mask = 0xffff << 2, ++ READ_ADDRESS_shift = 2, ++ READ_REQUESTER_HI_bit = 1 << 24, ++ READ_REQUESTER_GRBM_bit = 1 << 25, ++ READ_REQUESTER_RCU_bit = 1 << 26, ++ READ_REQUESTER_RLC_bit = 1 << 27, ++ READ_ERROR_bit = 1 << 31, ++ ++ SRBM_INT_STATUS = 0x0ea4, ++ RDERR_INT_STAT_bit = 1 << 0, ++ GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1, ++ SRBM_INT_ACK = 0x0ea8, ++ RDERR_INT_ACK_bit = 1 << 0, ++ GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1, ++ ++ R6XX_MC_VM_FB_LOCATION = 0x2180, ++ ++ VENDOR_DEVICE_ID = 0x4000, ++ ++ HDP_MEM_COHERENCY_FLUSH_CNTL = 0x5480, ++ ++ D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110, ++ D1GRPH_PITCH = 0x6120, ++ D1GRPH_Y_END = 0x6138, ++ ++ GRBM_STATUS = 0x8010, ++ CMDFIFO_AVAIL_mask = 0x1f << 0, ++ 
CMDFIFO_AVAIL_shift = 0, ++ SRBM_RQ_PENDING_bit = 1 << 5, ++ CP_RQ_PENDING_bit = 1 << 6, ++ CF_RQ_PENDING_bit = 1 << 7, ++ PF_RQ_PENDING_bit = 1 << 8, ++ GRBM_EE_BUSY_bit = 1 << 10, ++ GRBM_STATUS__VC_BUSY_bit = 1 << 11, ++ DB03_CLEAN_bit = 1 << 12, ++ CB03_CLEAN_bit = 1 << 13, ++ VGT_BUSY_NO_DMA_bit = 1 << 16, ++ GRBM_STATUS__VGT_BUSY_bit = 1 << 17, ++ TA03_BUSY_bit = 1 << 18, ++ GRBM_STATUS__TC_BUSY_bit = 1 << 19, ++ SX_BUSY_bit = 1 << 20, ++ SH_BUSY_bit = 1 << 21, ++ SPI03_BUSY_bit = 1 << 22, ++ SMX_BUSY_bit = 1 << 23, ++ SC_BUSY_bit = 1 << 24, ++ PA_BUSY_bit = 1 << 25, ++ DB03_BUSY_bit = 1 << 26, ++ CR_BUSY_bit = 1 << 27, ++ CP_COHERENCY_BUSY_bit = 1 << 28, ++ GRBM_STATUS__CP_BUSY_bit = 1 << 29, ++ CB03_BUSY_bit = 1 << 30, ++ GUI_ACTIVE_bit = 1 << 31, ++ GRBM_STATUS2 = 0x8014, ++ CR_CLEAN_bit = 1 << 0, ++ SMX_CLEAN_bit = 1 << 1, ++ SPI0_BUSY_bit = 1 << 8, ++ SPI1_BUSY_bit = 1 << 9, ++ SPI2_BUSY_bit = 1 << 10, ++ SPI3_BUSY_bit = 1 << 11, ++ TA0_BUSY_bit = 1 << 12, ++ TA1_BUSY_bit = 1 << 13, ++ TA2_BUSY_bit = 1 << 14, ++ TA3_BUSY_bit = 1 << 15, ++ DB0_BUSY_bit = 1 << 16, ++ DB1_BUSY_bit = 1 << 17, ++ DB2_BUSY_bit = 1 << 18, ++ DB3_BUSY_bit = 1 << 19, ++ CB0_BUSY_bit = 1 << 20, ++ CB1_BUSY_bit = 1 << 21, ++ CB2_BUSY_bit = 1 << 22, ++ CB3_BUSY_bit = 1 << 23, ++ GRBM_SOFT_RESET = 0x8020, ++ SOFT_RESET_CP_bit = 1 << 0, ++ SOFT_RESET_CB_bit = 1 << 1, ++ SOFT_RESET_CR_bit = 1 << 2, ++ SOFT_RESET_DB_bit = 1 << 3, ++ SOFT_RESET_PA_bit = 1 << 5, ++ SOFT_RESET_SC_bit = 1 << 6, ++ SOFT_RESET_SMX_bit = 1 << 7, ++ SOFT_RESET_SPI_bit = 1 << 8, ++ SOFT_RESET_SH_bit = 1 << 9, ++ SOFT_RESET_SX_bit = 1 << 10, ++ SOFT_RESET_TC_bit = 1 << 11, ++ SOFT_RESET_TA_bit = 1 << 12, ++ SOFT_RESET_VC_bit = 1 << 13, ++ SOFT_RESET_VGT_bit = 1 << 14, ++ SOFT_RESET_GRBM_GCA_bit = 1 << 15, ++ ++ WAIT_UNTIL = 0x8040, ++ WAIT_CP_DMA_IDLE_bit = 1 << 8, ++ WAIT_CMDFIFO_bit = 1 << 10, ++ WAIT_2D_IDLE_bit = 1 << 14, ++ WAIT_3D_IDLE_bit = 1 << 15, ++ WAIT_2D_IDLECLEAN_bit = 1 << 16, ++ 
WAIT_3D_IDLECLEAN_bit = 1 << 17, ++ WAIT_EXTERN_SIG_bit = 1 << 19, ++ CMDFIFO_ENTRIES_mask = 0x1f << 20, ++ CMDFIFO_ENTRIES_shift = 20, ++ ++ GRBM_READ_ERROR = 0x8058, ++/* READ_ADDRESS_mask = 0xffff << 2, */ ++/* READ_ADDRESS_shift = 2, */ ++ READ_REQUESTER_SRBM_bit = 1 << 28, ++ READ_REQUESTER_CP_bit = 1 << 29, ++ READ_REQUESTER_WU_POLL_bit = 1 << 30, ++/* READ_ERROR_bit = 1 << 31, */ ++ ++ SCRATCH_REG0 = 0x8500, ++ SCRATCH_REG1 = 0x8504, ++ SCRATCH_REG2 = 0x8508, ++ SCRATCH_REG3 = 0x850c, ++ SCRATCH_REG4 = 0x8510, ++ SCRATCH_REG5 = 0x8514, ++ SCRATCH_REG6 = 0x8518, ++ SCRATCH_REG7 = 0x851c, ++ SCRATCH_UMSK = 0x8540, ++ SCRATCH_ADDR = 0x8544, ++ ++ CP_COHER_CNTL = 0x85f0, ++ DEST_BASE_0_ENA_bit = 1 << 0, ++ DEST_BASE_1_ENA_bit = 1 << 1, ++ SO0_DEST_BASE_ENA_bit = 1 << 2, ++ SO1_DEST_BASE_ENA_bit = 1 << 3, ++ SO2_DEST_BASE_ENA_bit = 1 << 4, ++ SO3_DEST_BASE_ENA_bit = 1 << 5, ++ CB0_DEST_BASE_ENA_bit = 1 << 6, ++ CB1_DEST_BASE_ENA_bit = 1 << 7, ++ CB2_DEST_BASE_ENA_bit = 1 << 8, ++ CB3_DEST_BASE_ENA_bit = 1 << 9, ++ CB4_DEST_BASE_ENA_bit = 1 << 10, ++ CB5_DEST_BASE_ENA_bit = 1 << 11, ++ CB6_DEST_BASE_ENA_bit = 1 << 12, ++ CB7_DEST_BASE_ENA_bit = 1 << 13, ++ DB_DEST_BASE_ENA_bit = 1 << 14, ++ CR_DEST_BASE_ENA_bit = 1 << 15, ++ TC_ACTION_ENA_bit = 1 << 23, ++ VC_ACTION_ENA_bit = 1 << 24, ++ CB_ACTION_ENA_bit = 1 << 25, ++ DB_ACTION_ENA_bit = 1 << 26, ++ SH_ACTION_ENA_bit = 1 << 27, ++ SMX_ACTION_ENA_bit = 1 << 28, ++ CR0_ACTION_ENA_bit = 1 << 29, ++ CR1_ACTION_ENA_bit = 1 << 30, ++ CR2_ACTION_ENA_bit = 1 << 31, ++ CP_COHER_SIZE = 0x85f4, ++ CP_COHER_BASE = 0x85f8, ++ CP_COHER_STATUS = 0x85fc, ++ MATCHING_GFX_CNTX_mask = 0xff << 0, ++ MATCHING_GFX_CNTX_shift = 0, ++ MATCHING_CR_CNTX_mask = 0xffff << 8, ++ MATCHING_CR_CNTX_shift = 8, ++ STATUS_bit = 1 << 31, ++ ++ CP_STALLED_STAT1 = 0x8674, ++ RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0, ++ RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1, ++ RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2, ++ RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3, ++ 
RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4, ++ RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5, ++ RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6, ++ RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7, ++ RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8, ++ RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9, ++ MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16, ++ MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17, ++ MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18, ++ RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24, ++ RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25, ++ RCIU_STALLED_ON_ME_READ_bit = 1 << 26, ++ RCIU_STALLED_ON_DMA_READ_bit = 1 << 27, ++ RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28, ++ CP_STALLED_STAT2 = 0x8678, ++ PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0, ++ PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1, ++ PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2, ++ PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3, ++ MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4, ++ ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8, ++ ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9, ++ CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10, ++ GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11, ++ ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12, ++ ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13, ++ ME_WAITING_DATA_FROM_PFP_bit = 1 << 14, ++ ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15, ++ RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16, ++ RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17, ++ EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18, ++ EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19, ++ EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20, ++ EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21, ++ SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22, ++ SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23, ++ PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24, ++ SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30, ++ SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31, ++ CP_BUSY_STAT = 0x867c, ++ REG_BUS_FIFO_BUSY_bit = 1 << 0, ++ RING_FETCHING_DATA_bit = 1 << 1, ++ INDR1_FETCHING_DATA_bit = 1 << 2, ++ INDR2_FETCHING_DATA_bit = 1 << 3, ++ STATE_FETCHING_DATA_bit = 1 << 4, ++ PRED_FETCHING_DATA_bit = 1 << 5, ++ COHER_CNTR_NEQ_ZERO_bit 
= 1 << 6, ++ PFP_PARSING_PACKETS_bit = 1 << 7, ++ ME_PARSING_PACKETS_bit = 1 << 8, ++ RCIU_PFP_BUSY_bit = 1 << 9, ++ RCIU_ME_BUSY_bit = 1 << 10, ++ OUTSTANDING_READ_TAGS_bit = 1 << 11, ++ SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12, ++ SEM_FAILED_AND_HOLDING_bit = 1 << 13, ++ SEM_POLLING_FOR_PASS_bit = 1 << 14, ++ _3D_BUSY_bit = 1 << 15, ++ _2D_BUSY_bit = 1 << 16, ++ CP_STAT = 0x8680, ++ CSF_RING_BUSY_bit = 1 << 0, ++ CSF_WPTR_POLL_BUSY_bit = 1 << 1, ++ CSF_INDIRECT1_BUSY_bit = 1 << 2, ++ CSF_INDIRECT2_BUSY_bit = 1 << 3, ++ CSF_STATE_BUSY_bit = 1 << 4, ++ CSF_PREDICATE_BUSY_bit = 1 << 5, ++ CSF_BUSY_bit = 1 << 6, ++ MIU_RDREQ_BUSY_bit = 1 << 7, ++ MIU_WRREQ_BUSY_bit = 1 << 8, ++ ROQ_RING_BUSY_bit = 1 << 9, ++ ROQ_INDIRECT1_BUSY_bit = 1 << 10, ++ ROQ_INDIRECT2_BUSY_bit = 1 << 11, ++ ROQ_STATE_BUSY_bit = 1 << 12, ++ ROQ_PREDICATE_BUSY_bit = 1 << 13, ++ ROQ_ALIGN_BUSY_bit = 1 << 14, ++ PFP_BUSY_bit = 1 << 15, ++ MEQ_BUSY_bit = 1 << 16, ++ ME_BUSY_bit = 1 << 17, ++ QUERY_BUSY_bit = 1 << 18, ++ SEMAPHORE_BUSY_bit = 1 << 19, ++ INTERRUPT_BUSY_bit = 1 << 20, ++ SURFACE_SYNC_BUSY_bit = 1 << 21, ++ DMA_BUSY_bit = 1 << 22, ++ RCIU_BUSY_bit = 1 << 23, ++ CP_STAT__CP_BUSY_bit = 1 << 31, ++ ++ CP_ME_CNTL = 0x86d8, ++ ME_STATMUX_mask = 0xff << 0, ++ ME_STATMUX_shift = 0, ++ ME_HALT_bit = 1 << 28, ++ CP_ME_STATUS = 0x86dc, ++ ++ CP_RB_RPTR = 0x8700, ++ RB_RPTR_mask = 0xfffff << 0, ++ RB_RPTR_shift = 0, ++ CP_RB_WPTR_DELAY = 0x8704, ++ PRE_WRITE_TIMER_mask = 0xfffffff << 0, ++ PRE_WRITE_TIMER_shift = 0, ++ PRE_WRITE_LIMIT_mask = 0x0f << 28, ++ PRE_WRITE_LIMIT_shift = 28, ++ ++ CP_ROQ_RB_STAT = 0x8780, ++ ROQ_RPTR_PRIMARY_mask = 0x3ff << 0, ++ ROQ_RPTR_PRIMARY_shift = 0, ++ ROQ_WPTR_PRIMARY_mask = 0x3ff << 16, ++ ROQ_WPTR_PRIMARY_shift = 16, ++ CP_ROQ_IB1_STAT = 0x8784, ++ ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0, ++ ROQ_RPTR_INDIRECT1_shift = 0, ++ ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16, ++ ROQ_WPTR_INDIRECT1_shift = 16, ++ CP_ROQ_IB2_STAT = 0x8788, ++ ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0, ++ 
ROQ_RPTR_INDIRECT2_shift = 0, ++ ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16, ++ ROQ_WPTR_INDIRECT2_shift = 16, ++ ++ CP_MEQ_STAT = 0x8794, ++ MEQ_RPTR_mask = 0x3ff << 0, ++ MEQ_RPTR_shift = 0, ++ MEQ_WPTR_mask = 0x3ff << 16, ++ MEQ_WPTR_shift = 16, ++ ++ CC_GC_SHADER_PIPE_CONFIG = 0x8950, ++ INACTIVE_QD_PIPES_mask = 0xff << 8, ++ INACTIVE_QD_PIPES_shift = 8, ++ R6XX_MAX_QD_PIPES = 8, ++ INACTIVE_SIMDS_mask = 0xff << 16, ++ INACTIVE_SIMDS_shift = 16, ++ R6XX_MAX_SIMDS = 8, ++ GC_USER_SHADER_PIPE_CONFIG = 0x8954, ++ ++ VC_ENHANCE = 0x9714, ++ DB_DEBUG = 0x9830, ++ PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31, ++ ++ DB_WATERMARKS = 0x00009838, ++ DEPTH_FREE_mask = 0x1f << 0, ++ DEPTH_FREE_shift = 0, ++ DEPTH_FLUSH_mask = 0x3f << 5, ++ DEPTH_FLUSH_shift = 5, ++ FORCE_SUMMARIZE_mask = 0x0f << 11, ++ FORCE_SUMMARIZE_shift = 11, ++ DEPTH_PENDING_FREE_mask = 0x1f << 15, ++ DEPTH_PENDING_FREE_shift = 15, ++ DEPTH_CACHELINE_FREE_mask = 0x1f << 20, ++ DEPTH_CACHELINE_FREE_shift = 20, ++ EARLY_Z_PANIC_DISABLE_bit = 1 << 25, ++ LATE_Z_PANIC_DISABLE_bit = 1 << 26, ++ RE_Z_PANIC_DISABLE_bit = 1 << 27, ++ DB_EXTRA_DEBUG_mask = 0x0f << 28, ++ DB_EXTRA_DEBUG_shift = 28, ++ ++ CP_RB_BASE = 0xc100, ++ CP_RB_CNTL = 0xc104, ++ RB_BUFSZ_mask = 0x3f << 0, ++ CP_RB_WPTR = 0xc114, ++ RB_WPTR_mask = 0xfffff << 0, ++ RB_WPTR_shift = 0, ++ CP_RB_RPTR_WR = 0xc108, ++ RB_RPTR_WR_mask = 0xfffff << 0, ++ RB_RPTR_WR_shift = 0, ++ ++ CP_INT_STATUS = 0xc128, ++ DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0, ++ ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1, ++ SEM_SIGNAL_INT_STAT_bit = 1 << 18, ++ CNTX_BUSY_INT_STAT_bit = 1 << 19, ++ CNTX_EMPTY_INT_STAT_bit = 1 << 20, ++ WAITMEM_SEM_INT_STAT_bit = 1 << 21, ++ PRIV_INSTR_INT_STAT_bit = 1 << 22, ++ PRIV_REG_INT_STAT_bit = 1 << 23, ++ OPCODE_ERROR_INT_STAT_bit = 1 << 24, ++ SCRATCH_INT_STAT_bit = 1 << 25, ++ TIME_STAMP_INT_STAT_bit = 1 << 26, ++ RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27, ++ DMA_INT_STAT_bit = 1 << 28, ++ IB2_INT_STAT_bit = 1 << 29, ++ IB1_INT_STAT_bit = 1 
<< 30, ++ RB_INT_STAT_bit = 1 << 31, ++ ++// SX_ALPHA_TEST_CONTROL = 0x00028410, ++ ALPHA_FUNC__REF_NEVER = 0, ++ ALPHA_FUNC__REF_ALWAYS = 7, ++// DB_SHADER_CONTROL = 0x0002880c, ++ Z_ORDER__EARLY_Z_THEN_LATE_Z = 2, ++// PA_SU_SC_MODE_CNTL = 0x00028814, ++// POLY_MODE_mask = 0x03 << 3, ++ POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE, ++// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, ++ POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES, ++ PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20, ++ DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */ ++// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, ++ DB_ALPHA_TO_MASK = 0x00028d44, ++ ALPHA_TO_MASK_ENABLE = 1 << 0, ++ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, /* 2-bit field, bits 9:8 */ ++ ALPHA_TO_MASK_OFFSET0_shift = 8, ++ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, /* 2-bit field, bits 11:10; mask must use the same shift as _shift */ ++ ALPHA_TO_MASK_OFFSET1_shift = 10, ++ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, /* 2-bit field, bits 13:12 */ ++ ALPHA_TO_MASK_OFFSET2_shift = 12, ++ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, /* 2-bit field, bits 15:14 */ ++ ALPHA_TO_MASK_OFFSET3_shift = 14, ++ ++// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, ++// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, ++ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, ++ FMT_16=5, FMT_16_FLOAT, FMT_8_8, ++ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, ++ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, ++ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, ++ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10, ++ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2, ++ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, ++ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, ++ FMT_1 = 37, FMT_GB_GR=39, ++ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, ++ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, ++ FMT_32_32_32_FLOAT=48, ++ ++// High level register file lengths ++ SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */ ++ SQ_ALU_CONSTANT_ps_num = 256, ++ SQ_ALU_CONSTANT_vs_num
= 256, ++ SQ_ALU_CONSTANT_all_num = 512, ++ SQ_ALU_CONSTANT_offset = 16, ++ SQ_ALU_CONSTANT_ps = 0, ++ SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num, ++ SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ ++ SQ_TEX_RESOURCE_ps_num = 160, ++ SQ_TEX_RESOURCE_vs_num = 160, ++ SQ_TEX_RESOURCE_fs_num = 16, ++ SQ_TEX_RESOURCE_gs_num = 160, ++ SQ_TEX_RESOURCE_all_num = 496, ++ SQ_TEX_RESOURCE_offset = 28, ++ SQ_TEX_RESOURCE_ps = 0, ++ SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num, ++ SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num, ++ SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num, ++ SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ ++ SQ_VTX_RESOURCE_ps_num = 160, ++ SQ_VTX_RESOURCE_vs_num = 160, ++ SQ_VTX_RESOURCE_fs_num = 16, ++ SQ_VTX_RESOURCE_gs_num = 160, ++ SQ_VTX_RESOURCE_all_num = 496, ++ SQ_VTX_RESOURCE_offset = 28, ++ SQ_VTX_RESOURCE_ps = 0, ++ SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num, ++ SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num, ++ SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num, ++ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */ ++ SQ_TEX_SAMPLER_WORD_ps_num = 18, ++ SQ_TEX_SAMPLER_WORD_vs_num = 18, ++ SQ_TEX_SAMPLER_WORD_gs_num = 18, ++ SQ_TEX_SAMPLER_WORD_all_num = 54, ++ SQ_TEX_SAMPLER_WORD_offset = 12, ++ SQ_TEX_SAMPLER_WORD_ps = 0, ++ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, ++ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, ++ SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */ ++ SQ_LOOP_CONST_ps_num = 32, ++ SQ_LOOP_CONST_vs_num = 32, ++ SQ_LOOP_CONST_gs_num = 32, ++ SQ_LOOP_CONST_all_num = 96, ++ SQ_LOOP_CONST_offset = 4, ++ SQ_LOOP_CONST_ps = 0, ++ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, ++ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + 
SQ_LOOP_CONST_vs_num, ++} ; ++ ++ ++#endif +diff --git a/src/r600_reg_r7xx.h b/src/r600_reg_r7xx.h +new file mode 100644 +index 0000000..e5c01c8 +--- /dev/null ++++ b/src/r600_reg_r7xx.h +@@ -0,0 +1,149 @@ ++/* ++ * RadeonHD R6xx, R7xx Register documentation ++ * ++ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. ++ * Copyright (C) 2008-2009 Matthias Hopf ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included ++ * in all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef _R600_REG_R7xx_H_ ++#define _R600_REG_R7xx_H_ ++ ++/* ++ * Register update for R7xx chips ++ */ ++ ++enum { ++ ++ R7XX_MC_VM_FB_LOCATION = 0x00002024, ++ ++// GRBM_STATUS = 0x00008010, ++ R7XX_TA_BUSY_bit = 1 << 14, ++ ++ R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c, ++ RING0_OFFSET_mask = 0xff << 0, ++ RING0_OFFSET_shift = 0, ++ ISOLATE_ES_ENABLE_bit = 1 << 12, ++ ISOLATE_GS_ENABLE_bit = 1 << 13, ++ VS_PC_LIMIT_ENABLE_bit = 1 << 14, ++ ++// SQ_ALU_WORD0 = 0x00008dfc, ++// SRC0_SEL_mask = 0x1ff << 0, ++// SRC1_SEL_mask = 0x1ff << 13, ++ R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, ++ R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, ++ R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, ++ R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, ++// INDEX_MODE_mask = 0x07 << 26, ++ R7xx_SQ_INDEX_GLOBAL = 0x05, ++ R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06, ++ R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc, ++ R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, ++ R6xx_FOG_MERGE_bit = 1 << 5, ++ R6xx_OMOD_mask = 0x03 << 6, ++ R7xx_OMOD_mask = 0x03 << 5, ++ R6xx_OMOD_shift = 6, ++ R7xx_OMOD_shift = 5, ++ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, ++ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, ++ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, ++ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, ++ R7xx_SQ_OP2_INST_FREXP_64 = 0x07, ++ R7xx_SQ_OP2_INST_ADD_64 = 0x17, ++ R7xx_SQ_OP2_INST_MUL_64 = 0x1b, ++ R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c, ++ R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d, ++ R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a, ++ R7xx_SQ_OP2_INST_FRACT_64 = 0x7b, ++ R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c, ++ R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d, ++ R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e, ++// SQ_ALU_WORD1_OP3 = 0x00008dfc, ++// SRC2_SEL_mask = 0x1ff << 0, ++// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, ++// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, ++// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, ++// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, ++// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, ++ R7xx_SQ_OP3_INST_MULADD_64 = 0x08, ++ R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09, ++ 
R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a, ++ R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b, ++// SQ_CF_ALU_WORD1 = 0x00008dfc, ++ R6xx_USES_WATERFALL_bit = 1 << 25, ++ R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, ++// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, ++// ARRAY_BASE_mask = 0x1fff << 0, ++// TYPE_mask = 0x03 << 13, ++// SQ_EXPORT_PARAM = 0x02, ++// X_UNUSED_FOR_SX_EXPORTS = 0x03, ++// ELEM_SIZE_mask = 0x03 << 30, ++// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, ++// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, ++ R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a, ++// SQ_CF_WORD1 = 0x00008dfc, ++// SQ_CF_WORD1__COUNT_mask = 0x07 << 10, ++ R7xx_COUNT_3_bit = 1 << 19, ++// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, ++ R7xx_SQ_CF_INST_END_PROGRAM = 0x19, ++ R7xx_SQ_CF_INST_WAIT_ACK = 0x1a, ++ R7xx_SQ_CF_INST_TEX_ACK = 0x1b, ++ R7xx_SQ_CF_INST_VTX_ACK = 0x1c, ++ R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d, ++// SQ_VTX_WORD0 = 0x00008dfc, ++// VTX_INST_mask = 0x1f << 0, ++ R7xx_SQ_VTX_INST_MEM = 0x02, ++// SQ_VTX_WORD2 = 0x00008dfc, ++ R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, ++ ++// SQ_TEX_WORD0 = 0x00008dfc, ++// TEX_INST_mask = 0x1f << 0, ++ R7xx_X_MEMORY_READ = 0x02, ++ R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, ++ R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f, ++ R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, ++ ++ R7xx_PA_SC_EDGERULE = 0x00028230, ++ R7xx_SPI_THREAD_GROUPING = 0x000286c8, ++ PS_GROUPING_mask = 0x1f << 0, ++ PS_GROUPING_shift = 0, ++ VS_GROUPING_mask = 0x1f << 8, ++ VS_GROUPING_shift = 8, ++ GS_GROUPING_mask = 0x1f << 16, ++ GS_GROUPING_shift = 16, ++ ES_GROUPING_mask = 0x1f << 24, ++ ES_GROUPING_shift = 24, ++ R7xx_CB_SHADER_CONTROL = 0x000287a0, ++ RT0_ENABLE_bit = 1 << 0, ++ RT1_ENABLE_bit = 1 << 1, ++ RT2_ENABLE_bit = 1 << 2, ++ RT3_ENABLE_bit = 1 << 3, ++ RT4_ENABLE_bit = 1 << 4, ++ RT5_ENABLE_bit = 1 << 5, ++ RT6_ENABLE_bit = 1 << 6, ++ RT7_ENABLE_bit = 1 << 7, ++// DB_ALPHA_TO_MASK = 0x00028d44, ++ R7xx_OFFSET_ROUND_bit = 1 << 16, ++// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, 
++ R7xx_TRUNCATE_COORD_bit = 1 << 9, ++ R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, ++ ++} ; ++ ++#endif /* _R600_REG_R7xx_H_ */ +diff --git a/src/r600_shader.h b/src/r600_shader.h +new file mode 100644 +index 0000000..58f5a52 +--- /dev/null ++++ b/src/r600_shader.h +@@ -0,0 +1,346 @@ ++/* ++ * RadeonHD R6xx, R7xx DRI driver ++ * ++ * Copyright (C) 2008-2009 Alexander Deucher ++ * Copyright (C) 2008-2009 Matthias Hopf ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included ++ * in all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++/* ++ * Shader macros ++ */ ++ ++#ifndef __SHADER_H__ ++#define __SHADER_H__ ++ ++ ++/* Restrictions of ALU instructions ++ * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1. ++ * max of 3 different src GPRs per instr. ++ * max of 4 different cfile constant components per instr. ++ * max of 2 (different) constants (any type) for t. ++ * bank swizzle (see below). ++ * GPR write stalls read of same register. 
Auto-replaced by PV/PS, NOP needed if registers are relative to ++ * different indices (gpr,loop,nothing). ++ * may use constant registers or constant cache, but not both. ++ */ ++ ++/* Bank_swizzle: (pp. 297ff) ++ * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2). ++ * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.: ++ * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2 ++ * 1.x 2.x 012 1.x 2.x - ++ * 3.x 1.y 201 1.y - 3.x ++ * 2.x 1.y 102 (1.y) (2.x) - ++ * If data is read in a cycle, multiple scalar instructions can reference it. ++ * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1. ++ * No restrictions for constants or PV/PS. ++ * t can load multiple components in a single cycle slot, but has to share cycles with xyzw. ++ * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210). ++ * t with two constants may only load GPRs or PV/PS in cycle 2. ++ */ ++ ++ ++/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */ ++ ++ ++// CF insts ++// addr ++#define ADDR(x) (x) ++// pc ++#define POP_COUNT(x) (x) ++// const ++#define CF_CONST(x) (x) ++// cond ++#define COND(x) (x) // SQ_COND_* ++// count ++#define I_COUNT(x) ((x) ?
((x) - 1) : 0) ++//r7xx ++#define COUNT_3(x) (x) ++// call count ++#define CALL_COUNT(x) (x) ++// eop ++#define END_OF_PROGRAM(x) (x) ++// vpm ++#define VALID_PIXEL_MODE(x) (x) ++// cf inst ++#define CF_INST(x) (x) // SQ_CF_INST_* ++ ++// wqm ++#define WHOLE_QUAD_MODE(x) (x) ++// barrier ++#define BARRIER(x) (x) ++//kb0 ++#define KCACHE_BANK0(x) (x) ++//kb1 ++#define KCACHE_BANK1(x) (x) ++// km0/1 ++#define KCACHE_MODE0(x) (x) ++#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* ++// ++#define KCACHE_ADDR0(x) (x) ++#define KCACHE_ADDR1(x) (x) ++// uw ++#define USES_WATERFALL(x) (x) ++ ++#define ARRAY_BASE(x) (x) ++// export pixel ++#define CF_PIXEL_MRT0 0 ++#define CF_PIXEL_MRT1 1 ++#define CF_PIXEL_MRT2 2 ++#define CF_PIXEL_MRT3 3 ++#define CF_PIXEL_MRT4 4 ++#define CF_PIXEL_MRT5 5 ++#define CF_PIXEL_MRT6 6 ++#define CF_PIXEL_MRT7 7 ++// *_FOG: r6xx only ++#define CF_PIXEL_MRT0_FOG 16 ++#define CF_PIXEL_MRT1_FOG 17 ++#define CF_PIXEL_MRT2_FOG 18 ++#define CF_PIXEL_MRT3_FOG 19 ++#define CF_PIXEL_MRT4_FOG 20 ++#define CF_PIXEL_MRT5_FOG 21 ++#define CF_PIXEL_MRT6_FOG 22 ++#define CF_PIXEL_MRT7_FOG 23 ++#define CF_PIXEL_Z 61 ++// export pos ++#define CF_POS0 60 ++#define CF_POS1 61 ++#define CF_POS2 62 ++#define CF_POS3 63 ++// export param ++// 0...31 ++#define TYPE(x) (x) // SQ_EXPORT_* ++#if 0 ++// type export ++#define SQ_EXPORT_PIXEL 0 ++#define SQ_EXPORT_POS 1 ++#define SQ_EXPORT_PARAM 2 ++// reserved 3 ++// type mem ++#define SQ_EXPORT_WRITE 0 ++#define SQ_EXPORT_WRITE_IND 1 ++#define SQ_EXPORT_WRITE_ACK 2 ++#define SQ_EXPORT_WRITE_IND_ACK 3 ++#endif ++ ++#define RW_GPR(x) (x) ++#define RW_REL(x) (x) ++#define ABSOLUTE 0 ++#define RELATIVE 1 ++#define INDEX_GPR(x) (x) ++#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0) /* yields x - 1, with 0 kept as 0; argument parenthesized like I_COUNT */ ++#define COMP_MASK(x) (x) ++#define R6xx_ELEM_LOOP(x) (x) ++#define BURST_COUNT(x) ((x) ?
((x) - 1) : 0) /* yields x - 1, with 0 kept as 0; argument parenthesized like I_COUNT */ ++ ++// swiz ++#define SRC_SEL_X(x) (x) // SQ_SEL_* each ++#define SRC_SEL_Y(x) (x) ++#define SRC_SEL_Z(x) (x) ++#define SRC_SEL_W(x) (x) ++ ++#define CF_DWORD0(addr) (addr) ++// R7xx has another entry (COUNT3), but that is only used for adding a bit to count. ++// We allow one more bit for count in the argument of the macro on R7xx instead. ++// R6xx: [0,7] R7xx: [1,16] ++#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \ ++ (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ ++ ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) ++ ++#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)) ++#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \ ++ (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ ++ ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)) ++ ++#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ ++ (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ ++ ((es) << 30)) ++// R7xx apparently doesn't have the ELEM_LOOP entry any more ++// We still expose it, but ELEM_LOOP is explicitly R6xx now. ++// TODO: is this just forgotten in the docs, or really not available any more?
++#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \ ++ (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ ++ ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) ++#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \ ++ (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ ++ ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ ++ ((wqm) << 30) | ((b) << 31)) ++ ++// ALU clause insts ++#define SRC0_SEL(x) (x) ++#define SRC1_SEL(x) (x) ++#define SRC2_SEL(x) (x) ++// src[0-2]_sel ++// 0-127 GPR ++// 128-159 kcache constants bank 0 ++// 160-191 kcache constants bank 1 ++// 248-255 special SQ_ALU_SRC_* (0, 1, etc.) ++ ++#define SRC0_REL(x) (x) ++#define SRC1_REL(x) (x) ++#define SRC2_REL(x) (x) ++// elem ++#define SRC0_ELEM(x) (x) ++#define SRC1_ELEM(x) (x) ++#define SRC2_ELEM(x) (x) ++#define ELEM_X 0 ++#define ELEM_Y 1 ++#define ELEM_Z 2 ++#define ELEM_W 3 ++// neg ++#define SRC0_NEG(x) (x) ++#define SRC1_NEG(x) (x) ++#define SRC2_NEG(x) (x) ++// im ++#define INDEX_MODE(x) (x) // SQ_INDEX_* ++// ps ++#define PRED_SEL(x) (x) // SQ_PRED_SEL_* ++// last ++#define LAST(x) (x) ++// abs ++#define SRC0_ABS(x) (x) ++#define SRC1_ABS(x) (x) ++// uem ++#define UPDATE_EXECUTE_MASK(x) (x) ++// up ++#define UPDATE_PRED(x) (x) ++// wm ++#define WRITE_MASK(x) (x) ++// fm ++#define FOG_MERGE(x) (x) ++// omod ++#define OMOD(x) (x) // SQ_ALU_OMOD_* ++// alu inst ++#define ALU_INST(x) (x) // SQ_ALU_INST_* ++//bs ++#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* ++#define DST_GPR(x) (x) ++#define DST_REL(x) (x) ++#define DST_ELEM(x) (x) ++#define CLAMP(x) (x) ++ ++#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ ++ (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ ++ ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ ++ 
((im) << 26) | ((ps) << 29) | ((last) << 31)) ++// R7xx has alu_inst at a different slot, and no fog merge any more (no fix function fog any more) ++#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ ++ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ ++ ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ ++ ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) ++#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ ++ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ ++ ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ ++ ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) ++// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays ++// Fog is NOT USED on R7xx, even if specified. ++#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ ++ ((chipfamily) < CHIP_FAMILY_RV770 ? \ ++ R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \ ++ R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp)) ++#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ ++ (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ ++ ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ ++ ((de) << 29) | ((clamp) << 31)) ++ ++// VTX clause insts ++// vxt insts ++#define VTX_INST(x) (x) // SQ_VTX_INST_* ++ ++// fetch type ++#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* ++ ++#define FETCH_WHOLE_QUAD(x) (x) ++#define BUFFER_ID(x) (x) ++#define SRC_GPR(x) (x) ++#define SRC_REL(x) (x) ++#define MEGA_FETCH_COUNT(x) ((x) ? 
((x) - 1) : 0) ++ ++#define SEMANTIC_ID(x) (x) ++#define DST_SEL_X(x) (x) ++#define DST_SEL_Y(x) (x) ++#define DST_SEL_Z(x) (x) ++#define DST_SEL_W(x) (x) ++#define USE_CONST_FIELDS(x) (x) ++#define DATA_FORMAT(x) (x) ++// num format ++#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* ++// format comp ++#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* ++// sma ++#define SRF_MODE_ALL(x) (x) ++#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 ++#define SRF_MODE_NO_ZERO 1 ++#define OFFSET(x) (x) ++// endian swap ++#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* ++#define CONST_BUF_NO_STRIDE(x) (x) ++// mf ++#define MEGA_FETCH(x) (x) ++ ++#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ ++ (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ ++ ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)) ++#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ ++ (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ ++ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) ++#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ ++ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ ++ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) ++#define VTX_DWORD2(offset, es, cbns, mf) \ ++ (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)) ++#define VTX_DWORD_PAD 0x00000000 ++ ++// TEX clause insts ++// tex insts ++#define TEX_INST(x) (x) // SQ_TEX_INST_* ++ ++#define BC_FRAC_MODE(x) (x) ++#define FETCH_WHOLE_QUAD(x) (x) ++#define RESOURCE_ID(x) (x) ++#define R7xx_ALT_CONST(x) (x) ++ ++#define LOD_BIAS(x) (x) ++//ct ++#define COORD_TYPE_X(x) (x) ++#define COORD_TYPE_Y(x) (x) ++#define COORD_TYPE_Z(x) (x) ++#define COORD_TYPE_W(x) (x) ++#define TEX_UNNORMALIZED 0 ++#define TEX_NORMALIZED 1 ++#define OFFSET_X(x) 
(x) ++#define OFFSET_Y(x) (x) ++#define OFFSET_Z(x) (x) ++#define SAMPLER_ID(x) (x) ++ ++// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only ++#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \ ++ (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ ++ ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)) ++#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ ++ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ ++ ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)) ++#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ ++ (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ ++ ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)) ++#define TEX_DWORD_PAD 0x00000000 ++ ++ ++#endif +diff --git a/src/r600_state.h b/src/r600_state.h +new file mode 100644 +index 0000000..9efd557 +--- /dev/null ++++ b/src/r600_state.h +@@ -0,0 +1,229 @@ ++#ifndef __R600_STATE_H__ ++#define __R600_STATE_H__ ++ ++#include "xf86drm.h" ++ ++typedef int bool_t; ++ ++/* seriously ?! 
@#$%% */ ++# define uint32_t CARD32 ++# define uint64_t CARD64 ++ ++#define CLEAR(x) memset (&(x), 0, sizeof(x)) /* zero an object in place; argument parenthesized so any lvalue expression is safe */ ++ ++/* Sequencer / thread handling */ ++typedef struct { ++ int ps_prio; ++ int vs_prio; ++ int gs_prio; ++ int es_prio; ++ int num_ps_gprs; ++ int num_vs_gprs; ++ int num_gs_gprs; ++ int num_es_gprs; ++ int num_temp_gprs; ++ int num_ps_threads; ++ int num_vs_threads; ++ int num_gs_threads; ++ int num_es_threads; ++ int num_ps_stack_entries; ++ int num_vs_stack_entries; ++ int num_gs_stack_entries; ++ int num_es_stack_entries; ++} sq_config_t; ++ ++/* Color buffer / render target */ ++typedef struct { ++ int id; ++ int w; ++ int h; ++ uint64_t base; ++ int format; ++ int endian; ++ int array_mode; // tiling ++ int number_type; ++ int read_size; ++ int comp_swap; ++ int tile_mode; ++ int blend_clamp; ++ int clear_color; ++ int blend_bypass; ++ int blend_float32; ++ int simple_float; ++ int round_mode; ++ int tile_compact; ++ int source_format; ++} cb_config_t; ++ ++/* Depth buffer */ ++typedef struct { ++ int w; ++ int h; ++ uint64_t base; ++ int format; ++ int read_size; ++ int array_mode; // tiling ++ int tile_surface_en; ++ int tile_compact; ++ int zrange_precision; ++} db_config_t; ++ ++/* Shader */ ++typedef struct { ++ uint64_t shader_addr; ++ int num_gprs; ++ int stack_size; ++ int dx10_clamp; ++ int prime_cache_pgm_en; ++ int prime_cache_on_draw; ++ int fetch_cache_lines; ++ int prime_cache_en; ++ int prime_cache_on_const; ++ int clamp_consts; ++ int export_mode; ++ int uncached_first_inst; ++} shader_config_t; ++ ++/* Vertex buffer / vtx resource */ ++typedef struct { ++ int id; ++ uint64_t vb_addr; ++ uint32_t vtx_num_entries; ++ uint32_t vtx_size_dw; ++ int clamp_x; ++ int format; ++ int num_format_all; ++ int format_comp_all; ++ int srf_mode_all; ++ int endian; ++ int mem_req_size; ++} vtx_resource_t; ++ ++/* Texture resource */ ++typedef struct { ++ int id; ++ int w; ++ int h; ++ int pitch; ++ int depth; ++ int dim; ++ int tile_mode; ++ int 
tile_type; ++ int format; ++ uint64_t base; ++ uint64_t mip_base; ++ int format_comp_x; ++ int format_comp_y; ++ int format_comp_z; ++ int format_comp_w; ++ int num_format_all; ++ int srf_mode_all; ++ int force_degamma; ++ int endian; ++ int request_size; ++ int dst_sel_x; ++ int dst_sel_y; ++ int dst_sel_z; ++ int dst_sel_w; ++ int base_level; ++ int last_level; ++ int base_array; ++ int last_array; ++ int mpeg_clamp; ++ int perf_modulation; ++ int interlaced; ++} tex_resource_t; ++ ++/* Texture sampler */ ++typedef struct { ++ int id; ++ /* Clamping */ ++ int clamp_x, clamp_y, clamp_z; ++ int border_color; ++ /* Filtering */ ++ int xy_mag_filter, xy_min_filter; ++ int z_filter; ++ int mip_filter; ++ bool_t high_precision_filter; /* ? */ ++ int perf_mip; /* ? 0-7 */ ++ int perf_z; /* ? 3 */ ++ /* LoD selection */ ++ int min_lod, max_lod; /* 0-0x3ff */ ++ int lod_bias; /* 0-0xfff (signed?) */ ++ int lod_bias2; /* ? 0-0xfff (signed?) */ ++ bool_t lod_uses_minor_axis; /* ? */ ++ /* Other stuff */ ++ bool_t point_sampling_clamp; /* ? */ ++ bool_t tex_array_override; /* ? */ ++ bool_t mc_coord_truncate; /* ? */ ++ bool_t force_degamma; /* ? */ ++ bool_t fetch_4; /* ? */ ++ bool_t sample_is_pcf; /* ? */ ++ bool_t type; /* ? */ ++ int depth_compare; /* only depth textures? 
*/ ++ int chroma_key; ++} tex_sampler_t; ++ ++/* Draw command */ ++typedef struct { ++ uint32_t prim_type; ++ uint32_t vgt_draw_initiator; ++ uint32_t index_type; ++ uint32_t num_instances; ++ uint32_t num_indices; ++} draw_config_t; ++ ++inline void e32(drmBufPtr ib, uint32_t dword); ++inline void efloat(drmBufPtr ib, float f); ++inline void pack3(drmBufPtr ib, int cmd, unsigned num); ++inline void pack0 (drmBufPtr ib, uint32_t reg, int num); ++inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val); ++void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib); ++void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib); ++ ++uint64_t ++upload (ScrnInfoPtr pScrn, void *shader, int size, int offset); ++void ++wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib); ++void ++wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib); ++void ++start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); ++void ++set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); ++void ++cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); ++void ++cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop, Bool enable); ++void ++fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf); ++void ++vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf); ++void ++ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf); ++void ++set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); ++void ++set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res); ++void ++set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res); ++void ++set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s); ++void ++set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); ++void ++set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); ++void 
++set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); ++void ++set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); ++void ++set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); ++void ++set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib); ++void ++draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices); ++void ++draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); ++ ++#endif +diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c +new file mode 100644 +index 0000000..f03fb7d +--- /dev/null ++++ b/src/r600_textured_videofuncs.c +@@ -0,0 +1,521 @@ ++/* ++ * Copyright 2008 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ * ++ * Author: Alex Deucher ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "xf86.h" ++ ++#include "exa.h" ++ ++#include "radeon.h" ++#include "r600_shader.h" ++#include "r600_reg.h" ++#include "r600_state.h" ++ ++#include "radeon_video.h" ++ ++#include ++#include "fourcc.h" ++ ++#include "damage.h" ++ ++static void ++R600DoneTexturedVideo(ScrnInfoPtr pScrn) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ draw_config_t draw_conf; ++ vtx_resource_t vtx_res; ++ ++ CLEAR (draw_conf); ++ CLEAR (vtx_res); ++ ++ if (accel_state->vb_index == 0) { ++ R600IBDiscard(pScrn, accel_state->ib); ++ return; ++ } ++ ++ accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + ++ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); ++ accel_state->vb_size = accel_state->vb_index * 16; ++ ++ /* flush vertex cache */ ++ if ((info->ChipFamily == CHIP_FAMILY_RV610) || ++ (info->ChipFamily == CHIP_FAMILY_RV620) || ++ (info->ChipFamily == CHIP_FAMILY_RS780) || ++ (info->ChipFamily == CHIP_FAMILY_RV710)) ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ else ++ cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, ++ accel_state->vb_size, accel_state->vb_mc_addr); ++ ++ /* Vertex buffer setup */ ++ vtx_res.id = SQ_VTX_RESOURCE_vs; ++ vtx_res.vtx_size_dw = 16 / 4; ++ vtx_res.vtx_num_entries = accel_state->vb_size / 4; ++ vtx_res.mem_req_size = 1; ++ vtx_res.vb_addr = accel_state->vb_mc_addr; ++ set_vtx_resource (pScrn, accel_state->ib, &vtx_res); ++ ++ draw_conf.prim_type = DI_PT_RECTLIST; ++ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; ++ draw_conf.num_instances = 1; ++ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; ++ draw_conf.index_type = DI_INDEX_SIZE_16_BIT; ++ ++ draw_auto(pScrn, accel_state->ib, &draw_conf); ++ ++ 
wait_3d_idle_clean(pScrn, accel_state->ib); ++ ++ /* sync destination surface */ ++ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), ++ accel_state->dst_size, accel_state->dst_mc_addr); ++ ++ R600CPFlushIndirect(pScrn, accel_state->ib); ++} ++ ++void ++R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ PixmapPtr pPixmap = pPriv->pPixmap; ++ BoxPtr pBox = REGION_RECTS(&pPriv->clip); ++ int nBox = REGION_NUM_RECTS(&pPriv->clip); ++ int dstxoff, dstyoff; ++ cb_config_t cb_conf; ++ tex_resource_t tex_res; ++ tex_sampler_t tex_samp; ++ shader_config_t vs_conf, ps_conf; ++ int uv_offset; ++ ++ static float ps_alu_consts[] = { ++ 1.0, 0.0, 1.4020, 0, // r - c[0] ++ 1.0, -0.34414, -0.71414, 0, // g - c[1] ++ 1.0, 1.7720, 0.0, 0, // b - c[2] ++ /* Constants for undoing Y'CbCr scaling ++ * - Y' is scaled from 16:235 ++ * - Cb/Cr are scaled from 16:240 ++ * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) ++ * Vector is [Y_mul, Y_shift, C_mul, C_shift] ++ */ ++ 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, ++ }; ++ ++ CLEAR (cb_conf); ++ CLEAR (tex_res); ++ CLEAR (tex_samp); ++ CLEAR (vs_conf); ++ CLEAR (ps_conf); ++ ++ accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); ++ accel_state->src_pitch[0] = pPriv->src_pitch; ++ ++ // bad pitch: give up unless both pitches are multiples of 8 ++ if (accel_state->src_pitch[0] & 7) ++ return; ++ if (accel_state->dst_pitch & 7) ++ return; ++ ++#ifdef COMPOSITE ++ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; ++ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; ++#else ++ dstxoff = 0; ++ dstyoff = 0; ++#endif ++ ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ ++ /* Init */ ++ start_3d(pScrn, accel_state->ib); ++ ++ //cp_set_surface_sync(pScrn, accel_state->ib); ++ ++ set_default_state(pScrn, accel_state->ib); ++ ++ /* Scissor / viewport */ ++ 
ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); ++ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); ++ ++ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->xv_vs_offset; ++ ++ switch(pPriv->id) { ++ case FOURCC_YV12: ++ case FOURCC_I420: ++ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->xv_ps_offset_planar; ++ break; ++ case FOURCC_UYVY: ++ case FOURCC_YUY2: ++ default: ++ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + ++ accel_state->xv_ps_offset_packed; ++ break; ++ } ++ ++ accel_state->vs_size = 512; ++ accel_state->ps_size = 512; ++ ++ /* Shader */ ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->vs_size, accel_state->vs_mc_addr); ++ ++ vs_conf.shader_addr = accel_state->vs_mc_addr; ++ vs_conf.num_gprs = 2; ++ vs_conf.stack_size = 0; ++ vs_setup (pScrn, accel_state->ib, &vs_conf); ++ ++ /* flush SQ cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, ++ accel_state->ps_size, accel_state->ps_mc_addr); ++ ++ ps_conf.shader_addr = accel_state->ps_mc_addr; ++ ps_conf.num_gprs = 3; ++ ps_conf.stack_size = 0; ++ ps_conf.uncached_first_inst = 1; ++ ps_conf.clamp_consts = 0; ++ ps_conf.export_mode = 2; ++ ps_setup (pScrn, accel_state->ib, &ps_conf); ++ ++ // PS alu constants ++ set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); ++ ++ /* Texture */ ++ switch(pPriv->id) { ++ case FOURCC_YV12: ++ case FOURCC_I420: ++ accel_state->src_mc_addr[0] = pPriv->src_offset; ++ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; ++ ++ /* flush texture cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], ++ accel_state->src_mc_addr[0]); ++ ++ // Y texture ++ tex_res.id = 0; ++ tex_res.w = pPriv->w; ++ 
tex_res.h = pPriv->h; ++ tex_res.pitch = accel_state->src_pitch[0]; ++ tex_res.depth = 0; ++ tex_res.dim = SQ_TEX_DIM_2D; ++ tex_res.base = accel_state->src_mc_addr[0]; ++ tex_res.mip_base = accel_state->src_mc_addr[0]; ++ ++ tex_res.format = FMT_8; ++ tex_res.dst_sel_x = SQ_SEL_X; //Y ++ tex_res.dst_sel_y = SQ_SEL_1; ++ tex_res.dst_sel_z = SQ_SEL_1; ++ tex_res.dst_sel_w = SQ_SEL_1; ++ ++ tex_res.request_size = 1; ++ tex_res.base_level = 0; ++ tex_res.last_level = 0; ++ tex_res.perf_modulation = 0; ++ tex_res.interlaced = 0; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ // Y sampler ++ tex_samp.id = 0; ++ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_z = SQ_TEX_WRAP; ++ ++ // xxx: switch to bicubic ++ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ ++ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; ++ tex_samp.mip_filter = 0; /* no mipmap */ ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ ++ // U or V texture ++ uv_offset = accel_state->src_pitch[0] * pPriv->h; ++ uv_offset = (uv_offset + 255) & ~255; ++ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->src_size[0] / 4, ++ accel_state->src_mc_addr[0] + uv_offset); ++ ++ tex_res.id = 1; ++ tex_res.format = FMT_8; ++ tex_res.w = pPriv->w >> 1; ++ tex_res.h = pPriv->h >> 1; ++ tex_res.pitch = accel_state->src_pitch[0] >> 1; ++ tex_res.dst_sel_x = SQ_SEL_X; //V or U ++ tex_res.dst_sel_y = SQ_SEL_1; ++ tex_res.dst_sel_z = SQ_SEL_1; ++ tex_res.dst_sel_w = SQ_SEL_1; ++ tex_res.interlaced = 0; ++ // XXX tex bases need to be 256B aligned ++ tex_res.base = accel_state->src_mc_addr[0] + uv_offset; ++ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ // U or V sampler ++ tex_samp.id = 1; ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ ++ // U or V texture ++ uv_offset += 
((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1)); ++ uv_offset = (uv_offset + 255) & ~255; ++ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, ++ accel_state->src_size[0] / 4, ++ accel_state->src_mc_addr[0] + uv_offset); ++ ++ tex_res.id = 2; ++ tex_res.format = FMT_8; ++ tex_res.w = pPriv->w >> 1; ++ tex_res.h = pPriv->h >> 1; ++ tex_res.pitch = accel_state->src_pitch[0] >> 1; ++ tex_res.dst_sel_x = SQ_SEL_X; //V or U ++ tex_res.dst_sel_y = SQ_SEL_1; ++ tex_res.dst_sel_z = SQ_SEL_1; ++ tex_res.dst_sel_w = SQ_SEL_1; ++ tex_res.interlaced = 0; ++ // XXX tex bases need to be 256B aligned ++ tex_res.base = accel_state->src_mc_addr[0] + uv_offset; ++ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ // UV sampler ++ tex_samp.id = 2; ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ break; ++ case FOURCC_UYVY: ++ case FOURCC_YUY2: ++ default: ++ accel_state->src_mc_addr[0] = pPriv->src_offset; ++ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; ++ ++ /* flush texture cache */ ++ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], ++ accel_state->src_mc_addr[0]); ++ ++ // Y texture ++ tex_res.id = 0; ++ tex_res.w = pPriv->w; ++ tex_res.h = pPriv->h; ++ tex_res.pitch = accel_state->src_pitch[0] >> 1; ++ tex_res.depth = 0; ++ tex_res.dim = SQ_TEX_DIM_2D; ++ tex_res.base = accel_state->src_mc_addr[0]; ++ tex_res.mip_base = accel_state->src_mc_addr[0]; ++ ++ tex_res.format = FMT_8_8; ++ if (pPriv->id == FOURCC_UYVY) ++ tex_res.dst_sel_x = SQ_SEL_Y; //Y ++ else ++ tex_res.dst_sel_x = SQ_SEL_X; //Y ++ tex_res.dst_sel_y = SQ_SEL_1; ++ tex_res.dst_sel_z = SQ_SEL_1; ++ tex_res.dst_sel_w = SQ_SEL_1; ++ ++ tex_res.request_size = 1; ++ tex_res.base_level = 0; ++ tex_res.last_level = 0; ++ tex_res.perf_modulation = 0; ++ tex_res.interlaced = 0; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ // Y sampler ++ 
tex_samp.id = 0; ++ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; ++ tex_samp.clamp_z = SQ_TEX_WRAP; ++ ++ // xxx: switch to bicubic ++ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; ++ ++ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; ++ tex_samp.mip_filter = 0; /* no mipmap */ ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ ++ // UV texture ++ tex_res.id = 1; ++ tex_res.format = FMT_8_8_8_8; ++ tex_res.w = pPriv->w >> 1; ++ tex_res.h = pPriv->h; ++ tex_res.pitch = accel_state->src_pitch[0] >> 2; ++ if (pPriv->id == FOURCC_UYVY) { ++ tex_res.dst_sel_x = SQ_SEL_X; //V ++ tex_res.dst_sel_y = SQ_SEL_Z; //U ++ } else { ++ tex_res.dst_sel_x = SQ_SEL_Y; //V ++ tex_res.dst_sel_y = SQ_SEL_W; //U ++ } ++ tex_res.dst_sel_z = SQ_SEL_1; ++ tex_res.dst_sel_w = SQ_SEL_1; ++ tex_res.interlaced = 0; ++ // XXX tex bases need to be 256B aligned ++ tex_res.base = accel_state->src_mc_addr[0]; ++ tex_res.mip_base = accel_state->src_mc_addr[0]; ++ set_tex_resource (pScrn, accel_state->ib, &tex_res); ++ ++ // UV sampler ++ tex_samp.id = 1; ++ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); ++ break; ++ } ++ ++ /* Render setup */ ++ ereg (accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); ++ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); ++ ereg (accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ ++ ++ cb_conf.id = 0; ++ ++ accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; ++ ++ cb_conf.w = accel_state->dst_pitch; ++ cb_conf.h = pPixmap->drawable.height; ++ cb_conf.base = accel_state->dst_mc_addr; ++ ++ switch (pPixmap->drawable.bitsPerPixel) { ++ case 16: ++ if (pPixmap->drawable.depth == 15) { ++ cb_conf.format = COLOR_1_5_5_5; ++ cb_conf.comp_swap = 1; //ARGB ++ } else { ++ cb_conf.format = COLOR_5_6_5; ++ cb_conf.comp_swap = 2; //RGB ++ } ++ break; ++ case 32: ++ 
cb_conf.format = COLOR_8_8_8_8; ++ cb_conf.comp_swap = 1; //ARGB ++ break; ++ default: /* unsupported bpp: hand the indirect buffer back instead of leaking it */ ++ R600IBDiscard(pScrn, accel_state->ib); return; ++ } ++ ++ cb_conf.source_format = 1; ++ cb_conf.blend_clamp = 1; ++ set_render_target(pScrn, accel_state->ib, &cb_conf); ++ ++ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | ++ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); ++ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ ++ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ ++ ++ /* Interpolator setup */ ++ // export tex coords from VS ++ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); ++ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); ++ ++ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x ++ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); ++ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); ++ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | ++ (0x03 << DEFAULT_VAL_shift) | ++ SEL_CENTROID_bit)); ++ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); ++ ++ ++ cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, ++ radeon_covering_crtc_num(pScrn, ++ pPriv->drw_x, ++ pPriv->drw_x + pPriv->dst_w, ++ pPriv->drw_y, ++ pPriv->drw_y + pPriv->dst_h, ++ pPriv->desired_crtc), ++ pPriv->drw_y, ++ pPriv->drw_y + pPriv->dst_h, ++ pPriv->vsync); ++ ++ ++ accel_state->vb_index = 0; ++ ++ while (nBox--) { ++ int srcX, srcY, srcw, srch; ++ int dstX, dstY, dstw, dsth; ++ struct r6xx_copy_vertex *xv_vb; ++ struct r6xx_copy_vertex vertex[3]; ++ ++ if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { ++ R600DoneTexturedVideo(pScrn); ++ accel_state->vb_index = 0; ++ accel_state->ib = RADEONCPGetBuffer(pScrn); ++ } ++ ++ xv_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); 
++ ++ dstX = pBox->x1 + dstxoff; ++ dstY = pBox->y1 + dstyoff; ++ dstw = pBox->x2 - pBox->x1; ++ dsth = pBox->y2 - pBox->y1; ++ ++ srcX = ((pBox->x1 - pPriv->drw_x) * ++ pPriv->src_w) / pPriv->dst_w; ++ srcY = ((pBox->y1 - pPriv->drw_y) * ++ pPriv->src_h) / pPriv->dst_h; ++ ++ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; ++ srch = (pPriv->src_h * dsth) / pPriv->dst_h; ++ ++ vertex[0].x = (float)dstX; ++ vertex[0].y = (float)dstY; ++ vertex[0].s = (float)srcX / pPriv->w; ++ vertex[0].t = (float)srcY / pPriv->h; ++ ++ vertex[1].x = (float)dstX; ++ vertex[1].y = (float)(dstY + dsth); ++ vertex[1].s = (float)srcX / pPriv->w; ++ vertex[1].t = (float)(srcY + srch) / pPriv->h; ++ ++ vertex[2].x = (float)(dstX + dstw); ++ vertex[2].y = (float)(dstY + dsth); ++ vertex[2].s = (float)(srcX + srcw) / pPriv->w; ++ vertex[2].t = (float)(srcY + srch) / pPriv->h; ++ ++#if 0 ++ ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); ++ ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); ++ ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); ++#endif ++ ++ // append to vertex buffer ++ xv_vb[accel_state->vb_index++] = vertex[0]; ++ xv_vb[accel_state->vb_index++] = vertex[1]; ++ xv_vb[accel_state->vb_index++] = vertex[2]; ++ ++ pBox++; ++ } ++ ++ R600DoneTexturedVideo(pScrn); ++ ++ DamageDamageRegion(pPriv->pDraw, &pPriv->clip); ++} +diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c +new file mode 100644 +index 0000000..267a7b0 +--- /dev/null ++++ b/src/r6xx_accel.c +@@ -0,0 +1,1160 @@ ++/* ++ * Copyright 2008 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ * ++ * Authors: Alex Deucher ++ * Matthias Hopf ++ */ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "xf86.h" ++ ++#include ++ ++#include "radeon.h" ++#include "r600_shader.h" ++#include "radeon_reg.h" ++#include "r600_reg.h" ++#include "r600_state.h" ++ ++#include "xf86drm.h" ++#include "radeon_drm.h" ++ ++/* Emit one uint32_t at byte offset ib->used of the indirect buffer and advance ib->used by 4 */ ++inline void e32(drmBufPtr ib, uint32_t dword) ++{ ++ uint32_t *ib_head = (pointer)(char*)ib->address; ++ ++ ib_head[ib->used>>2] = dword; ++ ib->used += 4; ++} ++ ++inline void efloat(drmBufPtr ib, float f) /* emit the bit pattern of f as one dword (punned through a union) */ ++{ ++ union { ++ float f; ++ uint32_t d; ++ } a; ++ a.f=f; ++ e32(ib, a.d); ++} ++ ++inline void pack3(drmBufPtr ib, int cmd, unsigned num) /* emit a PACKET3 header: cmd at bit 8, (num-1) masked to 14 bits at bit 16 */ ++{ ++ e32 (ib, RADEON_CP_PACKET3 | (cmd << 8) | (((num-1) & 0x3fff) << 16)); ++} ++ ++/* Emit the packet header for writing num consecutive registers, starting at reg */ ++/* If reg falls in one of the SET_* ranges, the matching IT_SET_* PACKET3 is issued with the dword offset relative to that range's base; otherwise a CP_PACKET0 is used */ ++inline void pack0 (drmBufPtr ib, uint32_t reg, int num) ++{ ++ if (reg >= SET_CONFIG_REG_offset && reg < SET_CONFIG_REG_end) { ++ pack3 (ib, IT_SET_CONFIG_REG, num+1); ++ e32 (ib, (reg-SET_CONFIG_REG_offset) >> 2); ++ } else if (reg >= SET_CONTEXT_REG_offset && reg < SET_CONTEXT_REG_end) { ++ pack3 (ib, IT_SET_CONTEXT_REG, num+1); ++ e32 (ib, (reg-SET_CONTEXT_REG_offset) >> 2); ++ } else if (reg >= SET_ALU_CONST_offset && reg < SET_ALU_CONST_end) { ++ pack3 (ib, IT_SET_ALU_CONST, num+1); ++ e32 (ib, (reg-SET_ALU_CONST_offset) >> 2); ++ } else if (reg >= SET_RESOURCE_offset && reg < SET_RESOURCE_end) { ++ pack3 (ib, IT_SET_RESOURCE, num+1); ++ e32 (ib, (reg-SET_RESOURCE_offset) >> 2); ++ } else if (reg >= SET_SAMPLER_offset && reg < SET_SAMPLER_end) { ++ pack3 (ib, IT_SET_SAMPLER, num+1); ++ e32 (ib, (reg-SET_SAMPLER_offset) >> 2); ++ } else if (reg >= SET_CTL_CONST_offset && reg < SET_CTL_CONST_end) { ++ pack3 (ib, IT_SET_CTL_CONST, num+1); ++ e32 (ib, (reg-SET_CTL_CONST_offset) >> 2); ++ } else if (reg >= SET_LOOP_CONST_offset && reg < SET_LOOP_CONST_end) { ++ pack3 (ib, IT_SET_LOOP_CONST, 
num+1); ++ e32 (ib, (reg-SET_LOOP_CONST_offset) >> 2); ++ } else if (reg >= SET_BOOL_CONST_offset && reg < SET_BOOL_CONST_end) { ++ pack3 (ib, IT_SET_BOOL_CONST, num+1); ++ e32 (ib, (reg-SET_BOOL_CONST_offset) >> 2); ++ } else { ++ e32 (ib, CP_PACKET0 (reg, num-1)); ++ } ++} ++ ++/* write a single register */ ++inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val) ++{ ++ pack0 (ib, reg, 1); ++ e32 (ib, val); ++} ++ ++/* Flush the indirect buffer to the kernel for submission to the card */ ++void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ drmBufPtr buffer = ib; ++ int start = 0; ++ drm_radeon_indirect_t indirect; ++ ++ if (!buffer) return; ++ ++ //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", ++ // buffer->idx); ++ ++ while (buffer->used & 0x3c){ ++ e32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ ++ } ++ ++ //ErrorF("buffer bytes: %d\n", buffer->used); ++ ++ indirect.idx = buffer->idx; ++ indirect.start = start; ++ indirect.end = buffer->used; ++ indirect.discard = 1; ++ ++ drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, ++ &indirect, sizeof(drm_radeon_indirect_t)); ++ ++} ++ ++void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ if (!ib) return; ++ ++ ib->used = 0; ++ R600CPFlushIndirect(pScrn, ib); ++} ++ ++void ++wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ ++ //flush caches, don't generate timestamp ++ pack3 (ib, IT_EVENT_WRITE, 1); ++ e32 (ib, CACHE_FLUSH_AND_INV_EVENT); ++ // wait for 3D idle clean ++ ereg (ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | ++ WAIT_3D_IDLECLEAN_bit)); ++} ++ ++void ++wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ ++ ereg (ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); ++ ++} ++ ++static void ++reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ int i; ++ ++ pack0 (ib, CB_COLOR0_INFO, 8); ++ for (i = 0; i < 8; i++) ++ e32 (ib, 0); ++} ++ ++static void ++reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ int i; ++ ++ 
wait_3d_idle(pScrn, ib); ++ ++ pack0 (ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num); ++ for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++) ++ e32 (ib, 0); ++ pack0 (ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num); ++ for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++) ++ e32 (ib, 0); ++ ++ wait_3d_idle(pScrn, ib); ++} ++ ++static void ++reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ int i; ++ ++ for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) { ++ pack0 (ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3); ++ e32 (ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift); ++ e32 (ib, MAX_LOD_mask); ++ e32 (ib, 0); ++ } ++} ++ ++static void ++reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ int i; ++ ++ const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2); ++ ++ pack0 (ib, SQ_ALU_CONSTANT, count); ++ for (i = 0; i < count; i++) ++ efloat (ib, 0.0); ++} ++ ++static void ++reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ int i; ++ ++ for (i = 0; i < SQ_BOOL_CONST_0_num; i++) ++ ereg (ib, SQ_BOOL_CONST_0 + (i << 2), 0); ++ ++ pack0 (ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num); ++ ++ for (i = 0; i < SQ_LOOP_CONST_all_num; i++) ++ e32 (ib, 0); ++ ++} ++ ++void ++start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ ++ if (info->ChipFamily < CHIP_FAMILY_RV770) { ++ pack3 (ib, IT_START_3D_CMDBUF, 1); ++ e32 (ib, 0); ++ } ++ ++ pack3 (ib, IT_CONTEXT_CONTROL, 2); ++ e32 (ib, 0x80000000); ++ e32 (ib, 0x80000000); ++ ++ wait_3d_idle_clean (pScrn, ib); ++} ++ ++/* ++ * Setup of functional groups ++ */ ++ ++// asic stack/thread/gpr limits - need to query the drm ++static void ++sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) ++{ ++ uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; ++ uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; ++ RADEONInfoPtr info = 
RADEONPTR(pScrn); ++ ++ if ((info->ChipFamily == CHIP_FAMILY_RV610) || ++ (info->ChipFamily == CHIP_FAMILY_RV620) || ++ (info->ChipFamily == CHIP_FAMILY_RS780) || ++ (info->ChipFamily == CHIP_FAMILY_RV710)) ++ sq_config = 0; // no VC ++ else ++ sq_config = VC_ENABLE_bit; ++ ++ sq_config |= (DX9_CONSTS_bit | ++ ALU_INST_PREFER_VECTOR_bit | ++ (sq_conf->ps_prio << PS_PRIO_shift) | ++ (sq_conf->vs_prio << VS_PRIO_shift) | ++ (sq_conf->gs_prio << GS_PRIO_shift) | ++ (sq_conf->es_prio << ES_PRIO_shift)); ++ ++ sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | ++ (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | ++ (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); ++ sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | ++ (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); ++ ++ sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | ++ (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | ++ (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | ++ (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); ++ ++ sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | ++ (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); ++ ++ sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | ++ (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); ++ ++ pack0 (ib, SQ_CONFIG, 6); ++ e32 (ib, sq_config); ++ e32 (ib, sq_gpr_resource_mgmt_1); ++ e32 (ib, sq_gpr_resource_mgmt_2); ++ e32 (ib, sq_thread_resource_mgmt); ++ e32 (ib, sq_stack_resource_mgmt_1); ++ e32 (ib, sq_stack_resource_mgmt_2); ++ ++} ++ ++void ++set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) ++{ ++ uint32_t cb_color_info; ++ int pitch, slice, h; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ ++ cb_color_info = ((cb_conf->endian << ENDIAN_shift) | ++ (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | ++ (cb_conf->array_mode << 
CB_COLOR0_INFO__ARRAY_MODE_shift) | ++ (cb_conf->number_type << NUMBER_TYPE_shift) | ++ (cb_conf->comp_swap << COMP_SWAP_shift) | ++ (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); ++ if (cb_conf->read_size) ++ cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; ++ if (cb_conf->blend_clamp) ++ cb_color_info |= BLEND_CLAMP_bit; ++ if (cb_conf->clear_color) ++ cb_color_info |= CLEAR_COLOR_bit; ++ if (cb_conf->blend_bypass) ++ cb_color_info |= BLEND_BYPASS_bit; ++ if (cb_conf->blend_float32) ++ cb_color_info |= BLEND_FLOAT32_bit; ++ if (cb_conf->simple_float) ++ cb_color_info |= SIMPLE_FLOAT_bit; ++ if (cb_conf->round_mode) ++ cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; ++ if (cb_conf->tile_compact) ++ cb_color_info |= TILE_COMPACT_bit; ++ if (cb_conf->source_format) ++ cb_color_info |= SOURCE_FORMAT_bit; ++ ++ pitch = (cb_conf->w / 8) - 1; ++ h = (cb_conf->h + 7) & ~7; ++ slice = ((cb_conf->w * h) / 64) - 1; ++ ++ ereg (ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); ++ ++ // rv6xx workaround ++ if ((info->ChipFamily > CHIP_FAMILY_R600) && ++ (info->ChipFamily < CHIP_FAMILY_RV770)) { ++ pack3 (ib, IT_SURFACE_BASE_UPDATE, 1); ++ e32 (ib, (2 << cb_conf->id)); ++ } ++ ++ // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib ++ ereg (ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | ++ (slice << SLICE_TILE_MAX_shift))); ++ ereg (ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | ++ (0 << SLICE_MAX_shift))); ++ ereg (ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); ++ ereg (ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 ++ ereg (ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 ++ ereg (ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | ++ (0 << FMASK_TILE_MAX_shift))); ++} ++ ++void ++cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, 
uint64_t mc_addr) ++{ ++ uint32_t cp_coher_size; ++ if (size == 0xffffffff) ++ cp_coher_size = 0xffffffff; ++ else ++ cp_coher_size = ((size + 255) >> 8); ++ ++ pack3 (ib, IT_SURFACE_SYNC, 4); ++ e32 (ib, sync_type); ++ e32 (ib, cp_coher_size); ++ e32 (ib, (mc_addr >> 8)); ++ e32 (ib, 10); /* poll interval */ ++} ++ ++/* inserts a wait for vline in the command stream */ ++void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, ++ int crtc, int start, int stop, Bool enable) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); ++ uint32_t offset; ++ RADEONCrtcPrivatePtr radeon_crtc; ++ ++ if (!enable) ++ return; ++ ++ if ((crtc < 0) || (crtc > 1)) ++ return; ++ ++ if (stop < start) ++ return; ++ ++ if (!xf86_config->crtc[crtc]->enabled) ++ return; ++ ++#ifdef USE_EXA ++ if (info->useEXA) ++ offset = exaGetPixmapOffset(pPix); ++ else ++#endif ++ offset = pPix->devPrivate.ptr - info->FB; ++ ++ /* if drawing to front buffer */ ++ if (offset != 0) ++ return; ++ ++ start = max(start, 0); ++ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); ++ ++ if (start > xf86_config->crtc[crtc]->mode.VDisplay) ++ return; ++ ++ radeon_crtc = xf86_config->crtc[crtc]->driver_private; ++ ++ /* set the VLINE range */ ++ ereg(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, ++ (start << AVIVO_D1MODE_VLINE_START_SHIFT) | ++ (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); ++ ++ /* tell the CP to poll the VLINE state register */ ++ pack3 (ib, IT_WAIT_REG_MEM, 6); ++ e32 (ib, IT_WAIT_REG | IT_WAIT_EQ); ++ e32 (ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset)); ++ e32 (ib, 0); ++ e32 (ib, 0); // Ref value ++ e32 (ib, AVIVO_D1MODE_VLINE_STAT); // Mask ++ e32 (ib, 10); // Wait interval ++} ++ ++void ++fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) ++{ ++ uint32_t sq_pgm_resources; ++ ++ sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | ++ (fs_conf->stack_size 
<< STACK_SIZE_shift)); ++ ++ if (fs_conf->dx10_clamp) ++ sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; ++ ++ ereg (ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); ++ ereg (ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); ++ ereg (ib, SQ_PGM_CF_OFFSET_FS, 0); ++} ++ ++void ++vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) ++{ ++ uint32_t sq_pgm_resources; ++ ++ sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | ++ (vs_conf->stack_size << STACK_SIZE_shift)); ++ ++ if (vs_conf->dx10_clamp) ++ sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; ++ if (vs_conf->fetch_cache_lines) ++ sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); ++ if (vs_conf->uncached_first_inst) ++ sq_pgm_resources |= UNCACHED_FIRST_INST_bit; ++ ++ ereg (ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); ++ ereg (ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); ++ ereg (ib, SQ_PGM_CF_OFFSET_VS, 0); ++} ++ ++void ++ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) ++{ ++ uint32_t sq_pgm_resources; ++ ++ sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | ++ (ps_conf->stack_size << STACK_SIZE_shift)); ++ ++ if (ps_conf->dx10_clamp) ++ sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; ++ if (ps_conf->fetch_cache_lines) ++ sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); ++ if (ps_conf->uncached_first_inst) ++ sq_pgm_resources |= UNCACHED_FIRST_INST_bit; ++ if (ps_conf->clamp_consts) ++ sq_pgm_resources |= CLAMP_CONSTS_bit; ++ ++ ereg (ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); ++ ereg (ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); ++ ereg (ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); ++ ereg (ib, SQ_PGM_CF_OFFSET_PS, 0); ++} ++ ++void ++set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) ++{ ++ int i; ++ const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); ++ ++ pack0 (ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, 
countreg); ++ for (i = 0; i < countreg; i++) ++ efloat (ib, const_buf[i]); ++} ++ ++void ++set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) ++{ ++ uint32_t sq_vtx_constant_word2; ++ ++ sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | ++ ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | ++ (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | ++ (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | ++ (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); ++ if (res->clamp_x) ++ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; ++ ++ if (res->format_comp_all) ++ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; ++ ++ if (res->srf_mode_all) ++ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; ++ ++ pack0 (ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); ++ e32 (ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS ++ e32 (ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE ++ e32 (ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN ++ e32 (ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 
++ e32 (ib, 0); // 4: n/a ++ e32 (ib, 0); // 5: n/a ++ e32 (ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE ++} ++ ++void ++set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) ++{ ++ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; ++ uint32_t sq_tex_resource_word5, sq_tex_resource_word6; ++ ++ sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | ++ (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); ++ ++ if (tex_res->w) ++ sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | ++ ((tex_res->w - 1) << TEX_WIDTH_shift)); ++ ++ if (tex_res->tile_type) ++ sq_tex_resource_word0 |= TILE_TYPE_bit; ++ ++ sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); ++ ++ if (tex_res->h) ++ sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); ++ if (tex_res->depth) ++ sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); ++ ++ sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | ++ (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | ++ (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | ++ (tex_res->format_comp_w << FORMAT_COMP_W_shift) | ++ (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | ++ (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | ++ (tex_res->request_size << REQUEST_SIZE_shift) | ++ (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | ++ (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | ++ (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | ++ (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | ++ (tex_res->base_level << BASE_LEVEL_shift)); ++ ++ if (tex_res->srf_mode_all) ++ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; ++ if (tex_res->force_degamma) ++ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; ++ ++ 
sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | ++ (tex_res->base_array << BASE_ARRAY_shift) | ++ (tex_res->last_array << LAST_ARRAY_shift)); ++ ++ sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | ++ (tex_res->perf_modulation << PERF_MODULATION_shift) | ++ (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); ++ ++ if (tex_res->interlaced) ++ sq_tex_resource_word6 |= INTERLACED_bit; ++ ++ pack0 (ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); ++ e32 (ib, sq_tex_resource_word0); ++ e32 (ib, sq_tex_resource_word1); ++ e32 (ib, ((tex_res->base) >> 8)); ++ e32 (ib, ((tex_res->mip_base) >> 8)); ++ e32 (ib, sq_tex_resource_word4); ++ e32 (ib, sq_tex_resource_word5); ++ e32 (ib, sq_tex_resource_word6); ++} ++ ++void ++set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) ++{ ++ uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; ++ ++ sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | ++ (s->clamp_y << CLAMP_Y_shift) | ++ (s->clamp_z << CLAMP_Z_shift) | ++ (s->xy_mag_filter << XY_MAG_FILTER_shift) | ++ (s->xy_min_filter << XY_MIN_FILTER_shift) | ++ (s->z_filter << Z_FILTER_shift) | ++ (s->mip_filter << MIP_FILTER_shift) | ++ (s->border_color << BORDER_COLOR_TYPE_shift) | ++ (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | ++ (s->chroma_key << CHROMA_KEY_shift)); ++ if (s->point_sampling_clamp) ++ sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; ++ if (s->tex_array_override) ++ sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; ++ if (s->lod_uses_minor_axis) ++ sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; ++ ++ sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | ++ (s->max_lod << MAX_LOD_shift) | ++ (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); ++ ++ sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | ++ (s->perf_mip << PERF_MIP_shift) | ++ (s->perf_z << PERF_Z_shift)); ++ if (s->mc_coord_truncate) ++ 
sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; ++ if (s->force_degamma) ++ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; ++ if (s->high_precision_filter) ++ sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; ++ if (s->fetch_4) ++ sq_tex_sampler_word2 |= FETCH_4_bit; ++ if (s->sample_is_pcf) ++ sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; ++ if (s->type) ++ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; ++ ++ pack0 (ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); ++ e32 (ib, sq_tex_sampler_word0); ++ e32 (ib, sq_tex_sampler_word1); ++ e32 (ib, sq_tex_sampler_word2); ++} ++ ++//XXX deal with clip offsets in clip setup ++void ++set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) ++{ ++ ++ ereg (ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | ++ (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); ++ ereg (ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | ++ (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); ++} ++ ++void ++set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) ++{ ++ ++ ereg (ib, PA_SC_VPORT_SCISSOR_0_TL + ++ id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | ++ (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | ++ WINDOW_OFFSET_DISABLE_bit)); ++ ereg (ib, PA_SC_VPORT_SCISSOR_0_BR + ++ id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | ++ (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); ++} ++ ++void ++set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) ++{ ++ ++ ereg (ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | ++ (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | ++ WINDOW_OFFSET_DISABLE_bit)); ++ ereg (ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | ++ (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift))); ++} ++ ++void 
++set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) ++{ ++ ++ ereg (ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | ++ (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | ++ WINDOW_OFFSET_DISABLE_bit)); ++ ereg (ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | ++ (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); ++} ++ ++void ++set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) ++{ ++ ++ ereg (ib, PA_SC_CLIPRECT_0_TL + ++ id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | ++ (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); ++ ereg (ib, PA_SC_CLIPRECT_0_BR + ++ id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | ++ (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); ++} ++ ++/* ++ * Setup of default state ++ */ ++ ++void ++set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) ++{ ++ tex_resource_t tex_res; ++ shader_config_t fs_conf; ++ sq_config_t sq_conf; ++ int i; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_accel_state *accel_state = info->accel_state; ++ ++ memset(&tex_res, 0, sizeof(tex_resource_t)); ++ memset(&fs_conf, 0, sizeof(shader_config_t)); ++ ++#if 1 ++ if (accel_state->XInited3D) ++ return; ++#endif ++ ++ accel_state->XInited3D = TRUE; ++ ++ wait_3d_idle(pScrn, ib); ++ ++ // ASIC specific setup, see drm ++ if (info->ChipFamily < CHIP_FAMILY_RV770) { ++ ereg (ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | ++ (28 << TD_FIFO_CREDIT_shift))); ++ ereg (ib, VC_ENHANCE, 0); ++ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); ++ ereg (ib, DB_DEBUG, 0x82000000); /* ? 
*/ ++ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | ++ (16 << DEPTH_FLUSH_shift) | ++ (0 << FORCE_SUMMARIZE_shift) | ++ (4 << DEPTH_PENDING_FREE_shift) | ++ (16 << DEPTH_CACHELINE_FREE_shift) | ++ 0)); ++ } else { ++ ereg (ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | ++ (28 << TD_FIFO_CREDIT_shift))); ++ ereg (ib, VC_ENHANCE, 0); ++ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); ++ ereg (ib, DB_DEBUG, 0); ++ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | ++ (16 << DEPTH_FLUSH_shift) | ++ (0 << FORCE_SUMMARIZE_shift) | ++ (4 << DEPTH_PENDING_FREE_shift) | ++ (4 << DEPTH_CACHELINE_FREE_shift) | ++ 0)); ++ } ++ ++ reset_td_samplers(pScrn, ib); ++ reset_dx9_alu_consts(pScrn, ib); ++ reset_bool_loop_const (pScrn, ib); ++ reset_sampler_const (pScrn, ib); ++ ++ // SQ ++ sq_conf.ps_prio = 0; ++ sq_conf.vs_prio = 1; ++ sq_conf.gs_prio = 2; ++ sq_conf.es_prio = 3; ++ // need to set stack/thread/gpr limits based on the asic ++ // for now just set them low enough so any card will work ++ // see r600_cp.c in the drm ++ switch (info->ChipFamily) { ++ case CHIP_FAMILY_R600: ++ sq_conf.num_ps_gprs = 192; ++ sq_conf.num_vs_gprs = 56; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 136; ++ sq_conf.num_vs_threads = 48; ++ sq_conf.num_gs_threads = 4; ++ sq_conf.num_es_threads = 4; ++ sq_conf.num_ps_stack_entries = 128; ++ sq_conf.num_vs_stack_entries = 128; ++ sq_conf.num_gs_stack_entries = 0; ++ sq_conf.num_es_stack_entries = 0; ++ break; ++ case CHIP_FAMILY_RV630: ++ case CHIP_FAMILY_RV635: ++ sq_conf.num_ps_gprs = 84; ++ sq_conf.num_vs_gprs = 36; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 144; ++ sq_conf.num_vs_threads = 40; ++ sq_conf.num_gs_threads = 4; ++ sq_conf.num_es_threads = 4; ++ sq_conf.num_ps_stack_entries = 40; ++ sq_conf.num_vs_stack_entries = 40; ++ sq_conf.num_gs_stack_entries = 32; ++ 
sq_conf.num_es_stack_entries = 16; ++ break; ++ case CHIP_FAMILY_RV610: ++ case CHIP_FAMILY_RV620: ++ case CHIP_FAMILY_RS780: ++ default: ++ sq_conf.num_ps_gprs = 84; ++ sq_conf.num_vs_gprs = 36; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 136; ++ sq_conf.num_vs_threads = 48; ++ sq_conf.num_gs_threads = 4; ++ sq_conf.num_es_threads = 4; ++ sq_conf.num_ps_stack_entries = 40; ++ sq_conf.num_vs_stack_entries = 40; ++ sq_conf.num_gs_stack_entries = 32; ++ sq_conf.num_es_stack_entries = 16; ++ break; ++ case CHIP_FAMILY_RV670: ++ sq_conf.num_ps_gprs = 144; ++ sq_conf.num_vs_gprs = 40; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 136; ++ sq_conf.num_vs_threads = 48; ++ sq_conf.num_gs_threads = 4; ++ sq_conf.num_es_threads = 4; ++ sq_conf.num_ps_stack_entries = 40; ++ sq_conf.num_vs_stack_entries = 40; ++ sq_conf.num_gs_stack_entries = 32; ++ sq_conf.num_es_stack_entries = 16; ++ break; ++ case CHIP_FAMILY_RV770: ++ sq_conf.num_ps_gprs = 192; ++ sq_conf.num_vs_gprs = 56; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 188; ++ sq_conf.num_vs_threads = 60; ++ sq_conf.num_gs_threads = 0; ++ sq_conf.num_es_threads = 0; ++ sq_conf.num_ps_stack_entries = 256; ++ sq_conf.num_vs_stack_entries = 256; ++ sq_conf.num_gs_stack_entries = 0; ++ sq_conf.num_es_stack_entries = 0; ++ break; ++ case CHIP_FAMILY_RV730: ++ sq_conf.num_ps_gprs = 84; ++ sq_conf.num_vs_gprs = 36; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 188; ++ sq_conf.num_vs_threads = 60; ++ sq_conf.num_gs_threads = 0; ++ sq_conf.num_es_threads = 0; ++ sq_conf.num_ps_stack_entries = 128; ++ sq_conf.num_vs_stack_entries = 128; ++ sq_conf.num_gs_stack_entries = 0; ++ sq_conf.num_es_stack_entries = 0; ++ break; ++ case CHIP_FAMILY_RV710: ++ 
sq_conf.num_ps_gprs = 192; ++ sq_conf.num_vs_gprs = 56; ++ sq_conf.num_temp_gprs = 4; ++ sq_conf.num_gs_gprs = 0; ++ sq_conf.num_es_gprs = 0; ++ sq_conf.num_ps_threads = 144; ++ sq_conf.num_vs_threads = 48; ++ sq_conf.num_gs_threads = 0; ++ sq_conf.num_es_threads = 0; ++ sq_conf.num_ps_stack_entries = 128; ++ sq_conf.num_vs_stack_entries = 128; ++ sq_conf.num_gs_stack_entries = 0; ++ sq_conf.num_es_stack_entries = 0; ++ break; ++ } ++ ++ sq_setup(pScrn, ib, &sq_conf); ++ ++ ereg (ib, SQ_VTX_BASE_VTX_LOC, 0); ++ ereg (ib, SQ_VTX_START_INST_LOC, 0); ++ ++ pack0 (ib, SQ_ESGS_RING_ITEMSIZE, 9); ++ e32 (ib, 0); // SQ_ESGS_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_GSVS_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_ESTMP_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_GSTMP_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_VSTMP_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_PSTMP_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_FBUF_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_REDUC_RING_ITEMSIZE ++ e32 (ib, 0); // SQ_GS_VERT_ITEMSIZE ++ ++ // DB ++ ereg (ib, DB_DEPTH_INFO, 0); ++ ereg (ib, DB_STENCIL_CLEAR, 0); ++ ereg (ib, DB_DEPTH_CLEAR, 0); ++ ereg (ib, DB_STENCILREFMASK, 0); ++ ereg (ib, DB_STENCILREFMASK_BF, 0); ++ ereg (ib, DB_DEPTH_CONTROL, 0); ++ ereg (ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); ++ if (info->ChipFamily < CHIP_FAMILY_RV770) ++ ereg (ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit); ++ else ++ ereg (ib, DB_RENDER_OVERRIDE, 0); ++ ereg (ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | ++ (2 << ALPHA_TO_MASK_OFFSET1_shift) | ++ (2 << ALPHA_TO_MASK_OFFSET2_shift) | ++ (2 << ALPHA_TO_MASK_OFFSET3_shift))); ++ ++ // SX ++ ereg (ib, SX_ALPHA_TEST_CONTROL, 0); ++ ereg (ib, SX_ALPHA_REF, 0); ++ ++ // CB ++ reset_cb(pScrn, ib); ++ ++ pack0 (ib, CB_BLEND_RED, 4); ++ e32 (ib, 0x00000000); ++ e32 (ib, 0x00000000); ++ e32 (ib, 0x00000000); ++ e32 (ib, 0x00000000); ++ ++ /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */ ++ // RV6xx+ have per-MRT blend ++ if (info->ChipFamily > 
CHIP_FAMILY_R600) { ++ pack0 (ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num); ++ for (i = 0; i < CB_BLEND0_CONTROL_num; i++) ++ e32 (ib, 0); ++ } ++ ++ ereg (ib, CB_BLEND_CONTROL, 0); ++ ++ if (info->ChipFamily < CHIP_FAMILY_RV770) { ++ pack0 (ib, CB_FOG_RED, 3); ++ e32 (ib, 0x00000000); ++ e32 (ib, 0x00000000); ++ e32 (ib, 0x00000000); ++ } ++ ++ ereg (ib, CB_COLOR_CONTROL, 0); ++ pack0 (ib, CB_CLRCMP_CONTROL, 4); ++ e32 (ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC ++ e32 (ib, 0); // CB_CLRCMP_SRC ++ e32 (ib, 0); // CB_CLRCMP_DST ++ e32 (ib, 0); // CB_CLRCMP_MSK ++ ++ ++ if (info->ChipFamily < CHIP_FAMILY_RV770) { ++ pack0 (ib, CB_CLEAR_RED, 4); ++ efloat(ib, 1.0); /* WTF? */ ++ efloat(ib, 0.0); ++ efloat(ib, 1.0); ++ efloat(ib, 1.0); ++ } ++ ereg (ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); ++ ++ // SC ++ set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192); ++ set_screen_scissor (pScrn, ib, 0, 0, 8192, 8192); ++ ereg (ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | ++ (0 << WINDOW_Y_OFFSET_shift))); ++ set_window_scissor (pScrn, ib, 0, 0, 8192, 8192); ++ ++ ereg (ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); ++ ++ /* clip boolean is set to always visible -> doesn't matter */ ++ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) ++ set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); ++ ++ if (info->ChipFamily < CHIP_FAMILY_RV770) ++ ereg (ib, R7xx_PA_SC_EDGERULE, 0x00000000); ++ else ++ ereg (ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); /* ? */ ++ ++ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { ++ set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); ++ pack0 (ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); ++ efloat(ib, 0.0); ++ efloat(ib, 1.0); ++ } ++ ++ if (info->ChipFamily < CHIP_FAMILY_RV770) ++ ereg (ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); ++ else ++ ereg (ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | ++ 0x00500000)); /* ? 
*/ ++ ++ ereg (ib, PA_SC_LINE_CNTL, 0); ++ ereg (ib, PA_SC_AA_CONFIG, 0); ++ ereg (ib, PA_SC_AA_MASK, 0xFFFFFFFF); ++ ++ //XXX: double check this ++ if (info->ChipFamily > CHIP_FAMILY_R600) { ++ ereg (ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); ++ ereg (ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); ++ } ++ ++ ereg (ib, PA_SC_LINE_STIPPLE, 0); ++ ereg (ib, PA_SC_MPASS_PS_CNTL, 0); ++ ++ // CL ++ pack0 (ib, PA_CL_VPORT_XSCALE_0, 6); ++ efloat (ib, 0.0f); // PA_CL_VPORT_XSCALE ++ efloat (ib, 0.0f); // PA_CL_VPORT_XOFFSET ++ efloat (ib, 0.0f); // PA_CL_VPORT_YSCALE ++ efloat (ib, 0.0f); // PA_CL_VPORT_YOFFSET ++ efloat (ib, 0.0f); // PA_CL_VPORT_ZSCALE ++ efloat (ib, 0.0f); // PA_CL_VPORT_ZOFFSET ++ ereg (ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit)); ++ ereg (ib, PA_CL_VTE_CNTL, 0); ++ ereg (ib, PA_CL_VS_OUT_CNTL, 0); ++ ereg (ib, PA_CL_NANINF_CNTL, 0); ++ pack0 (ib, PA_CL_GB_VERT_CLIP_ADJ, 4); ++ efloat (ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ ++ efloat (ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ ++ efloat (ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ ++ efloat (ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ ++ ++ /* user clipping planes are disabled by default */ ++ pack0 (ib, PA_CL_UCP_0_X, 24); ++ for (i = 0; i < 24; i++) ++ efloat (ib, 0.0); ++ ++ // SU ++ ereg (ib, PA_SU_SC_MODE_CNTL, FACE_bit); ++ ereg (ib, PA_SU_POINT_SIZE, 0); ++ ereg (ib, PA_SU_POINT_MINMAX, 0); ++ ereg (ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0); ++ ereg (ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0); ++ ereg (ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0); ++ ereg (ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0); ++ ereg (ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); ++ ++ ereg (ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ ++ ereg (ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | ++ (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ ++ ereg (ib, PA_SU_POLY_OFFSET_CLAMP, 0); ++ ++ // SPI ++ if (info->ChipFamily < CHIP_FAMILY_RV770) ++ ereg (ib, R7xx_SPI_THREAD_GROUPING, 0); ++ else ++ ereg 
(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); ++ ++ ereg (ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) | ++ (3 << PNT_SPRITE_OVRD_Y_shift) | ++ (0 << PNT_SPRITE_OVRD_Z_shift) | ++ (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */ ++ ereg (ib, SPI_INPUT_Z, 0); ++ ereg (ib, SPI_FOG_CNTL, 0); ++ ereg (ib, SPI_FOG_FUNC_SCALE, 0); ++ ereg (ib, SPI_FOG_FUNC_BIAS, 0); ++ ++ pack0 (ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num); ++ for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */ ++ e32 (ib, 0x03020100 + i*0x04040404); ++ ereg (ib, SPI_VS_OUT_CONFIG, 0); ++ ++ // clear FS ++ fs_setup(pScrn, ib, &fs_conf); ++ ++ // VGT ++ ereg (ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ ++ ereg (ib, VGT_MIN_VTX_INDX, 0); ++ ereg (ib, VGT_INDX_OFFSET, 0); ++ ereg (ib, VGT_INSTANCE_STEP_RATE_0, 0); ++ ereg (ib, VGT_INSTANCE_STEP_RATE_1, 0); ++ ++ ereg (ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); ++ ereg (ib, VGT_OUTPUT_PATH_CNTL, 0); ++ ereg (ib, VGT_GS_MODE, 0); ++ ereg (ib, VGT_HOS_CNTL, 0); ++ ereg (ib, VGT_HOS_MAX_TESS_LEVEL, 0); ++ ereg (ib, VGT_HOS_MIN_TESS_LEVEL, 0); ++ ereg (ib, VGT_HOS_REUSE_DEPTH, 0); ++ ereg (ib, VGT_GROUP_PRIM_TYPE, 0); ++ ereg (ib, VGT_GROUP_FIRST_DECR, 0); ++ ereg (ib, VGT_GROUP_DECR, 0); ++ ereg (ib, VGT_GROUP_VECT_0_CNTL, 0); ++ ereg (ib, VGT_GROUP_VECT_1_CNTL, 0); ++ ereg (ib, VGT_GROUP_VECT_0_FMT_CNTL, 0); ++ ereg (ib, VGT_GROUP_VECT_1_FMT_CNTL, 0); ++ ereg (ib, VGT_PRIMITIVEID_EN, 0); ++ ereg (ib, VGT_MULTI_PRIM_IB_RESET_EN, 0); ++ ereg (ib, VGT_STRMOUT_EN, 0); ++ ereg (ib, VGT_REUSE_OFF, 0); ++ ereg (ib, VGT_VTX_CNT_EN, 0); ++ ereg (ib, VGT_STRMOUT_BUFFER_EN, 0); ++ ++ // clear tex resources - PS ++ for (i = 0; i < 16; i++) { ++ tex_res.id = i; ++ set_tex_resource(pScrn, ib, &tex_res); ++ } ++ ++ // clear tex resources - VS ++ for (i = 160; i < 164; i++) { ++ tex_res.id = i; ++ set_tex_resource(pScrn, ib, &tex_res); ++ } ++ ++ // clear tex resources - FS ++ for (i = 320; i < 335; i++) { ++ 
tex_res.id = i; ++ set_tex_resource(pScrn, ib, &tex_res); ++ } ++ ++} ++ ++ ++/* ++ * Commands ++ */ ++ ++void ++draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) ++{ ++ uint32_t i, count; ++ ++ ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); ++ pack3 (ib, IT_INDEX_TYPE, 1); ++ e32 (ib, draw_conf->index_type); ++ pack3 (ib, IT_NUM_INSTANCES, 1); ++ e32 (ib, draw_conf->num_instances); ++ ++ // calculate num of packets ++ count = 2; ++ if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) ++ count += (draw_conf->num_indices + 1) / 2; ++ else ++ count += draw_conf->num_indices; ++ ++ pack3 (ib, IT_DRAW_INDEX_IMMD, count); ++ e32 (ib, draw_conf->num_indices); ++ e32 (ib, draw_conf->vgt_draw_initiator); ++ ++ if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { ++ for (i = 0; i < draw_conf->num_indices; i += 2) { ++ if ((i + 1) == draw_conf->num_indices) ++ e32 (ib, indices[i]); ++ else ++ e32 (ib, (indices[i] | (indices[i + 1] << 16))); ++ } ++ } else { ++ for (i = 0; i < draw_conf->num_indices; i++) ++ e32 (ib, indices[i]); ++ } ++} ++ ++void ++draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) ++{ ++ ++ ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); ++ pack3 (ib, IT_INDEX_TYPE, 1); ++ e32 (ib, draw_conf->index_type); ++ pack3 (ib, IT_NUM_INSTANCES, 1); ++ e32 (ib, draw_conf->num_instances); ++ pack3 (ib, IT_DRAW_INDEX_AUTO, 2); ++ e32 (ib, draw_conf->num_indices); ++ e32 (ib, draw_conf->vgt_draw_initiator); ++} +diff --git a/src/radeon.h b/src/radeon.h +index 2944fe8..a7ed95e 100644 +--- a/src/radeon.h ++++ b/src/radeon.h +@@ -354,6 +354,8 @@ typedef enum { + + #define IS_DCE32_VARIANT ((info->ChipFamily >= CHIP_FAMILY_RV730)) + ++#define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600) ++ + #define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \ + (info->ChipFamily == CHIP_FAMILY_R520) || \ + (info->ChipFamily == CHIP_FAMILY_RV530) || \ +@@ -569,6 +571,38 @@ struct radeon_dri { + }; + #endif + 
++#ifdef XF86DRI ++#ifdef USE_EXA ++struct r6xx_solid_vertex { ++ float x; ++ float y; ++}; ++ ++struct r6xx_copy_vertex { ++ float x; ++ float y; ++ float s; ++ float t; ++}; ++ ++struct r6xx_comp_vertex { ++ float x; ++ float y; ++ float src_s; ++ float src_t; ++}; ++ ++struct r6xx_comp_mask_vertex { ++ float x; ++ float y; ++ float src_s; ++ float src_t; ++ float mask_s; ++ float mask_t; ++}; ++#endif ++#endif ++ + struct radeon_accel_state { + /* common accel data */ + int fifo_slots; /* Free slots in the FIFO (64 max) */ +@@ -609,6 +643,51 @@ struct radeon_accel_state { + Bool src_tile_height; + + Bool vsync; ++ ++ drmBufPtr ib; ++ int vb_index; ++ ++ // shader storage ++ ExaOffscreenArea *shaders; ++ uint32_t solid_vs_offset; ++ uint32_t solid_ps_offset; ++ uint32_t copy_vs_offset; ++ uint32_t copy_ps_offset; ++ uint32_t comp_vs_offset; ++ uint32_t comp_ps_offset; ++ uint32_t comp_mask_vs_offset; ++ uint32_t comp_mask_ps_offset; ++ uint32_t xv_vs_offset; ++ uint32_t xv_ps_offset_packed; ++ uint32_t xv_ps_offset_planar; ++ ++ //size/addr stuff ++ uint32_t src_size[2]; ++ uint64_t src_mc_addr[2]; ++ uint32_t src_pitch[2]; ++ uint32_t src_width[2]; ++ uint32_t src_height[2]; ++ uint32_t src_bpp[2]; ++ uint32_t dst_size; ++ uint64_t dst_mc_addr; ++ uint32_t dst_pitch; ++ uint32_t dst_height; ++ uint32_t dst_bpp; ++ uint32_t vs_size; ++ uint64_t vs_mc_addr; ++ uint32_t ps_size; ++ uint64_t ps_mc_addr; ++ uint32_t vb_size; ++ uint64_t vb_mc_addr; ++ ++ // UTS/DFS ++ drmBufPtr scratch; ++ ++ // copy ++ ExaOffscreenArea *copy_area; ++ Bool same_surface; ++ int rop; ++ uint32_t planemask; + #endif + + #ifdef USE_XAA +@@ -947,11 +1026,11 @@ extern Bool radeon_card_posted(ScrnInfoPtr pScrn); + #ifdef XF86DRI + extern void RADEONWaitForIdleCP(ScrnInfoPtr pScrn); + extern void RADEONWaitForVLineCP(ScrnInfoPtr pScrn, PixmapPtr pPix, +- int crtc, int start, int stop, int enable); ++ int crtc, int start, int stop); + #endif + extern void RADEONWaitForIdleMMIO(ScrnInfoPtr 
pScrn); + extern void RADEONWaitForVLineMMIO(ScrnInfoPtr pScrn, PixmapPtr pPix, +- int crtc, int start, int stop, int enable); ++ int crtc, int start, int stop); + + /* radeon_crtc.c */ + extern void radeon_crtc_dpms(xf86CrtcPtr crtc, int mode); +@@ -1035,6 +1114,7 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn, + uint32_t dst_pitch_offset, + uint32_t datatype, int rop, + Pixel planemask); ++extern Bool R600DrawInit(ScreenPtr pScreen); + #endif + + #if defined(XF86DRI) && defined(USE_EXA) +@@ -1119,15 +1199,16 @@ do { \ + #define RADEONCP_STOP(pScrn, info) \ + do { \ + int _ret; \ +- if (info->cp->CPStarted) { \ ++ if (info->cp->CPStarted) { \ + _ret = RADEONCPStop(pScrn, info); \ + if (_ret) { \ + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ + "%s: CP stop %d\n", __FUNCTION__, _ret); \ + } \ + info->cp->CPStarted = FALSE; \ +- } \ +- RADEONEngineRestore(pScrn); \ ++ } \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) \ ++ RADEONEngineRestore(pScrn); \ + info->cp->CPRuns = FALSE; \ + } while (0) + +@@ -1235,28 +1316,31 @@ do { \ + if (RADEON_VERBOSE) \ + xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ + "FLUSH_RING in %s\n", __FUNCTION__); \ +- if (info->cp->indirectBuffer) { \ ++ if (info->cp->indirectBuffer) \ + RADEONCPFlushIndirect(pScrn, 0); \ +- } \ + } while (0) + + + #define RADEON_WAIT_UNTIL_2D_IDLE() \ + do { \ +- BEGIN_RING(2); \ +- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ +- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ +- RADEON_WAIT_HOST_IDLECLEAN)); \ +- ADVANCE_RING(); \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) { \ ++ BEGIN_RING(2); \ ++ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ ++ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ ++ RADEON_WAIT_HOST_IDLECLEAN)); \ ++ ADVANCE_RING(); \ ++ } \ + } while (0) + + #define RADEON_WAIT_UNTIL_3D_IDLE() \ + do { \ +- BEGIN_RING(2); \ +- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ +- OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ +- RADEON_WAIT_HOST_IDLECLEAN)); \ +- ADVANCE_RING(); \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) 
{ \ ++ BEGIN_RING(2); \ ++ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ ++ OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ ++ RADEON_WAIT_HOST_IDLECLEAN)); \ ++ ADVANCE_RING(); \ ++ } \ + } while (0) + + #define RADEON_WAIT_UNTIL_IDLE() \ +@@ -1265,38 +1349,44 @@ do { \ + xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ + "WAIT_UNTIL_IDLE() in %s\n", __FUNCTION__); \ + } \ +- BEGIN_RING(2); \ +- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ +- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ +- RADEON_WAIT_3D_IDLECLEAN | \ +- RADEON_WAIT_HOST_IDLECLEAN)); \ +- ADVANCE_RING(); \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) { \ ++ BEGIN_RING(2); \ ++ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ ++ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ ++ RADEON_WAIT_3D_IDLECLEAN | \ ++ RADEON_WAIT_HOST_IDLECLEAN)); \ ++ ADVANCE_RING(); \ ++ } \ + } while (0) + + #define RADEON_PURGE_CACHE() \ + do { \ +- BEGIN_RING(2); \ +- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ +- OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ +- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ +- } else { \ +- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ +- OUT_RING(R300_RB3D_DC_FLUSH_ALL); \ +- } \ +- ADVANCE_RING(); \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) { \ ++ BEGIN_RING(2); \ ++ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ ++ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ ++ OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ ++ } else { \ ++ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ ++ OUT_RING(R300_RB3D_DC_FLUSH_ALL); \ ++ } \ ++ ADVANCE_RING(); \ ++ } \ + } while (0) + + #define RADEON_PURGE_ZCACHE() \ + do { \ +- BEGIN_RING(2); \ +- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ +- OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ +- OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ +- } else { \ +- OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ +- OUT_RING(R300_ZC_FLUSH_ALL); \ ++ if (info->ChipFamily < CHIP_FAMILY_R600) { \ ++ BEGIN_RING(2); \ ++ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ ++ 
OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ ++ OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ ++ } else { \ ++ OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ ++ OUT_RING(R300_ZC_FLUSH_ALL); \ ++ } \ ++ ADVANCE_RING(); \ + } \ +- ADVANCE_RING(); \ + } while (0) + + #endif /* XF86DRI */ +diff --git a/src/radeon_accel.c b/src/radeon_accel.c +index 96570e8..dffbc57 100644 +--- a/src/radeon_accel.c ++++ b/src/radeon_accel.c +@@ -78,6 +78,7 @@ + /* Driver data structures */ + #include "radeon.h" + #include "radeon_reg.h" ++#include "r600_reg.h" + #include "radeon_macros.h" + #include "radeon_probe.h" + #include "radeon_version.h" +@@ -92,6 +93,7 @@ + /* X and server generic header files */ + #include "xf86.h" + ++static void R600EngineReset(ScrnInfoPtr pScrn); + + #ifdef USE_XAA + static struct { +@@ -149,6 +151,37 @@ void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries) + } + } + ++void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ unsigned char *RADEONMMIO = info->MMIO; ++ int i; ++ ++ for (;;) { ++ for (i = 0; i < RADEON_TIMEOUT; i++) { ++ if (info->ChipFamily >= CHIP_FAMILY_RV770) ++ info->accel_state->fifo_slots = ++ INREG(R600_GRBM_STATUS) & R700_CMDFIFO_AVAIL_MASK; ++ else ++ info->accel_state->fifo_slots = ++ INREG(R600_GRBM_STATUS) & R600_CMDFIFO_AVAIL_MASK; ++ if (info->accel_state->fifo_slots >= entries) return; ++ } ++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "FIFO timed out: stat=0x%08x\n", ++ (unsigned int)INREG(R600_GRBM_STATUS)); ++ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, ++ "FIFO timed out, resetting engine...\n"); ++ R600EngineReset(pScrn); ++#ifdef XF86DRI ++ if (info->directRenderingEnabled) { ++ RADEONCP_RESET(pScrn, info); ++ RADEONCP_START(pScrn, info); ++ } ++#endif ++ } ++} ++ + /* Flush all dirty data in the Pixel Cache to memory */ + void RADEONEngineFlush(ScrnInfoPtr pScrn) + { +@@ -156,9 +189,6 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn) + 
unsigned char *RADEONMMIO = info->MMIO; + int i; + +- if (info->ChipFamily >= CHIP_FAMILY_R600) +- return; +- + if (info->ChipFamily <= CHIP_FAMILY_RV280) { + OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, + RADEON_RB3D_DC_FLUSH_ALL, +@@ -198,8 +228,6 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) + uint32_t rbbm_soft_reset; + uint32_t host_path_cntl; + +- if (info->ChipFamily >= CHIP_FAMILY_R600) +- return; + /* The following RBBM_SOFT_RESET sequence can help un-wedge + * an R300 after the command processor got stuck. + */ +@@ -310,6 +338,35 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) + OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl); + } + ++/* Reset graphics card to known state */ ++static void R600EngineReset(ScrnInfoPtr pScrn) ++{ ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ unsigned char *RADEONMMIO = info->MMIO; ++ uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl; ++ ++ cp_ptr = INREG(R600_CP_RB_WPTR); ++ ++ cp_me_cntl = INREG(R600_CP_ME_CNTL); ++ OUTREG(R600_CP_ME_CNTL, 0x10000000); ++ ++ OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); ++ INREG(R600_GRBM_SOFT_RESET); ++ usleep (50); ++ OUTREG(R600_GRBM_SOFT_RESET, 0); ++ INREG(R600_GRBM_SOFT_RESET); ++ ++ OUTREG(R600_CP_RB_WPTR_DELAY, 0); ++ cp_rb_cntl = INREG(R600_CP_RB_CNTL); ++ OUTREG(R600_CP_RB_CNTL, 0x80000000); ++ ++ OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); ++ OUTREG(R600_CP_RB_WPTR, cp_ptr); ++ OUTREG(R600_CP_RB_CNTL, cp_rb_cntl); ++ OUTREG(R600_CP_ME_CNTL, cp_me_cntl); ++ ++} ++ + /* Restore the acceleration hardware to its previous state */ + void RADEONEngineRestore(ScrnInfoPtr pScrn) + { +@@ -611,8 +668,12 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) + + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "GetBuffer timed out, resetting engine...\n"); +- RADEONEngineReset(pScrn); +- RADEONEngineRestore(pScrn); ++ ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ RADEONEngineReset(pScrn); ++ RADEONEngineRestore(pScrn); ++ } else ++ R600EngineReset(pScrn); + + /* Always restart the engine when doing CP 2D acceleration */ + RADEONCP_RESET(pScrn, 
info); +@@ -627,6 +688,8 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) + drmBufPtr buffer = info->cp->indirectBuffer; + int start = info->cp->indirectStart; + drm_radeon_indirect_t indirect; ++ RING_LOCALS; ++ RADEONCP_REFRESH(pScrn, info); + + if (!buffer) return; + if (start == buffer->used && !discard) return; +@@ -636,6 +699,14 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) + buffer->idx); + } + ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ while (buffer->used & 0x3c){ ++ BEGIN_RING(1); ++ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ ++ ADVANCE_RING(); ++ } ++ } ++ + indirect.idx = buffer->idx; + indirect.start = start; + indirect.end = buffer->used; +@@ -664,6 +735,19 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) + drmBufPtr buffer = info->cp->indirectBuffer; + int start = info->cp->indirectStart; + drm_radeon_indirect_t indirect; ++ RING_LOCALS; ++ RADEONCP_REFRESH(pScrn, info); ++ ++ ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ if (buffer) { ++ while (buffer->used & 0x3c) { ++ BEGIN_RING(1); ++ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ ++ ADVANCE_RING(); ++ } ++ } ++ } + + info->cp->indirectBuffer = NULL; + info->cp->indirectStart = 0; +@@ -926,20 +1010,26 @@ Bool RADEONAccelInit(ScreenPtr pScreen) + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + +- if (info->ChipFamily >= CHIP_FAMILY_R600) +- return FALSE; +- + #ifdef USE_EXA + if (info->useEXA) { + # ifdef XF86DRI + if (info->directRenderingEnabled) { +- if (!RADEONDrawInitCP(pScreen)) +- return FALSE; ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ if (!R600DrawInit(pScreen)) ++ return FALSE; ++ } else { ++ if (!RADEONDrawInitCP(pScreen)) ++ return FALSE; ++ } + } else + # endif /* XF86DRI */ + { +- if (!RADEONDrawInitMMIO(pScreen)) ++ if (info->ChipFamily >= CHIP_FAMILY_R600) + return FALSE; ++ else { ++ if (!RADEONDrawInitMMIO(pScreen)) ++ return FALSE; ++ } + } + } + #endif 
/* USE_EXA */ +@@ -947,6 +1037,9 @@ Bool RADEONAccelInit(ScreenPtr pScreen) + if (!info->useEXA) { + XAAInfoRecPtr a; + ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ return FALSE; ++ + if (!(a = info->accel_state->accel = XAACreateInfoRec())) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); + return FALSE; +diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c +index 0a9f9db..eabd87d 100644 +--- a/src/radeon_commonfuncs.c ++++ b/src/radeon_commonfuncs.c +@@ -628,16 +628,13 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) + + /* inserts a wait for vline in the command stream */ + void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, +- int crtc, int start, int stop, Bool enable) ++ int crtc, int start, int stop) + { + RADEONInfoPtr info = RADEONPTR(pScrn); + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); + uint32_t offset; + ACCEL_PREAMBLE(); + +- if (!enable) +- return; +- + if ((crtc < 0) || (crtc > 1)) + return; + +@@ -659,7 +656,7 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix, + return; + + start = max(start, 0); +- stop = max(stop, xf86_config->crtc[crtc]->mode.VDisplay); ++ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); + + if (start > xf86_config->crtc[crtc]->mode.VDisplay) + return; +@@ -733,8 +730,11 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) + + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Idle timed out, resetting engine...\n"); +- RADEONEngineReset(pScrn); +- RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ RADEONEngineReset(pScrn); ++ RADEONEngineRestore(pScrn); ++ } else ++ R600EngineReset(pScrn); + + /* Always restart the engine when doing CP 2D acceleration */ + RADEONCP_RESET(pScrn, info); +@@ -743,39 +743,56 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) + } + #endif + +-#if 0 +- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, +- "WaitForIdle (entering): %d entries, 
stat=0x%08x\n", +- INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, +- INREG(RADEON_RBBM_STATUS)); +-#endif +- +- if (info->ChipFamily >= CHIP_FAMILY_R600) +- return; +- +- /* Wait for the engine to go idle */ +- RADEONWaitForFifoFunction(pScrn, 64); ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ /* Wait for the engine to go idle */ ++ if (info->ChipFamily >= CHIP_FAMILY_RV770) ++ R600WaitForFifoFunction(pScrn, 8); ++ else ++ R600WaitForFifoFunction(pScrn, 16); + +- for (;;) { +- for (i = 0; i < RADEON_TIMEOUT; i++) { +- if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { +- RADEONEngineFlush(pScrn); +- return; ++ for (;;) { ++ for (i = 0; i < RADEON_TIMEOUT; i++) { ++ if (!(INREG(R600_GRBM_STATUS) & R600_GUI_ACTIVE)) ++ return; + } +- } +- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, +- "Idle timed out: %u entries, stat=0x%08x\n", +- (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, +- (unsigned int)INREG(RADEON_RBBM_STATUS)); +- xf86DrvMsg(pScrn->scrnIndex, X_ERROR, +- "Idle timed out, resetting engine...\n"); +- RADEONEngineReset(pScrn); +- RADEONEngineRestore(pScrn); ++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "Idle timed out: stat=0x%08x\n", ++ (unsigned int)INREG(R600_GRBM_STATUS)); ++ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, ++ "Idle timed out, resetting engine...\n"); ++ R600EngineReset(pScrn); + #ifdef XF86DRI +- if (info->directRenderingEnabled) { +- RADEONCP_RESET(pScrn, info); +- RADEONCP_START(pScrn, info); ++ if (info->directRenderingEnabled) { ++ RADEONCP_RESET(pScrn, info); ++ RADEONCP_START(pScrn, info); ++ } ++#endif + } ++ } else { ++ /* Wait for the engine to go idle */ ++ RADEONWaitForFifoFunction(pScrn, 64); ++ ++ for (;;) { ++ for (i = 0; i < RADEON_TIMEOUT; i++) { ++ if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { ++ RADEONEngineFlush(pScrn); ++ return; ++ } ++ } ++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "Idle timed out: %u entries, 
stat=0x%08x\n", ++ (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, ++ (unsigned int)INREG(RADEON_RBBM_STATUS)); ++ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, ++ "Idle timed out, resetting engine...\n"); ++ RADEONEngineReset(pScrn); ++ RADEONEngineRestore(pScrn); ++#ifdef XF86DRI ++ if (info->directRenderingEnabled) { ++ RADEONCP_RESET(pScrn, info); ++ RADEONCP_START(pScrn, info); ++ } + #endif ++ } + } + } +diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c +index 5a7c730..60140d6 100644 +--- a/src/radeon_crtc.c ++++ b/src/radeon_crtc.c +@@ -587,8 +587,7 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask) + RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn); + RADEONInfoPtr info = RADEONPTR(pScrn); + +- if ((info->ChipFamily < CHIP_FAMILY_R600) && +- (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE))) { ++ if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { + radeon_crtc_funcs.shadow_create = radeon_crtc_shadow_create; + radeon_crtc_funcs.shadow_allocate = radeon_crtc_shadow_allocate; + radeon_crtc_funcs.shadow_destroy = radeon_crtc_shadow_destroy; +diff --git a/src/radeon_dri.c b/src/radeon_dri.c +index 59d9a83..45c927f 100644 +--- a/src/radeon_dri.c ++++ b/src/radeon_dri.c +@@ -45,6 +45,7 @@ + #include "radeon.h" + #include "radeon_video.h" + #include "radeon_reg.h" ++#include "r600_reg.h" + #include "radeon_macros.h" + #include "radeon_drm.h" + #include "radeon_dri.h" +@@ -790,92 +791,96 @@ static Bool RADEONSetAgpMode(RADEONInfoPtr info, ScreenPtr pScreen) + unsigned long mode = drmAgpGetMode(info->dri->drmFD); /* Default mode */ + unsigned int vendor = drmAgpVendorId(info->dri->drmFD); + unsigned int device = drmAgpDeviceId(info->dri->drmFD); +- /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with +- pcie-agp rialto bridge chip - use the one from bridge which must match */ +- uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; +- Bool is_v3 = (agp_status & 
RADEON_AGPv3_MODE); +- unsigned int defaultMode; +- MessageType from; + +- if (is_v3) { +- defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4; +- } else { +- if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; +- else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; +- else defaultMode = 1; +- } +- +- /* Apply AGPMode Quirks */ +- radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; +- while (p && p->chipDevice != 0) { +- if (vendor == p->hostbridgeVendor && +- device == p->hostbridgeDevice && +- PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && +- PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && +- PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && +- PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) +- { +- defaultMode = p->defaultMode; +- } +- ++p; +- } ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with ++ pcie-agp rialto bridge chip - use the one from bridge which must match */ ++ uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; ++ Bool is_v3 = (agp_status & RADEON_AGPv3_MODE); ++ unsigned int defaultMode; ++ MessageType from; + +- from = X_DEFAULT; ++ if (is_v3) { ++ defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4; ++ } else { ++ if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; ++ else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; ++ else defaultMode = 1; ++ } + +- if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { +- if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || +- (info->dri->agpMode > (is_v3 ? 8 : 4)) || +- (info->dri->agpMode & (info->dri->agpMode - 1))) { +- xf86DrvMsg(pScreen->myNum, X_ERROR, +- "Illegal AGP Mode: %d (valid values: %s), leaving at " +- "%dx\n", info->dri->agpMode, is_v3 ? 
"4, 8" : "1, 2, 4", +- defaultMode); +- info->dri->agpMode = defaultMode; ++ /* Apply AGPMode Quirks */ ++ radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; ++ while (p && p->chipDevice != 0) { ++ if (vendor == p->hostbridgeVendor && ++ device == p->hostbridgeDevice && ++ PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && ++ PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && ++ PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && ++ PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) ++ { ++ defaultMode = p->defaultMode; ++ } ++ ++p; ++ } ++ ++ from = X_DEFAULT; ++ ++ if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { ++ if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || ++ (info->dri->agpMode > (is_v3 ? 8 : 4)) || ++ (info->dri->agpMode & (info->dri->agpMode - 1))) { ++ xf86DrvMsg(pScreen->myNum, X_ERROR, ++ "Illegal AGP Mode: %d (valid values: %s), leaving at " ++ "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4", ++ defaultMode); ++ info->dri->agpMode = defaultMode; ++ } else ++ from = X_CONFIG; + } else +- from = X_CONFIG; +- } else +- info->dri->agpMode = defaultMode; ++ info->dri->agpMode = defaultMode; + +- xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); ++ xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); + +- mode &= ~RADEON_AGP_MODE_MASK; +- if (is_v3) { +- /* only set one mode bit for AGPv3 */ +- switch (info->dri->agpMode) { +- case 8: mode |= RADEON_AGPv3_8X_MODE; break; +- case 4: default: mode |= RADEON_AGPv3_4X_MODE; +- } +- /*TODO: need to take care of other bits valid for v3 mode +- * currently these bits are not used in all tested cards. 
+- */ +- } else { +- switch (info->dri->agpMode) { +- case 4: mode |= RADEON_AGP_4X_MODE; +- case 2: mode |= RADEON_AGP_2X_MODE; +- case 1: default: mode |= RADEON_AGP_1X_MODE; ++ mode &= ~RADEON_AGP_MODE_MASK; ++ if (is_v3) { ++ /* only set one mode bit for AGPv3 */ ++ switch (info->dri->agpMode) { ++ case 8: mode |= RADEON_AGPv3_8X_MODE; break; ++ case 4: default: mode |= RADEON_AGPv3_4X_MODE; ++ } ++ /*TODO: need to take care of other bits valid for v3 mode ++ * currently these bits are not used in all tested cards. ++ */ ++ } else { ++ switch (info->dri->agpMode) { ++ case 4: mode |= RADEON_AGP_4X_MODE; ++ case 2: mode |= RADEON_AGP_2X_MODE; ++ case 1: default: mode |= RADEON_AGP_1X_MODE; ++ } + } +- } + +- /* AGP Fast Writes. +- * TODO: take into account that certain agp modes don't support fast +- * writes at all */ +- mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ +- if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { +- xf86DrvMsg(pScreen->myNum, X_WARNING, +- "WARNING: Using the AGPFastWrite option is not recommended.\n"); +- xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" +- " boost, while it\n\twill probably hard lock your machine." +- " All bets are off!\n"); +- +- /* Black list some host/AGP bridges. */ +- if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) +- xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " +- "for the AMD 761 northbridge.\n"); +- else { +- xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); +- mode |= RADEON_AGP_FW_MODE; +- } +- } /* Don't mention this otherwise, so that people don't get funny ideas */ ++ /* AGP Fast Writes. 
++ * TODO: take into account that certain agp modes don't support fast ++ * writes at all */ ++ mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ ++ if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { ++ xf86DrvMsg(pScreen->myNum, X_WARNING, ++ "WARNING: Using the AGPFastWrite option is not recommended.\n"); ++ xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" ++ " boost, while it\n\twill probably hard lock your machine." ++ " All bets are off!\n"); ++ ++ /* Black list some host/AGP bridges. */ ++ if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) ++ xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " ++ "for the AMD 761 northbridge.\n"); ++ else { ++ xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); ++ mode |= RADEON_AGP_FW_MODE; ++ } ++ } /* Don't mention this otherwise, so that people don't get funny ideas */ ++ } else ++ info->dri->agpMode = 8; /* doesn't matter at this point */ + + xf86DrvMsg(pScreen->myNum, X_INFO, + "[agp] Mode 0x%08lx [AGP 0x%04x/0x%04x; Card 0x%04x/0x%04x 0x%04x/0x%04x]\n", +@@ -910,6 +915,9 @@ static void RADEONSetAgpBase(RADEONInfoPtr info, ScreenPtr pScreen) + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + unsigned char *RADEONMMIO = info->MMIO; + ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ return; ++ + /* drm already does this, so we can probably remove this. + * agp_base_2 ? 
+ */ +@@ -1183,13 +1191,14 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) + drm_radeon_init_t drmInfo; + + memset(&drmInfo, 0, sizeof(drm_radeon_init_t)); +- if ( info->ChipFamily >= CHIP_FAMILY_R300 ) +- drmInfo.func = RADEON_INIT_R300_CP; ++ if ( info->ChipFamily >= CHIP_FAMILY_R600 ) ++ drmInfo.func = RADEON_INIT_R600_CP; ++ else if ( info->ChipFamily >= CHIP_FAMILY_R300 ) ++ drmInfo.func = RADEON_INIT_R300_CP; ++ else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) ++ drmInfo.func = RADEON_INIT_R200_CP; + else +- if ( info->ChipFamily >= CHIP_FAMILY_R200 ) +- drmInfo.func = RADEON_INIT_R200_CP; +- else +- drmInfo.func = RADEON_INIT_CP; ++ drmInfo.func = RADEON_INIT_CP; + + drmInfo.sarea_priv_offset = sizeof(XF86DRISAREARec); + drmInfo.is_pci = (info->cardType!=CARD_AGP); +@@ -1223,7 +1232,8 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) + * registers back to their default values, so we need to restore + * those engine register here. + */ +- RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineRestore(pScrn); + + return TRUE; + } +@@ -1299,14 +1309,16 @@ static void RADEONDRIIrqInit(RADEONInfoPtr info, ScreenPtr pScreen) + "[drm] falling back to irq-free operation\n"); + info->dri->irq = 0; + } else { +- unsigned char *RADEONMMIO = info->MMIO; +- info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); +- +- /* Let the DRM know it can safely disable the vblank interrupts */ +- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], +- FALSE); +- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], +- TRUE); ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ unsigned char *RADEONMMIO = info->MMIO; ++ info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); ++ ++ /* Let the DRM know it can safely disable the vblank interrupts */ ++ radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], ++ FALSE); ++ 
radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], ++ TRUE); ++ } + } + } + +@@ -1840,7 +1852,8 @@ void RADEONDRIResume(ScreenPtr pScreen) + /* FIXME: return? */ + } + +- RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineRestore(pScrn); + + RADEONDRICPInit(pScrn); + } +diff --git a/src/radeon_driver.c b/src/radeon_driver.c +index 32cb307..7cac321 100644 +--- a/src/radeon_driver.c ++++ b/src/radeon_driver.c +@@ -578,7 +578,7 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) + OUTREG(RS690_MC_INDEX, (addr & RS690_MC_INDEX_MASK)); + data = INREG(RS690_MC_DATA); + } else if (info->ChipFamily == CHIP_FAMILY_RS600) { +- OUTREG(RS600_MC_INDEX, (addr & RS600_MC_INDEX_MASK)); ++ OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) | RS600_MC_IND_CITF_ARB0)); + data = INREG(RS600_MC_DATA); + } else if (IS_AVIVO_VARIANT) { + OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0x7f0000); +@@ -591,7 +591,7 @@ unsigned RADEONINMC(ScrnInfoPtr pScrn, int addr) + OUTREG(R300_MC_IND_INDEX, addr & 0x3f); + (void)INREG(R300_MC_IND_INDEX); + data = INREG(R300_MC_IND_DATA); +- ++ + OUTREG(R300_MC_IND_INDEX, 0); + (void)INREG(R300_MC_IND_INDEX); + } +@@ -612,10 +612,10 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data) + OUTREG(RS690_MC_DATA, data); + OUTREG(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK); + } else if (info->ChipFamily == CHIP_FAMILY_RS600) { +- OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_INDEX_MASK) | +- RS600_MC_INDEX_WR_EN)); ++ OUTREG(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) | ++ RS600_MC_IND_CITF_ARB0 | ++ RS600_MC_IND_WR_EN)); + OUTREG(RS600_MC_DATA, data); +- OUTREG(RS600_MC_INDEX, RS600_MC_INDEX_WR_ACK); + } else if (IS_AVIVO_VARIANT) { + OUTREG(AVIVO_MC_INDEX, (addr & 0xff) | 0xff0000); + (void)INREG(AVIVO_MC_INDEX); +@@ -635,17 +635,20 @@ void RADEONOUTMC(ScrnInfoPtr pScrn, int addr, uint32_t data) + static Bool avivo_get_mc_idle(ScrnInfoPtr pScrn) + { + RADEONInfoPtr info = RADEONPTR(pScrn); ++ unsigned char 
*RADEONMMIO = info->MMIO; + + if (info->ChipFamily >= CHIP_FAMILY_R600) { +- /* no idea where this is on r600 yet */ +- return TRUE; ++ if (INREG(R600_SRBM_STATUS) & 0x3f00) ++ return FALSE; ++ else ++ return TRUE; + } else if (info->ChipFamily == CHIP_FAMILY_RV515) { + if (INMC(pScrn, RV515_MC_STATUS) & RV515_MC_STATUS_IDLE) + return TRUE; + else + return FALSE; + } else if (info->ChipFamily == CHIP_FAMILY_RS600) { +- if (INMC(pScrn, RS600_MC_STATUS) & RS600_MC_STATUS_IDLE) ++ if (INMC(pScrn, RS600_MC_STATUS) & RS600_MC_IDLE) + return TRUE; + else + return FALSE; +@@ -674,8 +677,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_ + if (mask & LOC_FB) + OUTREG(R700_MC_VM_FB_LOCATION, fb_loc); + if (mask & LOC_AGP) { +- OUTREG(R600_MC_VM_AGP_BOT, agp_loc); +- OUTREG(R600_MC_VM_AGP_TOP, agp_loc_hi); ++ OUTREG(R700_MC_VM_AGP_BOT, agp_loc); ++ OUTREG(R700_MC_VM_AGP_TOP, agp_loc_hi); + } + } else if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (mask & LOC_FB) +@@ -693,8 +696,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_ + } else if (info->ChipFamily == CHIP_FAMILY_RS600) { + if (mask & LOC_FB) + OUTMC(pScrn, RS600_MC_FB_LOCATION, fb_loc); +- /* if (mask & LOC_AGP) +- OUTMC(pScrn, RS600_MC_AGP_LOCATION, agp_loc);*/ ++ if (mask & LOC_AGP) ++ OUTMC(pScrn, RS600_MC_AGP_LOCATION, agp_loc); + } else if ((info->ChipFamily == CHIP_FAMILY_RS690) || + (info->ChipFamily == CHIP_FAMILY_RS740)) { + if (mask & LOC_FB) +@@ -724,8 +727,8 @@ static void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t + if (mask & LOC_FB) + *fb_loc = INREG(R700_MC_VM_FB_LOCATION); + if (mask & LOC_AGP) { +- *agp_loc = INREG(R600_MC_VM_AGP_BOT); +- *agp_loc_hi = INREG(R600_MC_VM_AGP_TOP); ++ *agp_loc = INREG(R700_MC_VM_AGP_BOT); ++ *agp_loc_hi = INREG(R700_MC_VM_AGP_TOP); + } + } else if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (mask & LOC_FB) +@@ -745,7 +748,7 @@ static void 
radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t + if (mask & LOC_FB) + *fb_loc = INMC(pScrn, RS600_MC_FB_LOCATION); + if (mask & LOC_AGP) { +- *agp_loc = 0;//INMC(pScrn, RS600_MC_AGP_LOCATION); ++ *agp_loc = INMC(pScrn, RS600_MC_AGP_LOCATION); + *agp_loc_hi = 0; + } + } else if ((info->ChipFamily == CHIP_FAMILY_RS690) || +@@ -1258,8 +1261,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) + { + RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO; +- uint32_t mem_size; +- uint32_t aper_size; ++ uint64_t mem_size; ++ uint64_t aper_size; + + radeon_read_mc_fb_agp_location(pScrn, LOC_FB | LOC_AGP, &info->mc_fb_location, + &info->mc_agp_location, &info->mc_agp_location_hi); +@@ -1306,7 +1309,7 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) + else + #endif + { +- uint32_t aper0_base; ++ uint64_t aper0_base; + + if (info->ChipFamily >= CHIP_FAMILY_R600) { + aper0_base = INREG(R600_CONFIG_F0_BASE); +@@ -1330,33 +1333,29 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) + aper0_base &= ~(mem_size - 1); + + if (info->ChipFamily >= CHIP_FAMILY_R600) { +- info->mc_fb_location = (aper0_base >> 24) | +- (((aper0_base + mem_size - 1) & 0xff000000U) >> 8); ++ uint64_t mc_fb = ((aper0_base >> 24) & 0xffff) | ++ (((aper0_base + mem_size - 1) >> 8) & 0xffff0000); ++ info->mc_fb_location = mc_fb & 0xffffffff; + ErrorF("mc fb loc is %08x\n", (unsigned int)info->mc_fb_location); + } else { +- info->mc_fb_location = (aper0_base >> 16) | ++ uint64_t mc_fb = ((aper0_base >> 16) & 0xffff) | + ((aper0_base + mem_size - 1) & 0xffff0000U); ++ info->mc_fb_location = mc_fb & 0xffffffff; + } + } + } + if (info->ChipFamily >= CHIP_FAMILY_R600) { + info->fbLocation = (info->mc_fb_location & 0xffff) << 24; + } else { +- info->fbLocation = (info->mc_fb_location & 0xffff) << 16; ++ info->fbLocation = (info->mc_fb_location & 0xffff) << 16; + } + /* Just disable the damn AGP apertures for now, it may be + * re-enabled later by the DRM + */ 
+- +- if (IS_AVIVO_VARIANT) { +- if (info->ChipFamily >= CHIP_FAMILY_R600) { +- OUTREG(R600_HDP_NONSURFACE_BASE, (info->mc_fb_location << 16) & 0xff0000); +- } else { +- OUTREG(AVIVO_HDP_FB_LOCATION, info->mc_fb_location); +- } +- info->mc_agp_location = 0x003f0000; +- } else +- info->mc_agp_location = 0xffffffc0; ++ if (IS_AVIVO_VARIANT) ++ info->mc_agp_location = 0x003f0000; ++ else ++ info->mc_agp_location = 0xffffffc0; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "RADEONInitMemoryMap() : \n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, +@@ -1503,6 +1502,9 @@ static uint32_t RADEONGetAccessibleVRAM(ScrnInfoPtr pScrn) + info->dri->newMemoryMap = TRUE; + #endif /* XF86DRI */ + ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ return aper_size; ++ + /* Set HDP_APER_CNTL only on cards that are known not to be broken, + * that is has the 2nd generation multifunction PCI interface + */ +@@ -1511,7 +1513,7 @@ static uint32_t RADEONGetAccessibleVRAM(ScrnInfoPtr pScrn) + info->ChipFamily == CHIP_FAMILY_RV380 || + info->ChipFamily == CHIP_FAMILY_R420 || + info->ChipFamily == CHIP_FAMILY_RV410 || +- IS_AVIVO_VARIANT) { ++ IS_AVIVO_VARIANT) { + OUTREGP (RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, + ~RADEON_HDP_APER_CNTL); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, +@@ -1588,9 +1590,10 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) + if (pScrn->videoRam > accessible) + pScrn->videoRam = accessible; + +- if (!IS_AVIVO_VARIANT) ++ if (!IS_AVIVO_VARIANT) { + info->MemCntl = INREG(RADEON_SDRAM_MODE_REG); +- info->BusCntl = INREG(RADEON_BUS_CNTL); ++ info->BusCntl = INREG(RADEON_BUS_CNTL); ++ } + + RADEONGetVRamType(pScrn); + +@@ -1876,7 +1879,14 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) + + /* treat PCIE IGP cards as PCI */ + if (info->cardType == CARD_PCIE && info->IsIGP) +- info->cardType = CARD_PCI; ++ info->cardType = CARD_PCI; ++ ++ if ((info->ChipFamily >= CHIP_FAMILY_R600) && info->IsIGP) ++ info->cardType = CARD_PCIE; ++ ++ /* not sure about gart table requirements 
*/ ++ if ((info->ChipFamily == CHIP_FAMILY_RS600) && info->IsIGP) ++ info->cardType = CARD_PCIE; + + if ((s = xf86GetOptValString(info->Options, OPTION_BUS_TYPE))) { + if (strcmp(s, "AGP") == 0) { +@@ -1905,14 +1915,6 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) + info->Chipset != PCI_CHIP_RN50_5969); + #endif + +- if (info->ChipFamily >= CHIP_FAMILY_R600) { +- info->r600_shadow_fb = TRUE; +- xf86DrvMsg(pScrn->scrnIndex, X_INFO, +- "using shadow framebuffer\n"); +- if (!xf86LoadSubModule(pScrn, "shadow")) +- return FALSE; +- } +- + return TRUE; + } + +@@ -1989,8 +1991,8 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) + + if (info->ChipFamily >= CHIP_FAMILY_R600) { + xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT, +- "No acceleration support available on R600 yet.\n"); +- return TRUE; ++ "Experimental R6xx/R7xx EXA support.\n"); ++ info->useEXA = TRUE; + } + + if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { +@@ -2146,16 +2148,23 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) + + if (info->Chipset == PCI_CHIP_RN50_515E || + info->Chipset == PCI_CHIP_RN50_5969 || +- info->ChipFamily == CHIP_FAMILY_RS600 || + info->ChipFamily >= CHIP_FAMILY_R600) { + if (xf86ReturnOptValBool(info->Options, OPTION_DRI, FALSE)) { + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, +- "Direct rendering for RN50/RS600/R600 forced on -- " ++ "Direct rendering for RN50/R600 forced on -- " + "This is NOT officially supported at the hardware level " + "and may cause instability or lockups\n"); + } else { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, +- "Direct rendering not officially supported on RN50/RS600/R600\n"); ++ "Direct rendering not officially supported on RN50/R600\n"); ++ ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ info->r600_shadow_fb = TRUE; ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, ++ "using shadow framebuffer\n"); ++ if (!xf86LoadSubModule(pScrn, "shadow")) ++ info->r600_shadow_fb = FALSE; ++ } + return FALSE; + } + } +@@ -2336,7 +2345,10 @@ static Bool 
RADEONPreInitDRI(ScrnInfoPtr pScrn) + xf86DrvMsg(pScrn->scrnIndex, from, "Page Flipping %sabled%s\n", + info->dri->allowPageFlip ? "en" : "dis", reason); + +- info->DMAForXv = TRUE; ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ info->DMAForXv = FALSE; ++ else ++ info->DMAForXv = TRUE; + from = xf86GetOptValBool(info->Options, OPTION_XV_DMA, &info->DMAForXv) + ? X_CONFIG : X_INFO; + xf86DrvMsg(pScrn->scrnIndex, from, +@@ -3644,11 +3656,9 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, + RADEONDGAInit(pScreen); + + /* Init Xv */ +- if (info->ChipFamily < CHIP_FAMILY_R600) { +- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, +- "Initializing Xv\n"); +- RADEONInitVideo(pScreen); +- } ++ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, ++ "Initializing Xv\n"); ++ RADEONInitVideo(pScreen); + + if (info->r600_shadow_fb == TRUE) { + if (!shadowSetup(pScreen)) { +@@ -3771,9 +3781,10 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn, + } else { + OUTREG(R600_HDP_NONSURFACE_BASE, (restore->mc_fb_location << 16) & 0xff0000); + } +- ++ + /* Reset the engine and HDP */ +- RADEONEngineReset(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineReset(pScrn); + } + } else { + +@@ -3958,7 +3969,7 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) + } + + #ifdef USE_EXA +- if (info->accelDFS) ++ if (info->accelDFS || (info->ChipFamily >= CHIP_FAMILY_R600)) + { + drm_radeon_getparam_t gp; + int gart_base; +@@ -4419,15 +4430,40 @@ avivo_save(ScrnInfoPtr pScrn, RADEONSavePtr save) + state->aux_cntl2[j] = INREG(i + 0x040); + state->aux_cntl3[j] = INREG(i + 0x400); + state->aux_cntl4[j] = INREG(i + 0x440); ++ if (IS_DCE32_VARIANT) { ++ state->aux_cntl5[j] = INREG(i + 0x500); ++ state->aux_cntl6[j] = INREG(i + 0x540); ++ } + j++; + } + + j = 0; + /* save UNIPHY regs */ +- for (i = 0x7ec0; i <= 0x7edc; i += 4) { +- state->uniphy1[j] = INREG(i); +- state->uniphy2[j] = INREG(i + 0x100); +- j++; ++ if 
(IS_DCE32_VARIANT) { ++ for (i = 0x7680; i <= 0x7690; i += 4) { ++ state->uniphy1[j] = INREG(i); ++ state->uniphy2[j] = INREG(i + 0x20); ++ state->uniphy3[j] = INREG(i + 0x400); ++ state->uniphy4[j] = INREG(i + 0x420); ++ state->uniphy5[j] = INREG(i + 0x840); ++ state->uniphy6[j] = INREG(i + 0x940); ++ j++; ++ } ++ for (i = 0x7698; i <= 0x769c; i += 4) { ++ state->uniphy1[j] = INREG(i); ++ state->uniphy2[j] = INREG(i + 0x20); ++ state->uniphy3[j] = INREG(i + 0x400); ++ state->uniphy4[j] = INREG(i + 0x420); ++ state->uniphy5[j] = INREG(i + 0x840); ++ state->uniphy6[j] = INREG(i + 0x940); ++ j++; ++ } ++ } else { ++ for (i = 0x7ec0; i <= 0x7edc; i += 4) { ++ state->uniphy1[j] = INREG(i); ++ state->uniphy2[j] = INREG(i + 0x100); ++ j++; ++ } + } + j = 0; + /* save PHY,LINK regs */ +@@ -4770,15 +4806,40 @@ avivo_restore(ScrnInfoPtr pScrn, RADEONSavePtr restore) + OUTREG((i + 0x040), state->aux_cntl2[j]); + OUTREG((i + 0x400), state->aux_cntl3[j]); + OUTREG((i + 0x440), state->aux_cntl4[j]); ++ if (IS_DCE32_VARIANT) { ++ OUTREG((i + 0x500), state->aux_cntl5[j]); ++ OUTREG((i + 0x540), state->aux_cntl6[j]); ++ } + j++; + } + + j = 0; + /* save UNIPHY regs */ +- for (i = 0x7ec0; i <= 0x7edc; i += 4) { +- OUTREG(i, state->uniphy1[j]); +- OUTREG((i + 0x100), state->uniphy2[j]); +- j++; ++ if (IS_DCE32_VARIANT) { ++ for (i = 0x7680; i <= 0x7690; i += 4) { ++ OUTREG(i, state->uniphy1[j]); ++ OUTREG((i + 0x20), state->uniphy2[j]); ++ OUTREG((i + 0x400), state->uniphy3[j]); ++ OUTREG((i + 0x420), state->uniphy4[j]); ++ OUTREG((i + 0x840), state->uniphy5[j]); ++ OUTREG((i + 0x940), state->uniphy6[j]); ++ j++; ++ } ++ for (i = 0x7698; i <= 0x769c; i += 4) { ++ OUTREG(i, state->uniphy1[j]); ++ OUTREG((i + 0x20), state->uniphy2[j]); ++ OUTREG((i + 0x400), state->uniphy3[j]); ++ OUTREG((i + 0x420), state->uniphy4[j]); ++ OUTREG((i + 0x840), state->uniphy5[j]); ++ OUTREG((i + 0x940), state->uniphy6[j]); ++ j++; ++ } ++ } else { ++ for (i = 0x7ec0; i <= 0x7edc; i += 4) { ++ OUTREG(i, 
state->uniphy1[j]); ++ OUTREG((i + 0x100), state->uniphy2[j]); ++ j++; ++ } + } + j = 0; + /* save PHY,LINK regs */ +@@ -5046,8 +5107,10 @@ static void RADEONRestore(ScrnInfoPtr pScrn) + "RADEONRestore\n"); + + #if X_BYTE_ORDER == X_BIG_ENDIAN +- RADEONWaitForFifo(pScrn, 1); +- OUTREG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ RADEONWaitForFifo(pScrn, 1); ++ OUTREG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); ++ } + #endif + + RADEONBlank(pScrn); +@@ -5220,7 +5283,8 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) + + if (info->accelOn) { + RADEON_SYNC(info, pScrn); +- RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineRestore(pScrn); + } + + #ifdef XF86DRI +@@ -5424,6 +5488,10 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) + xf86OutputPtr output = config->output[config->compat_output]; + xf86CrtcPtr crtc = output->crtc; + ++ /* not handled */ ++ if (IS_AVIVO_VARIANT) ++ return; ++ + #ifdef XF86DRI + if (info->cp->CPStarted && pScrn->pScreen) DRILock(pScrn->pScreen, 0); + #endif +@@ -5536,9 +5604,12 @@ Bool RADEONEnterVT(int scrnIndex, int flags) + if (info->adaptor) + RADEONResetVideo(pScrn); + +- if (info->accelOn) ++ if (info->accelOn && (info->ChipFamily < CHIP_FAMILY_R600)) + RADEONEngineRestore(pScrn); + ++ if (info->accelOn && info->accel_state) ++ info->accel_state->XInited3D = FALSE; ++ + #ifdef XF86DRI + if (info->directRenderingEnabled) { + RADEONCP_START(pScrn, info); +diff --git a/src/radeon_exa.c b/src/radeon_exa.c +index 2f36d71..ae68146 100644 +--- a/src/radeon_exa.c ++++ b/src/radeon_exa.c +@@ -35,6 +35,7 @@ + + #include "radeon.h" + #include "radeon_reg.h" ++#include "r600_reg.h" + #ifdef XF86DRI + #include "radeon_drm.h" + #endif +@@ -221,7 +222,7 @@ int RADEONBiggerCrtcArea(PixmapPtr pPix) + + #if X_BYTE_ORDER == X_BIG_ENDIAN + +-static unsigned long swapper_surfaces[3]; ++static unsigned long 
swapper_surfaces[6]; + + static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) + { +diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c +index cd97cc6..dec0285 100644 +--- a/src/radeon_exa_funcs.c ++++ b/src/radeon_exa_funcs.c +@@ -56,8 +56,6 @@ + + #include "radeon.h" + +-#include "exa.h" +- + static int + FUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen) + { +@@ -129,7 +127,8 @@ FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) + + TRACE; + +- FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2, info->accel_state->vsync); ++ if (info->accel_state->vsync) ++ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2); + + BEGIN_ACCEL(2); + OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1); +@@ -230,7 +229,8 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst, + dstY += h - 1; + } + +- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h, info->accel_state->vsync); ++ if (info->accel_state->vsync) ++ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h); + + BEGIN_ACCEL(3); + +@@ -281,7 +281,8 @@ RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, + + RADEON_SWITCH_TO_2D(); + +- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), y, y + h, info->accel_state->vsync); ++ if (info->accel_state->vsync) ++ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), y, y + h); + + while ((buf = RADEONHostDataBlit(pScrn, + cpp, w, dst_pitch_off, &buf_pitch, +diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c +index c44502c..571204a 100644 +--- a/src/radeon_exa_render.c ++++ b/src/radeon_exa_render.c +@@ -458,7 +458,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + + #ifdef ONLY_ONCE + +-static PixmapPtr ++PixmapPtr + RADEONGetDrawablePixmap(DrawablePtr pDrawable) + { + if (pDrawable->type == DRAWABLE_WINDOW) +@@ -2015,13 +2015,14 @@ static inline void 
transformPoint(PictTransform *transform, xPointFixed *point) + } + #endif + +-static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, ++static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn, ++ RADEONInfoPtr info, ++ PixmapPtr pDst, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) + { +- RINFO_FROM_SCREEN(pDst->drawable.pScreen); + int vtx_count; + xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; + static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; +@@ -2069,7 +2070,8 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, + } else + vtx_count = 4; + +- FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h, info->accel_state->vsync); ++ if (info->accel_state->vsync) ++ FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h); + + #ifdef ACCEL_CP + if (info->ChipFamily < CHIP_FAMILY_R200) { +@@ -2180,7 +2182,9 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + + if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) { +- FUNC_NAME(RadeonCompositeTile)(pDst, ++ FUNC_NAME(RadeonCompositeTile)(pScrn, ++ info, ++ pDst, + srcX, srcY, + maskX, maskY, + dstX, dstY, +@@ -2214,7 +2218,9 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + w = remainingWidth; + remainingWidth -= w; + +- FUNC_NAME(RadeonCompositeTile)(pDst, ++ FUNC_NAME(RadeonCompositeTile)(pScrn, ++ info, ++ pDst, + tileSrcX, tileSrcY, + tileMaskX, tileMaskY, + tileDstX, tileDstY, +diff --git a/src/radeon_legacy_memory.c b/src/radeon_legacy_memory.c +index 2a9ee94..861fd97 100644 +--- a/src/radeon_legacy_memory.c ++++ b/src/radeon_legacy_memory.c +@@ -93,10 +93,10 @@ void + radeon_legacy_free_memory(ScrnInfoPtr pScrn, + void *mem_struct) + { +- ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; + RADEONInfoPtr info = RADEONPTR(pScrn); +- + #ifdef USE_EXA ++ 
ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; ++ + if (info->useEXA) { + ExaOffscreenArea *area = mem_struct; + +diff --git a/src/radeon_modes.c b/src/radeon_modes.c +index e06f8dd..0a8fa00 100644 +--- a/src/radeon_modes.c ++++ b/src/radeon_modes.c +@@ -65,15 +65,19 @@ void RADEONSetPitch (ScrnInfoPtr pScrn) + align_large = info->allowColorTiling || IS_AVIVO_VARIANT; + + /* FIXME: May need to validate line pitch here */ +- switch (pScrn->depth / 8) { +- case 1: pitch_mask = align_large ? 255 : 127; +- break; +- case 2: pitch_mask = align_large ? 127 : 31; +- break; +- case 3: +- case 4: pitch_mask = align_large ? 63 : 15; +- break; +- } ++ if (info->ChipFamily < CHIP_FAMILY_R600) { ++ switch (pScrn->depth / 8) { ++ case 1: pitch_mask = align_large ? 255 : 127; ++ break; ++ case 2: pitch_mask = align_large ? 127 : 31; ++ break; ++ case 3: ++ case 4: pitch_mask = align_large ? 63 : 15; ++ break; ++ } ++ } else ++ pitch_mask = 255; /* r6xx/r7xx need 256B alignment for accel */ ++ + dummy = (pScrn->virtualX + pitch_mask) & ~pitch_mask; + pScrn->displayWidth = dummy; + info->CurrentLayout.displayWidth = pScrn->displayWidth; +diff --git a/src/radeon_output.c b/src/radeon_output.c +index 352519f..897c6a2 100644 +--- a/src/radeon_output.c ++++ b/src/radeon_output.c +@@ -1147,7 +1147,7 @@ radeon_create_resources(xf86OutputPtr output) + } + #endif + +- if (radeon_output->devices & (ATOM_DEVICE_CRT_SUPPORT)) { ++ if (radeon_output->devices & (ATOM_DEVICE_CRT_SUPPORT | ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) { + load_detection_atom = MAKE_ATOM("load_detection"); + + range[0] = 0; /* off */ +diff --git a/src/radeon_probe.h b/src/radeon_probe.h +index 447ef57..49044e3 100644 +--- a/src/radeon_probe.h ++++ b/src/radeon_probe.h +@@ -43,9 +43,6 @@ + + #include "xf86Crtc.h" + +-#ifdef USE_EXA +-#include "exa.h" +-#endif + #ifdef USE_XAA + #include "xaa.h" + #endif +@@ -373,7 +370,7 @@ struct avivo_state + /* dvoa */ + uint32_t dvoa[16]; + +- /* DCE3 chips */ ++ 
/* DCE3+ chips */ + uint32_t fmt1[18]; + uint32_t fmt2[18]; + uint32_t dig1[19]; +@@ -384,9 +381,15 @@ struct avivo_state + uint32_t aux_cntl2[14]; + uint32_t aux_cntl3[14]; + uint32_t aux_cntl4[14]; ++ uint32_t aux_cntl5[14]; ++ uint32_t aux_cntl6[14]; + uint32_t phy[10]; + uint32_t uniphy1[8]; + uint32_t uniphy2[8]; ++ uint32_t uniphy3[8]; ++ uint32_t uniphy4[8]; ++ uint32_t uniphy5[8]; ++ uint32_t uniphy6[8]; + + }; + +diff --git a/src/radeon_reg.h b/src/radeon_reg.h +index 7b8840b..0af8859 100644 +--- a/src/radeon_reg.h ++++ b/src/radeon_reg.h +@@ -3445,15 +3445,24 @@ + #define RS690_MC_STATUS 0x90 + #define RS690_MC_STATUS_IDLE (1 << 0) + +-#define RS600_MC_INDEX 0x78 +-# define RS600_MC_INDEX_MASK 0xff +-# define RS600_MC_INDEX_WR_EN (1 << 8) +-# define RS600_MC_INDEX_WR_ACK 0xff +-#define RS600_MC_DATA 0x7c +- +-#define RS600_MC_FB_LOCATION 0xA +-#define RS600_MC_STATUS 0x0 +-#define RS600_MC_STATUS_IDLE (1 << 0) ++#define RS600_MC_INDEX 0x70 ++# define RS600_MC_ADDR_MASK 0xffff ++# define RS600_MC_IND_SEQ_RBS_0 (1 << 16) ++# define RS600_MC_IND_SEQ_RBS_1 (1 << 17) ++# define RS600_MC_IND_SEQ_RBS_2 (1 << 18) ++# define RS600_MC_IND_SEQ_RBS_3 (1 << 19) ++# define RS600_MC_IND_AIC_RBS (1 << 20) ++# define RS600_MC_IND_CITF_ARB0 (1 << 21) ++# define RS600_MC_IND_CITF_ARB1 (1 << 22) ++# define RS600_MC_IND_WR_EN (1 << 23) ++#define RS600_MC_DATA 0x74 ++ ++#define RS600_MC_STATUS 0x0 ++# define RS600_MC_IDLE (1 << 1) ++#define RS600_MC_FB_LOCATION 0x4 ++#define RS600_MC_AGP_LOCATION 0x5 ++#define RS600_AGP_BASE 0x6 ++#define RS600_AGP_BASE2 0x7 + + #define AVIVO_MC_INDEX 0x0070 + #define R520_MC_STATUS 0x00 +@@ -3482,6 +3491,8 @@ + # define R600_CHANSIZE (1 << 7) + # define R600_CHANSIZE_OVERRIDE (1 << 10) + ++#define R600_SRBM_STATUS 0x0e50 ++ + #define AVIVO_HDP_FB_LOCATION 0x134 + + #define AVIVO_VGA_RENDER_CONTROL 0x0300 +@@ -3662,6 +3673,8 @@ + # define AVIVO_D1MODE_VLINE_START_SHIFT 0 + # define AVIVO_D1MODE_VLINE_END_SHIFT 16 + # define 
AVIVO_D1MODE_VLINE_INV (1 << 31) ++#define AVIVO_D1MODE_VLINE_STATUS 0x653c ++# define AVIVO_D1MODE_VLINE_STAT (1 << 12) + #define AVIVO_D1MODE_VIEWPORT_START 0x6580 + #define AVIVO_D1MODE_VIEWPORT_SIZE 0x6584 + #define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT 0x6588 +@@ -3984,6 +3997,9 @@ + #define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 + + #define R700_MC_VM_FB_LOCATION 0x2024 ++#define R700_MC_VM_AGP_TOP 0x2028 ++#define R700_MC_VM_AGP_BOT 0x202c ++#define R700_MC_VM_AGP_BASE 0x2030 + + #define R600_HDP_NONSURFACE_BASE 0x2c04 + +@@ -5362,4 +5378,32 @@ + + #define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ + ++/* r6xx/r7xx stuff */ ++#define R600_GRBM_STATUS 0x8010 ++# define R600_CMDFIFO_AVAIL_MASK 0x1f ++# define R700_CMDFIFO_AVAIL_MASK 0xf ++# define R600_GUI_ACTIVE (1 << 31) ++ ++#define R600_GRBM_SOFT_RESET 0x8020 ++# define R600_SOFT_RESET_CP (1 << 0) ++ ++#define R600_WAIT_UNTIL 0x8040 ++ ++#define R600_CP_ME_CNTL 0x86d8 ++# define R600_CP_ME_HALT (1 << 28) ++ ++#define R600_CP_RB_BASE 0xc100 ++#define R600_CP_RB_CNTL 0xc104 ++# define R600_RB_NO_UPDATE (1 << 27) ++# define R600_RB_RPTR_WR_ENA (1 << 31) ++#define R600_CP_RB_RPTR_WR 0xc108 ++#define R600_CP_RB_RPTR_ADDR 0xc10c ++#define R600_CP_RB_RPTR_ADDR_HI 0xc110 ++#define R600_CP_RB_WPTR 0xc114 ++#define R600_CP_RB_WPTR_ADDR 0xc118 ++#define R600_CP_RB_WPTR_ADDR_HI 0xc11c ++ ++#define R600_CP_RB_RPTR 0x8700 ++#define R600_CP_RB_WPTR_DELAY 0x8704 ++ + #endif +diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c +index 7712344..cbedb7e 100644 +--- a/src/radeon_textured_video.c ++++ b/src/radeon_textured_video.c +@@ -36,6 +36,7 @@ + + #include "radeon.h" + #include "radeon_reg.h" ++#include "r600_reg.h" + #include "radeon_macros.h" + #include "radeon_probe.h" + #include "radeon_video.h" +@@ -43,12 +44,24 @@ + #include + #include "fourcc.h" + ++extern void ++R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); ++ ++extern Bool ++R600CopyToVRAM(ScrnInfoPtr pScrn, ++ 
char *src, int src_pitch, ++ uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, ++ int x, int y, int w, int h); ++ + #define IMAGE_MAX_WIDTH 2048 + #define IMAGE_MAX_HEIGHT 2048 + + #define IMAGE_MAX_WIDTH_R500 4096 + #define IMAGE_MAX_HEIGHT_R500 4096 + ++#define IMAGE_MAX_WIDTH_R600 8192 ++#define IMAGE_MAX_HEIGHT_R600 8192 ++ + static Bool + RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix) + { +@@ -146,6 +159,56 @@ static __inline__ uint32_t F_TO_24(float val) + + #endif /* XF86DRI */ + ++static void ++R600CopyPlanar(ScrnInfoPtr pScrn, ++ unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, ++ uint32_t dst_mc_addr, ++ int srcPitch, int srcPitch2, int dstPitch, ++ int w, int h) ++{ ++ int dstPitch2 = dstPitch >> 1; ++ int h2 = h >> 1; ++ int w2 = w >> 1; ++ int v_offset, u_offset; ++ v_offset = dstPitch * h; ++ v_offset = (v_offset + 255) & ~255; ++ u_offset = v_offset + (dstPitch2 * h2); ++ u_offset = (u_offset + 255) & ~255; ++ ++ /* Y */ ++ R600CopyToVRAM(pScrn, ++ (char *)y_src, srcPitch, ++ dstPitch, dst_mc_addr, h, 8, ++ 0, 0, w, h); ++ ++ /* V */ ++ R600CopyToVRAM(pScrn, ++ (char *)v_src, srcPitch2, ++ dstPitch2, dst_mc_addr + v_offset, h2, 8, ++ 0, 0, w2, h2); ++ ++ /* U */ ++ R600CopyToVRAM(pScrn, ++ (char *)u_src, srcPitch2, ++ dstPitch2, dst_mc_addr + u_offset, h2, 8, ++ 0, 0, w2, h2); ++} ++ ++static void ++R600CopyPacked(ScrnInfoPtr pScrn, ++ unsigned char *src, uint32_t dst_mc_addr, ++ int srcPitch, int dstPitch, ++ int w, int h) ++{ ++ ++ /* YUV */ ++ R600CopyToVRAM(pScrn, ++ (char *)src, srcPitch, ++ dstPitch >> 2, dst_mc_addr, h, 32, ++ 0, 0, w >> 1, h); ++ ++} ++ + static int + RADEONPutImageTextured(ScrnInfoPtr pScrn, + short src_x, short src_y, +@@ -214,7 +277,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + break; + } + +- dstPitch = (dstPitch + 63) & ~63; ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ dstPitch = (dstPitch + 255) & ~255; ++ else ++ dstPitch = (dstPitch + 63) & ~63; + + if 
(pPriv->video_memory != NULL && size != pPriv->size) { + radeon_legacy_free_memory(pScrn, pPriv->video_memory); +@@ -222,16 +288,21 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + } + + if (pPriv->video_memory == NULL) { +- pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, +- &pPriv->video_memory, +- size * 2, 64); ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, ++ &pPriv->video_memory, ++ size * 2, 256); ++ else ++ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, ++ &pPriv->video_memory, ++ size * 2, 64); + if (pPriv->video_offset == 0) + return BadAlloc; + } + + /* Bicubic filter setup */ + pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF); +- if (!(IS_R300_3D || IS_R500_3D)) ++ if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D)) + pPriv->bicubic_enabled = FALSE; + if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) { + /* +@@ -280,7 +351,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; + + pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; +- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset); ++ else ++ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); + pPriv->src_pitch = dstPitch; + pPriv->size = size; + pPriv->pDraw = pDraw; +@@ -294,29 +368,51 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + switch(id) { + case FOURCC_YV12: + case FOURCC_I420: +- top &= ~1; +- nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; +- s2offset = srcPitch * height; +- s3offset = (srcPitch2 * (height >> 1)) + s2offset; +- top &= ~1; +- pPriv->src_addr += left << 1; +- tmp = ((top >> 1) * srcPitch2) + (left >> 1); +- s2offset += tmp; +- s3offset += tmp; +- if (id == FOURCC_I420) { +- tmp = s2offset; +- s2offset = s3offset; +- 
s3offset = tmp; ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ s2offset = srcPitch * height; ++ s3offset = (srcPitch2 * (height >> 1)) + s2offset; ++ if (id == FOURCC_YV12) ++ R600CopyPlanar(pScrn, buf, buf + s3offset, buf + s2offset, ++ pPriv->src_offset, ++ srcPitch, srcPitch2, pPriv->src_pitch, ++ width, height); ++ else ++ R600CopyPlanar(pScrn, buf, buf + s2offset, buf + s3offset, ++ pPriv->src_offset, ++ srcPitch, srcPitch2, pPriv->src_pitch, ++ width, height); ++ ++ } else { ++ top &= ~1; ++ nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; ++ s2offset = srcPitch * height; ++ s3offset = (srcPitch2 * (height >> 1)) + s2offset; ++ top &= ~1; ++ pPriv->src_addr += left << 1; ++ tmp = ((top >> 1) * srcPitch2) + (left >> 1); ++ s2offset += tmp; ++ s3offset += tmp; ++ if (id == FOURCC_I420) { ++ tmp = s2offset; ++ s2offset = s3offset; ++ s3offset = tmp; ++ } ++ RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, ++ buf + s2offset, buf + s3offset, pPriv->src_addr, ++ srcPitch, srcPitch2, dstPitch, nlines, npixels); + } +- RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, +- buf + s2offset, buf + s3offset, pPriv->src_addr, +- srcPitch, srcPitch2, dstPitch, nlines, npixels); + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: +- nlines = ((y2 + 0xffff) >> 16) - top; +- RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); ++ if (info->ChipFamily >= CHIP_FAMILY_R600) { ++ R600CopyPacked(pScrn, buf, pPriv->src_offset, ++ 2 * width, pPriv->src_pitch, ++ width, height); ++ } else { ++ nlines = ((y2 + 0xffff) >> 16) - top; ++ RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); ++ } + break; + } + +@@ -340,7 +436,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + pPriv->h = height; + + #ifdef XF86DRI +- if (info->directRenderingEnabled) ++ if (IS_R600_3D) ++ R600DisplayTexturedVideo(pScrn, pPriv); ++ else if (info->directRenderingEnabled) + RADEONDisplayTexturedVideoCP(pScrn, pPriv); + 
else + #endif +@@ -370,6 +468,16 @@ static XF86VideoEncodingRec DummyEncodingR500[1] = + } + }; + ++static XF86VideoEncodingRec DummyEncodingR600[1] = ++{ ++ { ++ 0, ++ "XV_IMAGE", ++ IMAGE_MAX_WIDTH_R600, IMAGE_MAX_HEIGHT_R600, ++ {1, 1} ++ } ++}; ++ + #define NUM_FORMATS 3 + + static XF86VideoFormatRec Formats[NUM_FORMATS] = +@@ -471,7 +579,9 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) + adapt->flags = 0; + adapt->name = "Radeon Textured Video"; + adapt->nEncodings = 1; +- if (IS_R500_3D) ++ if (IS_R600_3D) ++ adapt->pEncodings = DummyEncodingR600; ++ else if (IS_R500_3D) + adapt->pEncodings = DummyEncodingR500; + else + adapt->pEncodings = DummyEncoding; +@@ -483,7 +593,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) + pPortPriv = + (RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]); + +- if (IS_R300_3D || IS_R500_3D) { ++ if (IS_R300_3D || IS_R500_3D || IS_R600_3D) { + adapt->pAttributes = Attributes_r300; + adapt->nAttributes = NUM_ATTRIBUTES_R300; + } else { +diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c +index c6ed472..f55ae12 100644 +--- a/src/radeon_textured_videofuncs.c ++++ b/src/radeon_textured_videofuncs.c +@@ -1475,16 +1475,16 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + } + } + +- FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, +- radeon_covering_crtc_num(pScrn, +- pPriv->drw_x, +- pPriv->drw_x + pPriv->dst_w, +- pPriv->drw_y, +- pPriv->drw_y + pPriv->dst_h, +- pPriv->desired_crtc), +- pPriv->drw_y, +- pPriv->drw_y + pPriv->dst_h, +- pPriv->vsync); ++ if (pPriv->vsync) ++ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap, ++ radeon_covering_crtc_num(pScrn, ++ pPriv->drw_x, ++ pPriv->drw_x + pPriv->dst_w, ++ pPriv->drw_y, ++ pPriv->drw_y + pPriv->dst_h, ++ pPriv->desired_crtc), ++ pPriv->drw_y, ++ pPriv->drw_y + pPriv->dst_h); + + /* + * Rendering of the actual polygon is done in two different diff --git a/radeon-fix-rs780-mm.patch b/radeon-fix-rs780-mm.patch 
deleted file mode 100644 index e1462fa..0000000 --- a/radeon-fix-rs780-mm.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 3a6e958c59b4b44fe620c8abe760ed1abf0007b1 Mon Sep 17 00:00:00 2001 -From: airlied -Date: Fri, 30 Jan 2009 07:56:14 +1000 -Subject: [PATCH] rs780: include RS780 in the InitMemory to leave alone - -This stops the MC_FB_LOCATION getting reassigned to 0, which -is really bad thing to happen. - -I've had reported memory corruption on these cards so hopefully this -fixes it. ---- - src/radeon_driver.c | 3 ++- - 1 files changed, 2 insertions(+), 1 deletions(-) - -diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index 22b2e4c..eda7b77 100644 ---- a/src/radeon_driver.c -+++ b/src/radeon_driver.c -@@ -1293,7 +1293,8 @@ static void RADEONInitMemoryMap(ScrnInfoPtr pScrn) - - if ((info->ChipFamily != CHIP_FAMILY_RS600) && - (info->ChipFamily != CHIP_FAMILY_RS690) && -- (info->ChipFamily != CHIP_FAMILY_RS740)) { -+ (info->ChipFamily != CHIP_FAMILY_RS740) && -+ (info->ChipFamily != CHIP_FAMILY_RS780)) { - if (info->IsIGP) - info->mc_fb_location = INREG(RADEON_NB_TOM); - else --- -1.6.0.4 - diff --git a/radeon-mode-fix-rotate.patch b/radeon-mode-fix-rotate.patch deleted file mode 100644 index 376b2cc..0000000 --- a/radeon-mode-fix-rotate.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up xf86-video-ati-6.9.0/src/drmmode_display.c.dave xf86-video-ati-6.9.0/src/drmmode_display.c ---- xf86-video-ati-6.9.0/src/drmmode_display.c.dave 2008-12-22 12:45:03.000000000 +1000 -+++ xf86-video-ati-6.9.0/src/drmmode_display.c 2008-12-22 12:45:12.000000000 +1000 -@@ -246,7 +246,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, - output_count++; - } - -- if (!xf86CrtcRotate(crtc, mode, rotation)) { -+ if (!xf86CrtcRotate(crtc)) { - goto done; - } - diff --git a/radeon-modeset-fix-legacy-vt.patch b/radeon-modeset-fix-legacy-vt.patch deleted file mode 100644 index fcfe48a..0000000 --- a/radeon-modeset-fix-legacy-vt.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff -up 
xf86-video-ati-6.9.0/src/radeon_dri.c.dma xf86-video-ati-6.9.0/src/radeon_dri.c ---- xf86-video-ati-6.9.0/src/radeon_dri.c.dma 2009-01-25 09:50:13.000000000 +1000 -+++ xf86-video-ati-6.9.0/src/radeon_dri.c 2009-01-25 09:51:26.000000000 +1000 -@@ -478,6 +478,9 @@ Bool radeon_update_dri_buffers(ScreenPtr - if (info->ChipFamily >= CHIP_FAMILY_R600) - return TRUE; - -+ if (!info->drm_mm) -+ return TRUE; -+ - success = radeon_update_dri_mappings(pScrn, sarea); - - if (!success) -@@ -1289,7 +1292,7 @@ static Bool RADEONDRIMapInit(RADEONInfoP - { - - if (info->drm_mm) -- return TRUE; -+ return TRUE; - /* Map registers */ - info->dri->registerSize = info->MMIOSize; - if (drmAddMap(info->dri->drmFD, info->MMIOAddr, info->dri->registerSize, -diff -up xf86-video-ati-6.9.0/src/radeon_driver.c.dma xf86-video-ati-6.9.0/src/radeon_driver.c diff --git a/radeon-modeset.patch b/radeon-modeset.patch index d12226f..9a113ac 100644 --- a/radeon-modeset.patch +++ b/radeon-modeset.patch @@ -1,833 +1,8 @@ -commit 5b9a9a964c92859cce4f31e3121cb26b11b4bc63 -Author: Dave Airlie -Date: Sat Dec 20 09:14:05 2008 +1000 - - radeon: drop old CS2 setup - -commit 80a220979642ff99a23cd0af59b0fcafc74da2e2 -Author: Dave Airlie -Date: Fri Dec 19 12:37:01 2008 +1100 - - radeon: only do mappings if direct rendering is enabled - -commit 399da3eeea3d273db920e67bffc47c11b688ca7d -Author: Dave Airlie -Date: Tue Dec 9 13:29:18 2008 +1000 - - radeon: upstream fix for Init3D vs switch to/from 2d/3d - -commit 6105102abaa70b1e9c1c24a6f48ba9b4f6a63496 -Author: Dave Airlie -Date: Mon Dec 8 14:19:47 2008 +1000 - - radeon: only update dri buffers if DRI enabled - -commit aef93b70e7b1d0b1d393582273d549551be198f6 -Author: Dave Airlie -Date: Mon Dec 1 15:31:08 2008 +1100 - - radeon: don't have 2D and 3D in one CS buffer - -commit 4fb18164866698d9d42f8cf41fdad3cf10ff34f4 -Author: Dave Airlie -Date: Wed Nov 26 16:09:29 2008 +1100 - - radeon: set touched flag on pinned buffers - -commit 
26ea3f4afcf90fbb25abbbd4b5ac2433a63425c0 -Author: Dave Airlie -Date: Wed Nov 26 16:04:35 2008 +1100 - - radeon: fix up some of the touched by gpu handling and force gtt handling - - this fixes DFS on the rs690 - -commit 86447551a72e216df71fca64f5e16ded22c5a1ad -Author: Dave Airlie -Date: Wed Nov 26 12:52:24 2008 +1100 - - radeon: brutal attempt to fix RS4xx and RS6xx by flushing more often - - this might take more CPU but hopefully leads to stabler GPU - -commit 049244cc1a430e46497ad730c54a5e1e66bb7a4c -Author: Dave Airlie -Date: Tue Nov 25 17:20:44 2008 +1100 - - radeon: rs690 stabilisation fixes. - - Flush the VAP due a IB hang on the VAP setup regs. - Flush the TX earlier. - Emit a right to SC_CLIP_RULE to flush as per r500 docs. - -commit ba06b37bde0ad00f8a6b12003d56aeb603227143 -Author: Dave Airlie -Date: Sun Nov 23 17:56:02 2008 +1000 - - radeon: wait for rendering before doing UTS - -commit f36db245fac91aad08b4cd2c131e0c47c21bb950 -Author: Dave Airlie -Date: Sun Nov 23 17:54:27 2008 +1000 - - radeon: stop this_op_read from going negative - -commit b1121141af82075ec16edfdd6095320000f4504b -Author: Dave Airlie -Date: Sun Nov 23 17:52:42 2008 +1000 - - radeon: return flush for conflicting domains - -commit d7f1cebb0cf2d5a8110606e7ef22284cc256df72 -Author: Dave Airlie -Date: Sun Nov 23 17:50:47 2008 +1000 - - radeon: only reset state2d before emitting cache flush - -commit 0f58052bcdce741aa0f6bb63566c390d7c246632 -Author: Dave Airlie -Date: Thu Nov 20 16:48:33 2008 +1000 - - flush on UTS if any references - -commit a29231ab45efc153b6ab1ad19e960d2294aa85ab -Author: Dave Airlie -Date: Thu Nov 20 16:44:40 2008 +1000 - - radeon: add gart vs vram writes - -commit 298e92586d11439bc22aab2cb2f7f61f1e80d70f -Author: Dave Airlie -Date: Thu Nov 20 16:37:07 2008 +1000 - - radeon: improve DFS performance for non-vram objects - -commit 2737e395af3fac9707ea7e05e1dd6aaf2dcc303e -Author: Dave Airlie -Date: Wed Nov 19 14:49:44 2008 +1000 - - radeon: scrap state on LeaveVT not 
EnterVT - -commit 99e240bbfe3990be735cfa90c7c9e2d560c7ea6e -Author: Dave Airlie -Date: Wed Nov 19 11:08:34 2008 +1000 - - radeon: even more typos - -commit 523e6ae553b4ddffdf82d3759d6b6b63a0230144 -Author: Dave Airlie -Date: Wed Nov 19 07:51:03 2008 +1000 - - radeon: oops bad typo - -commit 05a8c61ef69d4270606129426e45d17bf77d0bbc -Author: Dave Airlie -Date: Tue Nov 18 16:09:10 2008 +1000 - - radeon: even if kernels fails, struggle onwards - - try and keep the session going even if visual glitches happen - -commit 52459be7e841e38e609dd4abad9a67848399219e -Author: Dave Airlie -Date: Tue Nov 18 15:46:46 2008 +1000 - - radeon_bufmgr: much more complete size check functionality - -commit 9910bcbe2b85e3242e2373111605d117e6f3a29d -Author: Dave Airlie -Date: Tue Nov 18 14:33:44 2008 +1000 - - radeon: I fail at uint32_t division - -commit f3f4b0574cf9eb01b69c8ebe7a7da0e05b32f1e4 -Author: Dave Airlie -Date: Tue Nov 18 12:06:02 2008 +1000 - - radeon: workaround O(wtf) logic in post_submit bufmgr - -commit d75dafc38eda798bf4f91b5f8e040d8894631d7c -Author: Dave Airlie -Date: Mon Nov 17 19:19:43 2008 +1000 - - radeon: set emit limit to 90% VRAM - -commit 53eb14298f7b493968462606318301bfa3d62a81 -Author: Dave Airlie -Date: Mon Nov 17 16:16:51 2008 +1000 - - radeon: use get/set master ioctls - -commit 4c154285ebbda9ff5848f02397ff698dc9e57bf3 -Author: Dave Airlie -Date: Fri Nov 14 15:56:16 2008 +1000 - - radeon: make space accounting a lot smarter - -commit 5c7b6f8b8e5a50f49f42658553a66d5fbcd7ae8d -Author: Dave Airlie -Date: Fri Nov 14 15:55:12 2008 +1000 - - radeon: retry on CS2 EAGAIN - -commit 99e9d3981c323ada2aa0c1bbe976a0e4a9c450e5 -Author: Dave Airlie -Date: Fri Nov 14 15:20:59 2008 +1000 - - radeon: add src/mask/dest to fallbacks - -commit 455e9f5f6e5a3e648ffd6135570fbc53abd25609 -Author: Dave Airlie -Date: Fri Nov 14 15:20:37 2008 +1000 - - radeon_memory: align all allocations - -commit 16a417dfeda8d991fb32770280bfe614f5c79f96 -Author: Dave Airlie -Date: Fri Nov 14 
11:03:34 2008 +1000 - - radeon: force gtt for mmap after fallbacks - -commit f3f900ff53b8d46614e5757f958cac8b88ef53bb -Author: Dave Airlie -Date: Mon Nov 10 14:18:17 2008 +1000 - - radeon: add more buffer info + fix read objects too big fallback - -commit 04a61bf3b5be1a47436ac7a39b40d2dd687962e1 -Author: Dave Airlie -Date: Mon Nov 10 11:47:02 2008 +1000 - - radeon: remove old exa bufmgr not used anymore code - -commit 4fe1f1458745895c37289c43d342f17dae24a1da -Author: Dave Airlie -Date: Mon Nov 10 11:18:27 2008 +1000 - - radeon: fix crtc dpms - - need to find a better way to switch displays off - -commit c0a8c20b8ad284513d933ef67dc469df222b9e04 -Author: Dave Airlie -Date: Mon Nov 10 11:18:02 2008 +1000 - - radeon: fix textured video corruption - - at least workaround it - -commit 4363273e081fdfc67eee2120693a0a2350b83d9f -Author: Dave Airlie -Date: Sat Nov 8 14:48:29 2008 +1000 - - radeon: add DPMS support for connectors - -commit 332493f1446d929c565e54db15936db5e6fbcb67 -Author: Dave Airlie -Date: Fri Nov 7 16:20:09 2008 +1000 - - radeon: fix rotation of right-of heads - -commit 1962610bfcf611d9b5221852a851adf012880b63 -Author: Dave Airlie -Date: Mon Nov 3 14:51:43 2008 +1000 - - radeon: respect fb tex percent + trust kernel values - -commit 9ec4f7ea480d8cf5431f2e5f0c26ac5293a26015 -Author: Dave Airlie -Date: Fri Oct 31 15:05:14 2008 +1000 - - radeon: remove workaround hack since kernel is hopefully fixed - -commit e0d4781952058834b31cd8decbd8054dcd544974 -Author: Dave Airlie -Date: Fri Oct 31 15:04:31 2008 +1000 - - radeon: workaround use after free - -commit 40113ead912a015f619cce99635d5e1e823ffb7b -Author: Dave Airlie -Date: Thu Oct 30 13:53:02 2008 +1000 - - radeon: setup accel dfs for PCIE cards only if drm_mm - -commit 354b8569178b15ad9166e6cd85ba31608dd3b53e -Author: Dave Airlie -Date: Thu Oct 30 10:00:10 2008 +1000 - - radeon: fixup name handling for bufmgr - -commit da0ead1641f0b0912a269db8af1d6ad0fe24bcc3 -Author: Dave Airlie -Date: Thu Oct 30 09:59:11 
2008 +1000 - - radeon: fix memory leak in CS2 code - -commit 882a213a037b3a220f40b7de815c687a1bb6005f -Author: Dave Airlie -Date: Tue Oct 28 20:35:19 2008 +1000 - - return on empty IBs, flush happen in the kernel - -commit 098da7dd3760ec3716eba7da8212119911ccda68 -Author: Dave Airlie -Date: Tue Oct 28 10:16:09 2008 +1000 - - radeon: remove some debugging - -commit 086cafeef09042819d3cd0a849c9bd88b74e5791 -Author: Dave Airlie -Date: Tue Oct 28 06:40:31 2008 +1000 - - radeon: enable gem wait rendering. - -commit de29375198ff5d26b54fbe69d312e38782c4fa57 -Author: Dave Airlie -Date: Mon Oct 27 16:51:00 2008 +1000 - - radeon: add new CS submission scheme - -commit 9bdbba5e7c178433f66121e300c9a833c403914b -Author: Dave Airlie -Date: Thu Oct 23 17:05:12 2008 +1000 - - radeon: really rough effort at vram limit setting - -commit dec38d7aff0f6de7116ac1028f186eb1bb4501bd -Author: Dave Airlie -Date: Thu Oct 23 17:04:51 2008 +1000 - - radeon: this shouldn't fail but it did once while debugging so patch up - -commit b1949a68b655006cad76f2e13f4e6a009469a993 -Author: Dave Airlie -Date: Thu Oct 23 10:43:09 2008 +1000 - - radeon: fixup some memory allocation issues - - hopefully since the alignment got fixed this doesn't break anything - -commit 5726b7b4e0a50bc8696ac9a190722d2ccca73304 -Author: Dave Airlie -Date: Tue Oct 21 15:50:17 2008 +1000 - - radeon: cleanup reserved space calcs - -commit e5bb8a41208cc9f2199d49d42986264942a5dfbc -Author: Dave Airlie -Date: Tue Oct 21 15:49:48 2008 +1000 - - radeon: fixup Owen's optimisation - this fixes corruption - - I haven't a good explaination why mapping the buffer twice in a row - seems to cause this failure. but I probably don't have time to track - it down before release. 
- -commit f4c4d8377b0ea9688172f6c20842f19659ad8f43 -Author: Dave Airlie -Date: Sun Oct 19 18:27:53 2008 +1000 - - radeon: fixup tex offset for no modeset - -commit e0e0d5bee96ed4c80b50a84858792c8f2c0f5ae8 -Author: Dave Airlie -Date: Wed Oct 15 17:01:34 2008 +1000 - - radeon: add DFS support for CS - -commit 5cd19b7fa508343537ffb60b04ec0897a8350615 -Author: airlied -Date: Wed Oct 15 23:55:13 2008 +1000 - - radeon: add r100/r200 support for EXA render - -commit 6d9a071719237af4da38ded3f72abee540a9c0d5 -Author: Dave Airlie -Date: Mon Oct 13 16:59:02 2008 +1000 - - radeon: fix switch mode path so nexuiz starts - -commit de0ac9eeff9d68e6d429b7b0f5bc6cb02a7e248b -Author: Dave Airlie -Date: Fri Oct 10 15:29:24 2008 +1000 - - remove gem buf caching useless on radeon - -commit 46da4cac3a825be818eced41dd4d04e6e8cc4b46 -Author: Dave Airlie -Date: Fri Oct 10 15:18:41 2008 +1000 - - radeon: drmmode make names same as for non-kms drivers - -commit 8938b8d46cd24fa38b0bfe58b8e19788e25b2fe6 -Author: Dave Airlie -Date: Fri Oct 10 15:10:28 2008 +1000 - - radeon: fix rotation under kms - -commit 7d6238f23cad0ed2037e66c2678931e8c583a0e3 -Author: Dave Airlie -Date: Fri Oct 10 14:44:39 2008 +1000 - - radeon: remove testing fallback - -commit 9969552076c7884d90976be20325f172dbeeeb87 -Author: Kristian Høgsberg -Date: Fri Oct 10 10:57:47 2008 +1100 - - radeon: add copy fb contents patch - -commit 1b027f29f7748301385a35a8f59408ac4ee2283e -Author: Dave Airlie -Date: Fri Oct 10 10:57:20 2008 +1100 - - bufmgr: turn off debug - -commit 05394792a97282ab31e9a472541bc5698b392a55 -Author: Dave Airlie -Date: Fri Oct 10 10:38:38 2008 +1100 - - radeon: fixup modesetting code after rebasing to master - -commit da9ae4160a9b89c7c005c5743e1e9255493d786a -Author: Dave Airlie -Date: Thu Oct 9 16:34:52 2008 +1100 - - radeon: misc cleanups in exa - -commit 180df18055f89cd109099e1f43ba71ee439ead24 -Author: Dave Airlie -Date: Thu Oct 9 16:34:23 2008 +1100 - - radeon: fix UTS for non-modesetting - -commit 
d18aeb288e13cda262dc676ed11d90fe2ce2d348 -Author: Dave Airlie -Date: Thu Oct 9 16:33:59 2008 +1100 - - radeon: fix exa limits problem - shouldn't have been resetting scissor - -commit 91821d35bf233e97b1dd1ff12999e10445d8c3fa -Author: Dave Airlie -Date: Wed Oct 1 11:21:53 2008 +1000 - - radeon: fixup for latest libdrm changes - -commit 4baa05e54f603c72e8d8abf266fa3d815de187ac -Author: Owen Taylor -Date: Fri Sep 26 16:17:49 2008 -0400 - - Don't flush when mapping a newly created pixmap into system ram If we have a pixmap that has never been mapped into vram (and thus never written to by the GPU), there is no need to flush the graphics pipeline and wait for idle before starting to write to it. - -commit 8b66f282da5f97ab4f4045f90337c6a325a1003c -Author: Dave Airlie -Date: Mon Sep 29 16:32:51 2008 +1000 - - radeon: hopefully fix textured xv - -commit 62517a660bb21daf3910ec719af59fe07e463b9e -Author: Dave Airlie -Date: Fri Sep 26 11:38:36 2008 +1000 - - radeon: fix the offset checks for command submission - - since we are relocating in the kernel we don't need these - -commit 5ad65dcc52633d6925a0d458dbf1ed8c7ed62e7a -Author: Dave Airlie -Date: Fri Sep 26 10:46:20 2008 +1000 - - radeon: fixup after mertge - -commit 99db5b3a2c60433ddb23ae8a45e2ea4f4dd3f48b -Author: Dave Airlie -Date: Fri Sep 26 10:34:41 2008 +1000 - - radeon: fix issues with emitting DST PIPE on cards that don't use it - -commit c157c9541cb71d4b8d27bb0d6847e570a6712105 -Author: Dave Airlie -Date: Wed Sep 24 17:12:19 2008 +1000 - - modesetting: fixup bits of drmmode_display.c - -commit de2ce63ea5c21796490d541e42bf312915992e25 -Author: Dave Airlie -Date: Wed Sep 24 15:42:01 2008 +1000 - - r600: fixup for kms - -commit d0090ee991d1b427ea9da03828f0e70c65f9ec92 -Author: Dave Airlie -Date: Sun Sep 7 08:01:56 2008 +1000 - - modeset: fix AddFB for current tree - -commit 1ecf98ff8c303d80bf39570f31a768d3f5731b2c -Author: Dave Airlie -Date: Thu Aug 14 10:52:52 2008 +1000 - - radeon: disable overlay for modesetting 
for now - -commit 57aafeb0cac1e145c679ace1fc16dc2d74ea392f -Author: Dave Airlie -Date: Thu Aug 14 10:52:42 2008 +1000 - - radeon: no need for this anymore - -commit d85b13ca8a17b3ed22460b2ff0f448aff5eed01a -Author: Dave Airlie -Date: Fri Sep 5 16:32:01 2008 +1000 - - radeon: fix up for suspend/resume - uses new API - -commit 9099983739f8a9fb02cf23578770d079b66b20e6 -Author: Dave Airlie -Date: Wed Aug 27 13:09:55 2008 +1000 - - radeon: update to proper domain - - this should fix 3d again for now at least - -commit 337e87d1f2becc82b8a5c5dd01b20c339fa1f7a5 -Author: Dave Airlie -Date: Tue Aug 26 18:29:23 2008 +1000 - - ddx: move to using new gem interface - - add a GEM bufmgr backend along the lines of Intels one. - - The buffer reuse is disabled and I'll probably rip it out for radeon - as we can't just re-use buffers that might have ended up in VRAM etc. - - Probably need some sort of in-kernel re-use. - -commit 710aabc060388d5c60a29b66f74d0ac16d61dd1d -Author: Dave Airlie -Date: Mon Aug 25 11:37:48 2008 +1000 - - radeon: add an initial GEM bufmgr - -commit 225e8b6013eda2f8c6dc434c02b04566f4a5b835 -Author: Dave Airlie -Date: Mon Aug 25 11:37:20 2008 +1000 - - radeon: implement simple UTS - -commit c296ff562755b9f98ba26aab0cd6349b4cc3ccf0 -Author: Dave Airlie -Date: Fri Aug 22 15:34:58 2008 +1000 - - radeon: add read/write domains properly - -commit 0b0c142f21330cc0ae84aadf08d8572c02d2bcaa -Author: Dave Airlie -Date: Fri Aug 15 11:25:31 2008 +1000 - - radeon: fix some rotate bugs - -commit a17e4658bc1790753d2fe630eefb4f6dfec4a292 -Author: Dave Airlie -Date: Fri Aug 15 11:16:18 2008 +1000 - - radeon: when create fake bos, they are already mapped - -commit fe8acaa8e666c60af1c0db8f31f24be190f73718 -Author: Dave Airlie -Date: Fri Aug 15 11:15:58 2008 +1000 - - exa: remove wrong assert - -commit 6b9b175962c7347b3914ea38ca4cb589a9a5fc0a -Author: Dave Airlie -Date: Tue Aug 12 19:27:44 2008 +1000 - - radeon: start to work on rotate pixmap support - -commit 
736036972ee1785b04645d04e93f5fe38127ca00 -Author: Dave Airlie -Date: Tue Aug 12 18:28:23 2008 +1000 - - radeon: port simple exa bufmgr - - exa pixmaps work now but they are slow - -commit d2d3cd1d2b422c6b99cb4c1334009b01adb1988b -Author: Dave Airlie -Date: Thu Aug 7 17:52:04 2008 +1000 - - further pixmaps on EXA - -commit 553ce05ab5817e2fc1153b57cda5a5d3378415f7 -Author: Dave Airlie -Date: Thu Aug 7 11:22:46 2008 +1000 - - initial exa on gem hackx - - Conflicts: - - src/radeon_exa.c - src/radeon_memory.c - -commit 7276e7b7c7287859d43ce702644eb4202ae36348 -Author: Dave Airlie -Date: Tue Aug 12 15:31:03 2008 +1000 - - radeon: initial rotate pixmap - - This doesn't work, we really need EXA pixmaps for this stuff - -commit b5226f66c35ae7a01cdcfa67d52582ec942a2eeb -Author: Dave Airlie -Date: Tue Aug 12 14:40:18 2008 +1000 - - radeon: use buffer names - -commit 443471b0c2581090615a35f4e93d2730919b877f -Author: Dave Airlie -Date: Tue Aug 12 14:38:56 2008 +1000 - - ati: fix do adjust mode - -commit fe8e25bbf8341dc9647332d846a0eee4d04ca9cb -Author: Dave Airlie -Date: Tue Aug 12 09:38:48 2008 +1000 - - FEDORA: update offsets in DRI private - -commit 029e79b781bac59aa78593419f157fb52b21f6ad -Author: Dave Airlie -Date: Tue Aug 12 09:10:18 2008 +1000 - - radeon: add vt bind flag - -commit 1145f94ac69742d02b0e7140eb8cae350c661959 -Author: Dave Airlie -Date: Wed Aug 6 13:30:54 2008 +1000 - - radeon: warning cleanups - -commit ac6f7f02a3c6399f1c549fa91030f790728633e8 -Author: Dave Airlie -Date: Wed Aug 6 13:18:52 2008 +1000 - - radeon: remove debugging - -commit 6c609f5c7375335cda6b76185bada8731d68988f -Author: Dave Airlie -Date: Wed Aug 6 13:17:31 2008 +1000 - - radeon: add code to set memory map and not futz with mappings - -commit 503b618de3c8ceaf89bd70220b9f7ca64a94e15c -Author: Dave Airlie -Date: Wed Aug 6 13:16:56 2008 +1000 - - drm: don't futz with framebuffer when no drm_mm - -commit d55a53a1f9dfb0a6d168d48e342d0049ef947c75 -Author: Dave Airlie -Date: Tue Aug 5 16:47:21 
2008 +1000 - - radeon: undef OUT_RELOC - -commit 61060d28036fcfe07e185069ef398f758b7501c2 -Author: Dave Airlie -Date: Tue Aug 5 16:37:52 2008 +1000 - - radeon: add stdint/lib to check for modesetting - -commit a2b4af49d7e49a91d4e1bbef5168cbc7d9fc365b -Author: Dave Airlie -Date: Tue Aug 5 16:07:31 2008 +1000 - - fix whitespace - -commit 04824237e3b5945c79b56466654ab2b135382e04 -Author: Dave Airlie -Date: Tue Aug 5 16:07:07 2008 +1000 - - radeon: fix build issue - -commit fee0bc168be7f051dd74794e5e3f3b60ecbbbe3c -Author: Dave Airlie -Date: Mon Aug 4 16:27:55 2008 +1000 - - radeon: free all pinned memory on server shutdown - -commit 45f14242535069259897deff31755ad0df8b997a -Author: Dave Airlie -Date: Mon Aug 4 16:11:30 2008 +1000 - - radeon: get textured video working on the memory manager - -commit ee28a1ab379d16a93c47e6305c2ecdd22ab0bbc2 -Author: Dave Airlie -Date: Mon Aug 4 16:11:18 2008 +1000 - - radeon: convert bus addr to void pointer - -commit db3ed400b8cbf04c0195c00be931c299a55e773c -Author: Dave Airlie -Date: Mon Aug 4 15:11:13 2008 +1000 - - radeon: avoid z cache flushes if not needed - -commit 6673579a462f948aede961a924e17e61306715a0 -Author: Dave Airlie -Date: Wed Jul 30 17:04:22 2008 +1000 - - radeon: update DRI support, so mappings get added - - We only add fake frontbuffer mapping as DRI clients expect one. 
- -commit 30a078524f7c71d59ee834725a3f2c11a082c21f -Author: Dave Airlie -Date: Tue Jul 29 16:50:03 2008 +1000 - - radeon: exa through the handle relocation function - -commit 39e5484735471e589e52ad1c91df9b22c2a44f02 -Author: Dave Airlie -Date: Tue Jul 29 14:58:22 2008 +1000 - - radeon: keep track of 2D state - - This lets the kernel use the 2D blit engine in theory - -commit 1e43289fadd17898028443cce2ee33e00eae7b89 -Author: Dave Airlie -Date: Mon Jul 28 17:48:59 2008 +1000 - - new command submission attempt - -commit 842040a30b0ba5d586f42e1c64a5651da2ac6cae -Author: Dave Airlie -Date: Mon Jul 28 17:48:08 2008 +1000 - - radeon: fixup over fluishing problem - -commit 6aa4d546a70fe88cbb0993ec88d0241c8da73704 -Author: Dave Airlie -Date: Mon Jul 28 17:44:51 2008 +1000 - - hack for now: divide texture size by 2 - -commit 3aab05da72750341430a87c60ee8253994fff577 -Author: Dave Airlie -Date: Fri Jul 25 17:07:20 2008 +1000 - - radeon: fix dri message - -commit dbe42d159f91ed2ccf590be366ec27a41b140083 -Author: Dave Airlie -Date: Fri Jul 25 15:11:58 2008 +1000 - - radeon: fix pool which caused memory corruption - -commit 20d94ec16fe6f7f826960adeb40cfddef641b60a -Author: Dave Airlie -Date: Fri Jul 25 14:59:07 2008 +1000 - - radeon: get DRI to attempt to start up - -commit cf154e884bf0d72fbc3cb907966e8940b826c6c9 -Author: Dave Airlie -Date: Fri Jul 25 14:58:23 2008 +1000 - - radeon: call the right sync function - -commit 8c866bd98f3ba2c40b5c23ce73c047596cb18f21 -Author: Dave Airlie -Date: Fri Jul 25 14:57:21 2008 +1000 - - radeon: unmap memory from objects - -commit 80bd230f4539d0b0564ec78f0d8b1dc179ba564a -Author: Dave Airlie -Date: Wed Jul 23 10:59:16 2008 +1000 - - use CP for 3D engine with modeset - -commit 18638ff0d88051c333fe986f24c6a9d4a45e9e94 -Author: Dave Airlie -Date: Wed Jul 23 10:59:04 2008 +1000 - - radeon: fix DFS - -commit e6f23a36efa674b12be4bbebd6c48dd6552405d4 -Author: Dave Airlie -Date: Wed Jul 23 10:48:45 2008 +1000 - - radeon: get the fb location right - 
-commit 81c3c56848283e1da51a7a4d696705457eefea5f -Author: Dave Airlie -Date: Tue Jul 22 18:08:25 2008 +1000 - - radeon: make at least the EXA stipple work.. - -commit 011ad06973f0866d700a97694b5225eae14e56ad -Author: Dave Airlie -Date: Tue Jul 22 18:07:15 2008 +1000 - - radeon: disable debugging for now - -commit 33c1418c438aced27890ddb7ac9b026474c658dd -Author: Dave Airlie -Date: Mon Jul 21 18:40:14 2008 +1000 - - radeon: initial indirect buffer use. - - Move EXA and front allocation into one big buffer to workaround - EXA issues - need to move to driver pixmap hooks. - - Add new indirect GEM to take indirect buffers. - - doesn't work think the offsets are all wrong. - -commit 78dcbaf46bf7363fdf88802b68d87c5fa1e6ef29 -Author: Dave Airlie -Date: Fri Jul 18 13:46:41 2008 +1000 - - modesetting step 1: have X and a cursor - -commit 90d1561470adf8466acec16c281e60fc8f6ec89b -Author: Dave Airlie -Date: Thu Jul 17 09:59:19 2008 +1000 - - radeon: add radeon_memory.c - -commit 6f0abb40c55511dbece6a8b2f4aad40953229de9 -Author: Dave Airlie -Date: Tue Jul 8 16:55:27 2008 +1000 - - remove mappings of some buffers. - - use drm gart code if available - -commit b0a91607a6fdfe000a42d2abedae6572d43884be -Author: Dave Airlie -Date: Mon Jul 7 16:54:35 2008 +1000 - - add initial support for a kernel memory manager - - This just pins all the current allocations via the kernel memory manager - instead of trying to do them all in userspace. 
- - useful steps towards getting kernel modesetting - -commit bc5ef65797de0b20a4cc1ca5189d5bf06fc4f47f -Author: Dave Airlie -Date: Fri Jul 4 10:39:34 2008 +1000 - - port to using drm header files diff --git a/configure.ac b/configure.ac -index 5ffe0d9..d495c44 100644 +index 28207d6..051eca8 100644 --- a/configure.ac +++ b/configure.ac -@@ -114,6 +114,15 @@ if test "$DRI" = yes; then +@@ -114,8 +114,19 @@ if test "$DRI" = yes; then if test "$have_damage_h" = yes; then AC_DEFINE(DAMAGE,1,[Use Damage extension]) fi @@ -842,12 +17,16 @@ index 5ffe0d9..d495c44 100644 + CFLAGS="$save_CFLAGS" fi ++AC_DEFINE(DRI2, 1,[Enable DRI2 code]) ++ save_CFLAGS="$CFLAGS" + CFLAGS="$XORG_CFLAGS" + AC_CHECK_HEADER(xf86Modes.h,[XMODES=yes],[XMODES=no],[#include "xorg-server.h"]) diff --git a/src/Makefile.am b/src/Makefile.am -index d65a3e4..1be325a 100644 +index 7ff7d31..c03a2c7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am -@@ -90,12 +90,13 @@ radeon_drv_ladir = @moduledir@/drivers +@@ -90,12 +90,14 @@ radeon_drv_ladir = @moduledir@/drivers radeon_drv_la_SOURCES = \ radeon_accel.c radeon_cursor.c radeon_dga.c radeon_legacy_memory.c \ radeon_driver.c radeon_video.c radeon_bios.c radeon_mm_i2c.c \ @@ -858,12 +37,21 @@ index d65a3e4..1be325a 100644 radeon_crtc.c radeon_output.c radeon_modes.c radeon_tv.c \ $(RADEON_ATOMBIOS_SOURCES) radeon_atombios.c radeon_atomwrapper.c \ - $(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c atombios_crtc.c -+ $(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c atombios_crtc.c \ -+ drmmode_display.c radeon_bufmgr_gem.c radeon_dri_bufmgr.c ++ $(RADEON_DRI_SRCS) $(RADEON_EXA_SOURCES) atombios_output.c \ ++ atombios_crtc.c drmmode_display.c radeon_bufmgr_gem.c \ ++ radeon_dri_bufmgr.c radeon_dri2.c if XMODES radeon_drv_la_SOURCES += \ -@@ -159,4 +160,6 @@ EXTRA_DIST = \ +@@ -144,6 +146,7 @@ EXTRA_DIST = \ + radeon_exa_render.c \ + radeon_exa_funcs.c \ + radeon.h \ ++ radeon_dri2.h \ + radeon_macros.h \ + radeon_probe.h \ + radeon_reg.h \ 
+@@ -164,4 +167,6 @@ EXTRA_DIST = \ radeon_pci_device_match_gen.h \ pcidb/ati_pciids.csv \ pcidb/parse_pci_ids.pl \ @@ -873,7 +61,7 @@ index d65a3e4..1be325a 100644 + radeon_dri_bufmgr.h diff --git a/src/drmmode_display.c b/src/drmmode_display.c new file mode 100644 -index 0000000..3cb2fb9 +index 0000000..69d82ea --- /dev/null +++ b/src/drmmode_display.c @@ -0,0 +1,864 @@ @@ -1125,7 +313,7 @@ index 0000000..3cb2fb9 + output_count++; + } + -+ if (!xf86CrtcRotate(crtc, mode, rotation)) { ++ if (!xf86CrtcRotate(crtc)) { + goto done; + } + @@ -1824,7 +1012,7 @@ index 0000000..dbb6412 +#endif +#endif diff --git a/src/radeon.h b/src/radeon.h -index a67962c..370dbf1 100644 +index a7ed95e..69e41a6 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -46,6 +46,8 @@ @@ -1836,7 +1024,15 @@ index a67962c..370dbf1 100644 /* PCI support */ #include "xf86Pci.h" -@@ -85,6 +87,7 @@ +@@ -75,6 +77,7 @@ + #include "dri.h" + #include "GL/glxint.h" + #include "xf86drm.h" ++#include "radeon_dri2.h" + + #ifdef DAMAGE + #include "damage.h" +@@ -85,6 +88,7 @@ #include "xf86Crtc.h" #include "X11/Xatom.h" @@ -1844,7 +1040,7 @@ index a67962c..370dbf1 100644 /* Render support */ #ifdef RENDER #include "picturestr.h" -@@ -409,6 +412,14 @@ typedef enum { +@@ -412,6 +416,14 @@ typedef enum { typedef struct _atomBiosHandle *atomBiosHandlePtr; @@ -1859,7 +1055,7 @@ index a67962c..370dbf1 100644 typedef struct { uint32_t pci_device_id; RADEONChipFamily chip_family; -@@ -419,7 +430,27 @@ typedef struct { +@@ -422,7 +434,27 @@ typedef struct { int singledac; } RADEONCardInfo; @@ -1887,7 +1083,7 @@ index a67962c..370dbf1 100644 struct radeon_cp { Bool CPRuns; /* CP is running */ Bool CPInUse; /* CP has been used by X server */ -@@ -433,6 +464,10 @@ struct radeon_cp { +@@ -436,6 +468,10 @@ struct radeon_cp { drmBufPtr indirectBuffer; int indirectStart; @@ -1898,7 +1094,7 @@ index a67962c..370dbf1 100644 /* Debugging info for BEGIN_RING/ADVANCE_RING pairs. 
*/ int dma_begin_count; char *dma_debug_func; -@@ -499,13 +534,13 @@ struct radeon_dri { +@@ -502,13 +538,13 @@ struct radeon_dri { drm_handle_t ringHandle; /* Handle from drmAddMap */ drmSize ringMapSize; /* Size of map */ int ringSize; /* Size of ring (in MB) */ @@ -1914,7 +1110,7 @@ index a67962c..370dbf1 100644 /* CP vertex/indirect buffer data */ unsigned long bufStart; /* Offset into GART space */ -@@ -524,7 +559,6 @@ struct radeon_dri { +@@ -527,7 +563,6 @@ struct radeon_dri { drmAddress gartTex; /* Map */ int log2GARTTexGran; @@ -1922,9 +1118,17 @@ index a67962c..370dbf1 100644 int fbX; int fbY; int backX; -@@ -846,6 +880,44 @@ typedef struct { - /* some server chips have a hardcoded edid in the bios so that they work with KVMs */ - Bool get_hardcoded_edid_from_bios; +@@ -817,6 +852,7 @@ typedef struct { + RADEONCardType cardType; /* Current card is a PCI card */ + struct radeon_cp *cp; + struct radeon_dri *dri; ++ struct radeon_dri2 dri2; + #ifdef USE_EXA + Bool accelDFS; + #endif +@@ -918,6 +954,44 @@ typedef struct { + int virtualX; + int virtualY; + Bool new_cs; // new command submission routine + struct radeon_2d_state state_2d; @@ -1967,7 +1171,7 @@ index a67962c..370dbf1 100644 } RADEONInfoRec, *RADEONInfoPtr; #define RADEONWaitForFifo(pScrn, entries) \ -@@ -1092,6 +1164,23 @@ extern void +@@ -1170,6 +1244,23 @@ extern void radeon_legacy_free_memory(ScrnInfoPtr pScrn, void *mem_struct); @@ -1991,7 +1195,7 @@ index a67962c..370dbf1 100644 #ifdef XF86DRI # ifdef USE_XAA /* radeon_accelfuncs.c */ -@@ -1110,7 +1199,9 @@ do { \ +@@ -1188,7 +1279,9 @@ do { \ #define RADEONCP_RELEASE(pScrn, info) \ do { \ @@ -2002,7 +1206,7 @@ index a67962c..370dbf1 100644 RADEON_PURGE_CACHE(); \ RADEON_WAIT_UNTIL_IDLE(); \ RADEONCPReleaseIndirect(pScrn); \ -@@ -1144,7 +1235,7 @@ do { \ +@@ -1223,7 +1316,7 @@ do { \ #define RADEONCP_REFRESH(pScrn, info) \ do { \ @@ -2011,7 +1215,7 @@ index a67962c..370dbf1 100644 if (info->cp->needCacheFlush) { \ RADEON_PURGE_CACHE(); \ 
RADEON_PURGE_ZCACHE(); \ -@@ -1171,6 +1262,13 @@ do { \ +@@ -1250,6 +1343,13 @@ do { \ #define RING_LOCALS uint32_t *__head = NULL; int __expected; int __count = 0 #define BEGIN_RING(n) do { \ @@ -2025,7 +1229,7 @@ index a67962c..370dbf1 100644 if (RADEON_VERBOSE) { \ xf86DrvMsg(pScrn->scrnIndex, X_INFO, \ "BEGIN_RING(%d) in %s\n", (unsigned int)n, __FUNCTION__);\ -@@ -1183,13 +1281,6 @@ do { \ +@@ -1262,13 +1362,6 @@ do { \ } \ info->cp->dma_debug_func = __FILE__; \ info->cp->dma_debug_lineno = __LINE__; \ @@ -2039,7 +1243,7 @@ index a67962c..370dbf1 100644 __expected = n; \ __head = (pointer)((char *)info->cp->indirectBuffer->address + \ info->cp->indirectBuffer->used); \ -@@ -1232,6 +1323,14 @@ do { \ +@@ -1311,6 +1404,14 @@ do { \ OUT_RING(val); \ } while (0) @@ -2055,18 +1259,18 @@ index a67962c..370dbf1 100644 do { \ if (RADEON_VERBOSE) \ diff --git a/src/radeon_accel.c b/src/radeon_accel.c -index 2f62fa5..fd1e073 100644 +index dffbc57..1531c81 100644 --- a/src/radeon_accel.c +++ b/src/radeon_accel.c -@@ -91,6 +91,7 @@ +@@ -92,6 +92,7 @@ /* X and server generic header files */ #include "xf86.h" +#include "radeon_bufmgr_gem.h" + static void R600EngineReset(ScrnInfoPtr pScrn); - #ifdef USE_XAA -@@ -316,6 +317,9 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn) +@@ -373,6 +374,9 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn) RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; @@ -2076,7 +1280,7 @@ index 2f62fa5..fd1e073 100644 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "EngineRestore (%d/%d)\n", info->CurrentLayout.pixel_code, -@@ -375,8 +379,8 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) +@@ -433,8 +437,8 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) info->CurrentLayout.bitsPerPixel); #ifdef XF86DRI @@ -2087,7 +1291,7 @@ index 2f62fa5..fd1e073 100644 int num_pipes; memset(&np, 0, sizeof(np)); -@@ -395,59 +399,62 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) +@@ -453,59 +457,62 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) } 
#endif @@ -2199,7 +1403,7 @@ index 2f62fa5..fd1e073 100644 switch (info->CurrentLayout.pixel_code) { case 8: datatype = 2; break; -@@ -555,6 +562,115 @@ int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info) +@@ -613,6 +620,119 @@ int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info) } } @@ -2293,6 +1497,10 @@ index 2f62fa5..fd1e073 100644 + buffer as the kernel needs to use the blit engine to move stuff around */ + if (info->reemit_current2d) + info->reemit_current2d(pScrn, 0); ++ if (info->dri2.enabled) { ++ info->accel_state->XInited3D = FALSE; ++ info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; ++ } +} + +void RADEONCSReleaseIndirect(ScrnInfoPtr pScrn) @@ -2315,7 +1523,7 @@ index 2f62fa5..fd1e073 100644 /* Get an indirect buffer for the CP 2D acceleration commands */ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) { -@@ -565,6 +681,9 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) +@@ -623,6 +743,9 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) int size = 0; int i = 0; int ret; @@ -2325,7 +1533,7 @@ index 2f62fa5..fd1e073 100644 #if 0 /* FIXME: pScrn->pScreen has not been initialized when this is first -@@ -630,6 +749,11 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) +@@ -694,6 +817,11 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) if (!buffer) return; if (start == buffer->used && !discard) return; @@ -2337,9 +1545,9 @@ index 2f62fa5..fd1e073 100644 if (RADEON_VERBOSE) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", buffer->idx); -@@ -664,10 +788,16 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) - int start = info->cp->indirectStart; - drm_radeon_indirect_t indirect; +@@ -749,10 +877,16 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) + } + } + if (info->new_cs) { + RADEONCSReleaseIndirect(pScrn); @@ -2354,7 +1562,7 @@ index 2f62fa5..fd1e073 100644 if (RADEON_VERBOSE) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Releasing buffer %d\n", -@@ -794,6 +924,7 @@ RADEONHostDataBlit( +@@ -879,6 +1013,7 @@ 
RADEONHostDataBlit( ret = ( uint8_t* )&__head[__count]; __count += dwords; @@ -2362,16 +1570,16 @@ index 2f62fa5..fd1e073 100644 ADVANCE_RING(); *y += *hpass; -@@ -931,7 +1062,7 @@ Bool RADEONAccelInit(ScreenPtr pScreen) +@@ -1013,7 +1148,7 @@ Bool RADEONAccelInit(ScreenPtr pScreen) #ifdef USE_EXA if (info->useEXA) { # ifdef XF86DRI - if (info->directRenderingEnabled) { + if (info->directRenderingEnabled || info->drm_mode_setting) { - if (!RADEONDrawInitCP(pScreen)) - return FALSE; - } else -@@ -952,7 +1083,7 @@ Bool RADEONAccelInit(ScreenPtr pScreen) + if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (!R600DrawInit(pScreen)) + return FALSE; +@@ -1046,7 +1181,7 @@ Bool RADEONAccelInit(ScreenPtr pScreen) } #ifdef XF86DRI @@ -2380,14 +1588,14 @@ index 2f62fa5..fd1e073 100644 RADEONAccelInitCP(pScreen, a); else #endif /* XF86DRI */ -@@ -974,11 +1105,13 @@ void RADEONInit3DEngine(ScrnInfoPtr pScrn) +@@ -1068,11 +1203,13 @@ void RADEONInit3DEngine(ScrnInfoPtr pScrn) RADEONInfoPtr info = RADEONPTR (pScrn); #ifdef XF86DRI - if (info->directRenderingEnabled) { - drm_radeon_sarea_t *pSAREAPriv; + if (info->directRenderingEnabled || info->drm_mode_setting) { -+ if (info->directRenderingEnabled) { ++ if (info->directRenderingEnabled && !info->dri2.enabled) { + drm_radeon_sarea_t *pSAREAPriv; - pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); @@ -2398,7 +1606,7 @@ index 2f62fa5..fd1e073 100644 RADEONInit3DEngineCP(pScrn); } else #endif -@@ -986,7 +1119,7 @@ void RADEONInit3DEngine(ScrnInfoPtr pScrn) +@@ -1080,7 +1217,7 @@ void RADEONInit3DEngine(ScrnInfoPtr pScrn) info->accel_state->XInited3D = TRUE; } @@ -2440,10 +1648,10 @@ index 0000000..481c5cf +#endif diff --git a/src/radeon_bufmgr_gem.c b/src/radeon_bufmgr_gem.c new file mode 100644 -index 0000000..7508dd7 +index 0000000..6cf13ba --- /dev/null +++ b/src/radeon_bufmgr_gem.c -@@ -0,0 +1,656 @@ +@@ -0,0 +1,666 @@ +/************************************************************************** + * + * Copyright © 2007-2008 Red 
Hat Inc. @@ -2543,7 +1751,6 @@ index 0000000..7508dd7 + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + struct drm_radeon_gem_create args; + int ret; -+ unsigned int page_size = getpagesize(); + dri_bo_gem *gem_bo; + + gem_bo = calloc(1, sizeof(*gem_bo)); @@ -2660,7 +1867,6 @@ index 0000000..7508dd7 +static int +dri_gem_bo_unmap(dri_bo *buf) +{ -+ dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)buf->bufmgr; + dri_bo_gem *gem_bo = (dri_bo_gem *)buf; + + if (--gem_bo->map_count > 0) @@ -2674,9 +1880,6 @@ index 0000000..7508dd7 +static void +dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) +{ -+ dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; -+ int i; -+ + free(bufmgr); +} + @@ -2778,7 +1981,6 @@ index 0000000..7508dd7 + dri_bo_gem *gem_bo = (dri_bo_gem *)buf; + uint32_t *__head = head; + uint32_t __count = *count_p; -+ dri_bo_gem *trav; + int i; + int index; + int have_reloc = -1; @@ -3008,7 +2210,6 @@ index 0000000..7508dd7 +radeon_bufmgr_gem_init(int fd) +{ + dri_bufmgr_gem *bufmgr_gem; -+ int i; + + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + bufmgr_gem->fd = fd; @@ -3100,9 +2301,26 @@ index 0000000..7508dd7 + dri_bo_gem *gem_bo = (dri_bo_gem *)buf; + return gem_bo->in_vram; +} ++ ++int radeon_bo_gem_name_buffer(dri_bo *bo, uint32_t *name) ++{ ++ dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; ++ dri_bo_gem *gem_bo = (dri_bo_gem *)bo; ++ struct drm_gem_flink flink; ++ int r; ++ ++ flink.handle = gem_bo->gem_handle; ++ r = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); ++ if (r) { ++ DBG("[drm] failed to name buffer %d\n", -errno); ++ return r; ++ } ++ *name = flink.name; ++ return 0; ++} diff --git a/src/radeon_bufmgr_gem.h b/src/radeon_bufmgr_gem.h new file mode 100644 -index 0000000..7c32b61 +index 0000000..6d3b6fe --- /dev/null +++ b/src/radeon_bufmgr_gem.h @@ -0,0 +1,19 @@ @@ -3123,10 +2341,10 @@ index 0000000..7c32b61 +int radeon_bufmgr_gem_force_gtt(dri_bo *buf); +void radeon_bufmgr_gem_set_limit(dri_bufmgr *bufmgr, uint32_t 
domain, uint32_t limit); +int radeon_bufmgr_gem_in_vram(dri_bo *buf); -+ ++int radeon_bo_gem_name_buffer(dri_bo *bo, uint32_t *name); +#endif diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c -index 0a9f9db..07f83e0 100644 +index eabd87d..582b80f 100644 --- a/src/radeon_commonfuncs.c +++ b/src/radeon_commonfuncs.c @@ -62,12 +62,15 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) @@ -3165,7 +2383,7 @@ index 0a9f9db..07f83e0 100644 OUT_ACCEL_REG(R300_GB_SELECT, 0); OUT_ACCEL_REG(R300_GB_ENABLE, 0); FINISH_ACCEL(); -@@ -715,7 +720,7 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) +@@ -712,7 +717,7 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) #ifdef ACCEL_CP /* Make sure the CP is idle first */ @@ -3263,7 +2481,7 @@ index 0fcdcf0..49601cf 100644 xf86CrtcPtr crtc = xf86_config->crtc[c]; RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; diff --git a/src/radeon_dri.c b/src/radeon_dri.c -index 7c317e6..8217b6e 100644 +index 45c927f..b9641a8 100644 --- a/src/radeon_dri.c +++ b/src/radeon_dri.c @@ -40,6 +40,8 @@ @@ -3275,7 +2493,7 @@ index 7c317e6..8217b6e 100644 /* Driver data structures */ #include "radeon.h" -@@ -50,6 +52,8 @@ +@@ -51,6 +53,8 @@ #include "radeon_dri.h" #include "radeon_version.h" @@ -3284,7 +2502,7 @@ index 7c317e6..8217b6e 100644 /* X and server generic header files */ #include "xf86.h" #include "xf86PciInfo.h" -@@ -67,16 +71,31 @@ static size_t radeon_drm_page_size; +@@ -68,16 +72,31 @@ static size_t radeon_drm_page_size; extern void GlxSetVisualConfigs(int nconfigs, __GLXvisualConfig *configs, void **configprivs); @@ -3318,7 +2536,7 @@ index 7c317e6..8217b6e 100644 static void RADEONDRIClipNotify(ScreenPtr pScreen, WindowPtr *ppWin, int num); #endif #endif -@@ -349,6 +368,126 @@ static void RADEONDestroyContext(ScreenPtr pScreen, drm_context_t hwContext, +@@ -350,6 +369,129 @@ static void RADEONDestroyContext(ScreenPtr pScreen, drm_context_t hwContext, #endif } @@ -3432,6 +2650,9 @@ index 
7c317e6..8217b6e 100644 + if (info->ChipFamily >= CHIP_FAMILY_R600) + return TRUE; + ++ if (!info->drm_mm) ++ return TRUE; ++ + success = radeon_update_dri_mappings(pScrn, sarea); + + if (!success) @@ -3445,7 +2666,7 @@ index 7c317e6..8217b6e 100644 /* Called when the X server is woken up to allow the last client's * context to be saved and the X server's context to be loaded. This is * not necessary for the Radeon since the client detects when it's -@@ -698,25 +837,35 @@ static void RADEONDRIInitGARTValues(RADEONInfoPtr info) +@@ -699,25 +841,35 @@ static void RADEONDRIInitGARTValues(RADEONInfoPtr info) info->dri->gartOffset = 0; @@ -3500,7 +2721,7 @@ index 7c317e6..8217b6e 100644 } /* AGP Mode Quirk List - Certain hostbridge/gfx-card combos don't work with -@@ -952,6 +1101,8 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -979,6 +1131,8 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) "[agp] ring handle = 0x%08x\n", (unsigned int)info->dri->ringHandle); @@ -3509,7 +2730,7 @@ index 7c317e6..8217b6e 100644 if (drmMap(info->dri->drmFD, info->dri->ringHandle, info->dri->ringMapSize, &info->dri->ring) < 0) { xf86DrvMsg(pScreen->myNum, X_ERROR, "[agp] Could not map ring\n"); -@@ -960,9 +1111,10 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -987,9 +1141,10 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) xf86DrvMsg(pScreen->myNum, X_INFO, "[agp] Ring mapped at 0x%08lx\n", (unsigned long)info->dri->ring); @@ -3521,7 +2742,7 @@ index 7c317e6..8217b6e 100644 xf86DrvMsg(pScreen->myNum, X_ERROR, "[agp] Could not add ring read ptr mapping\n"); return FALSE; -@@ -971,6 +1123,8 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -998,6 +1153,8 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) "[agp] ring read ptr handle = 0x%08x\n", (unsigned int)info->dri->ringReadPtrHandle); @@ -3530,7 +2751,7 @@ index 7c317e6..8217b6e 100644 if 
(drmMap(info->dri->drmFD, info->dri->ringReadPtrHandle, info->dri->ringReadMapSize, &info->dri->ringReadPtr) < 0) { xf86DrvMsg(pScreen->myNum, X_ERROR, -@@ -980,6 +1134,7 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1007,6 +1164,7 @@ static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen) xf86DrvMsg(pScreen->myNum, X_INFO, "[agp] Ring read ptr mapped at 0x%08lx\n", (unsigned long)info->dri->ringReadPtr); @@ -3538,7 +2759,7 @@ index 7c317e6..8217b6e 100644 if (drmAddMap(info->dri->drmFD, info->dri->bufStart, info->dri->bufMapSize, DRM_AGP, 0, &info->dri->bufHandle) < 0) { -@@ -1057,6 +1212,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1084,6 +1242,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) "[pci] ring handle = 0x%08x\n", (unsigned int)info->dri->ringHandle); @@ -3546,7 +2767,7 @@ index 7c317e6..8217b6e 100644 if (drmMap(info->dri->drmFD, info->dri->ringHandle, info->dri->ringMapSize, &info->dri->ring) < 0) { xf86DrvMsg(pScreen->myNum, X_ERROR, "[pci] Could not map ring\n"); -@@ -1068,6 +1224,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1095,6 +1254,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) xf86DrvMsg(pScreen->myNum, X_INFO, "[pci] Ring contents 0x%08lx\n", *(unsigned long *)(pointer)info->dri->ring); @@ -3554,7 +2775,7 @@ index 7c317e6..8217b6e 100644 if (drmAddMap(info->dri->drmFD, info->dri->ringReadOffset, info->dri->ringReadMapSize, DRM_SCATTER_GATHER, flags, &info->dri->ringReadPtrHandle) < 0) { -@@ -1079,8 +1236,10 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1106,8 +1266,10 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) "[pci] ring read ptr handle = 0x%08x\n", (unsigned int)info->dri->ringReadPtrHandle); @@ -3565,7 +2786,7 @@ index 7c317e6..8217b6e 100644 xf86DrvMsg(pScreen->myNum, X_ERROR, "[pci] Could not map ring read ptr\n"); return 
FALSE; -@@ -1091,6 +1250,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1118,6 +1280,7 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) xf86DrvMsg(pScreen->myNum, X_INFO, "[pci] Ring read ptr contents 0x%08lx\n", *(unsigned long *)(pointer)info->dri->ringReadPtr); @@ -3573,17 +2794,17 @@ index 7c317e6..8217b6e 100644 if (drmAddMap(info->dri->drmFD, info->dri->bufStart, info->dri->bufMapSize, DRM_SCATTER_GATHER, 0, &info->dri->bufHandle) < 0) { -@@ -1143,6 +1303,9 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1170,6 +1333,9 @@ static Bool RADEONDRIPciInit(RADEONInfoPtr info, ScreenPtr pScreen) */ static Bool RADEONDRIMapInit(RADEONInfoPtr info, ScreenPtr pScreen) { + + if (info->drm_mm) -+ return TRUE; ++ return TRUE; /* Map registers */ info->dri->registerSize = info->MMIOSize; if (drmAddMap(info->dri->drmFD, info->MMIOAddr, info->dri->registerSize, -@@ -1182,20 +1345,23 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1210,20 +1376,23 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) drmInfo.fb_bpp = info->CurrentLayout.pixel_code; drmInfo.depth_bpp = (info->dri->depthBits - 8) * 2; @@ -3621,17 +2842,19 @@ index 7c317e6..8217b6e 100644 if (drmCommandWrite(info->dri->drmFD, DRM_RADEON_CP_INIT, &drmInfo, sizeof(drm_radeon_init_t)) < 0) return FALSE; -@@ -1204,7 +1370,8 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) +@@ -1232,8 +1401,9 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) * registers back to their default values, so we need to restore * those engine register here. 
*/ -- RADEONEngineRestore(pScrn); +- if (info->ChipFamily < CHIP_FAMILY_R600) +- RADEONEngineRestore(pScrn); + if (!info->drm_mm) -+ RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineRestore(pScrn); return TRUE; } -@@ -1400,12 +1567,11 @@ Bool RADEONDRIGetVersion(ScrnInfoPtr pScrn) +@@ -1431,12 +1601,11 @@ Bool RADEONDRIGetVersion(ScrnInfoPtr pScrn) /* Get DRM version & close DRM */ info->dri->pKernelDRMVersion = drmGetVersion(fd); @@ -3645,7 +2868,7 @@ index 7c317e6..8217b6e 100644 } /* Now check if we qualify */ -@@ -1439,10 +1605,29 @@ Bool RADEONDRIGetVersion(ScrnInfoPtr pScrn) +@@ -1470,10 +1639,29 @@ Bool RADEONDRIGetVersion(ScrnInfoPtr pScrn) req_patch); drmFreeVersion(info->dri->pKernelDRMVersion); info->dri->pKernelDRMVersion = NULL; @@ -3676,7 +2899,7 @@ index 7c317e6..8217b6e 100644 } Bool RADEONDRISetVBlankInterrupt(ScrnInfoPtr pScrn, Bool on) -@@ -1451,6 +1636,9 @@ Bool RADEONDRISetVBlankInterrupt(ScrnInfoPtr pScrn, Bool on) +@@ -1482,6 +1670,9 @@ Bool RADEONDRISetVBlankInterrupt(ScrnInfoPtr pScrn, Bool on) xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); int value = 0; @@ -3686,7 +2909,7 @@ index 7c317e6..8217b6e 100644 if (!info->want_vblank_interrupts) on = FALSE; -@@ -1470,6 +1658,48 @@ Bool RADEONDRISetVBlankInterrupt(ScrnInfoPtr pScrn, Bool on) +@@ -1501,6 +1692,48 @@ Bool RADEONDRISetVBlankInterrupt(ScrnInfoPtr pScrn, Bool on) return TRUE; } @@ -3735,7 +2958,7 @@ index 7c317e6..8217b6e 100644 /* Initialize the screen-specific data structures for the DRI and the * Radeon. This is the main entry point to the device-specific -@@ -1533,10 +1763,22 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) +@@ -1564,10 +1797,22 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) pDRIInfo->ddxDriverMajorVersion = info->allowColorTiling ? 
5 : 4; pDRIInfo->ddxDriverMinorVersion = 3; pDRIInfo->ddxDriverPatchVersion = 0; @@ -3762,7 +2985,7 @@ index 7c317e6..8217b6e 100644 pDRIInfo->ddxDrawableTableEntry = RADEON_MAX_DRAWABLES; pDRIInfo->maxDrawableTableEntry = (SAREA_MAX_DRAWABLES < RADEON_MAX_DRAWABLES -@@ -1589,9 +1831,7 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) +@@ -1620,9 +1865,7 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) pDRIInfo->TransitionTo3d = RADEONDRITransitionTo3d; pDRIInfo->TransitionSingleToMulti3D = RADEONDRITransitionSingleToMulti3d; pDRIInfo->TransitionMultiToSingle3D = RADEONDRITransitionMultiToSingle3d; @@ -3773,7 +2996,7 @@ index 7c317e6..8217b6e 100644 pDRIInfo->ClipNotify = RADEONDRIClipNotify; #endif -@@ -1623,57 +1863,60 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) +@@ -1654,57 +1897,60 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) pDRIInfo = NULL; return FALSE; } @@ -3787,7 +3010,7 @@ index 7c317e6..8217b6e 100644 - RADEONDRICloseScreen(pScreen); - return FALSE; - } -- + - /* Initialize PCI */ - if ((info->cardType!=CARD_AGP) && !RADEONDRIPciInit(info, pScreen)) { - xf86DrvMsg(pScreen->myNum, X_ERROR, @@ -3795,7 +3018,12 @@ index 7c317e6..8217b6e 100644 - RADEONDRICloseScreen(pScreen); - return FALSE; - } -- ++ /* Now, nuke dri.c's dummy frontbuffer map setup if we did that. */ ++ if (pDRIInfo->frameBufferSize != 0 && info->drm_mm) { ++ int tmp; ++ drm_handle_t fb_handle; ++ void *ptmp; + - /* DRIScreenInit doesn't add all the - * common mappings. Add additional - * mappings here. @@ -3804,18 +3032,13 @@ index 7c317e6..8217b6e 100644 - RADEONDRICloseScreen(pScreen); - return FALSE; - } - +- - /* DRIScreenInit adds the frame buffer - map, but we need it as well */ - { - void *scratch_ptr; - int scratch_int; -+ /* Now, nuke dri.c's dummy frontbuffer map setup if we did that. 
*/ -+ if (pDRIInfo->frameBufferSize != 0 && info->drm_mm) { -+ int tmp; -+ drm_handle_t fb_handle; -+ void *ptmp; - +- - DRIGetDeviceInfo(pScreen, &info->dri->fbHandle, - &scratch_int, &scratch_int, - &scratch_int, &scratch_int, @@ -3878,7 +3101,7 @@ index 7c317e6..8217b6e 100644 static Bool RADEONDRIDoCloseScreen(int scrnIndex, ScreenPtr pScreen) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -@@ -1715,14 +1958,18 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen) +@@ -1746,17 +1992,21 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen) return FALSE; } @@ -3899,11 +3122,16 @@ index 7c317e6..8217b6e 100644 + if (!info->drm_mode_setting) { + /* Initialize IRQ */ + RADEONDRIIrqInit(info, pScreen); ++ /* Initialize kernel GART memory manager */ ++ RADEONDRIGartHeapInit(info, pScreen); + } - /* Initialize kernel GART memory manager */ - RADEONDRIGartHeapInit(info, pScreen); -@@ -1734,6 +1981,10 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen) +- /* Initialize kernel GART memory manager */ +- RADEONDRIGartHeapInit(info, pScreen); + + /* Initialize and start the CP if required */ + RADEONDRICPInit(pScrn); +@@ -1765,6 +2015,10 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen) pSAREAPriv = (drm_radeon_sarea_t*)DRIGetSAREAPrivate(pScreen); memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); @@ -3914,7 +3142,7 @@ index 7c317e6..8217b6e 100644 pRADEONDRI = (RADEONDRIPtr)info->dri->pDRIInfo->devPrivate; pRADEONDRI->deviceID = info->Chipset; -@@ -1890,6 +2141,8 @@ void RADEONDRICloseScreen(ScreenPtr pScreen) +@@ -1922,6 +2176,8 @@ void RADEONDRICloseScreen(ScreenPtr pScreen) drmUnmap(info->dri->buf, info->dri->bufMapSize); info->dri->buf = NULL; } @@ -3923,7 +3151,7 @@ index 7c317e6..8217b6e 100644 if (info->dri->ringReadPtr) { drmUnmap(info->dri->ringReadPtr, info->dri->ringReadMapSize); info->dri->ringReadPtr = NULL; -@@ -1898,6 +2151,7 @@ void RADEONDRICloseScreen(ScreenPtr pScreen) +@@ -1930,6 +2186,7 @@ void RADEONDRICloseScreen(ScreenPtr pScreen) 
drmUnmap(info->dri->ring, info->dri->ringMapSize); info->dri->ring = NULL; } @@ -3931,7 +3159,7 @@ index 7c317e6..8217b6e 100644 if (info->dri->agpMemHandle != DRM_AGP_NO_HANDLE) { drmAgpUnbind(info->dri->drmFD, info->dri->agpMemHandle); drmAgpFree(info->dri->drmFD, info->dri->agpMemHandle); -@@ -2304,3 +2558,16 @@ int RADEONDRISetParam(ScrnInfoPtr pScrn, unsigned int param, int64_t value) +@@ -2335,3 +2592,16 @@ int RADEONDRISetParam(ScrnInfoPtr pScrn, unsigned int param, int64_t value) &radeonsetparam, sizeof(drm_radeon_setparam_t)); return ret; } @@ -3948,6 +3176,284 @@ index 7c317e6..8217b6e 100644 + return radeon_setup_gart_mem(pScreen); + +} +diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c +new file mode 100644 +index 0000000..eb15ff2 +--- /dev/null ++++ b/src/radeon_dri2.c +@@ -0,0 +1,224 @@ ++/* ++ * Copyright 2008 Kristian Høgsberg ++ * Copyright 2008 Jérôme Glisse ++ * ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation on the rights to use, copy, modify, merge, ++ * publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. 
IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR ++ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include ++#include ++#include ++#include ++ ++#include "radeon.h" ++#include "radeon_dri2.h" ++#include "radeon_version.h" ++ ++#ifdef DRI2 ++ ++struct dri2_buffer_priv { ++ PixmapPtr pixmap; ++}; ++ ++ ++static DRI2BufferPtr ++radeon_dri2_create_buffers(DrawablePtr drawable, ++ unsigned int *attachments, ++ int count) ++{ ++ ScreenPtr pScreen = drawable->pScreen; ++ DRI2BufferPtr buffers; ++ struct dri2_buffer_priv *privates; ++ PixmapPtr pixmap, depth_pixmap; ++ struct radeon_exa_pixmap_priv *driver_priv; ++ int i, r; ++ ++ buffers = xcalloc(count, sizeof *buffers); ++ if (buffers == NULL) { ++ return NULL; ++ } ++ privates = xcalloc(count, sizeof(struct dri2_buffer_priv)); ++ if (privates == NULL) { ++ xfree(buffers); ++ return NULL; ++ } ++ ++ depth_pixmap = NULL; ++ for (i = 0; i < count; i++) { ++ if (attachments[i] == DRI2BufferFrontLeft) { ++ if (drawable->type == DRAWABLE_PIXMAP) { ++ pixmap = (Pixmap*)drawable; ++ } else { ++ pixmap = (*pScreen->GetWindowPixmap)((WindowPtr)drawable); ++ } ++ pixmap->refcnt++; ++ } else if (attachments[i] == DRI2BufferStencil && depth_pixmap) { ++ pixmap = depth_pixmap; ++ pixmap->refcnt++; ++ } else { ++ pixmap = (*pScreen->CreatePixmap)(pScreen, ++ drawable->width, ++ drawable->height, ++ drawable->depth, ++ 0); ++ } ++ ++ if (attachments[i] == DRI2BufferDepth) { ++ depth_pixmap = pixmap; ++ } ++ driver_priv = exaGetPixmapDriverPrivate(pixmap); ++ r = radeon_bo_gem_name_buffer(driver_priv->bo, &buffers[i].name); ++ if (r) { ++ /* FIXME: cleanup */ ++ fprintf(stderr, "flink error: %d %s\n", r, strerror(r)); ++ xfree(buffers); ++ xfree(privates); ++ return NULL; ++ } ++ 
buffers[i].attachment = attachments[i]; ++ buffers[i].pitch = pixmap->devKind; ++ buffers[i].cpp = pixmap->drawable.bitsPerPixel / 8; ++ buffers[i].driverPrivate = &privates[i]; ++ buffers[i].flags = 0; /* not tiled */ ++ privates[i].pixmap = pixmap; ++ } ++ return buffers; ++} ++ ++static void ++radeon_dri2_destroy_buffers(DrawablePtr drawable, ++ DRI2BufferPtr buffers, ++ int count) ++{ ++ ScreenPtr pScreen = drawable->pScreen; ++ struct dri2_buffer_priv *private; ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ private = buffers[i].driverPrivate; ++ (*pScreen->DestroyPixmap)(private->pixmap); ++ } ++ if (buffers) { ++ xfree(buffers[0].driverPrivate); ++ xfree(buffers); ++ } ++} ++ ++static void ++radeon_dri2_copy_region(DrawablePtr drawable, ++ RegionPtr region, ++ DRI2BufferPtr dest_buffer, ++ DRI2BufferPtr src_buffer) ++{ ++ struct dri2_buffer_priv *private = src_buffer->driverPrivate; ++ ScreenPtr pScreen = drawable->pScreen; ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ PixmapPtr pixmap = private->pixmap; ++ RegionPtr copy_clip; ++ GCPtr gc; ++ ++ gc = GetScratchGC(drawable->depth, pScreen); ++ copy_clip = REGION_CREATE(pScreen, NULL, 0); ++ REGION_COPY(pScreen, copy_clip, region); ++ (*gc->funcs->ChangeClip) (gc, CT_REGION, copy_clip, 0); ++ ValidateGC(drawable, gc); ++ (*gc->ops->CopyArea)(&pixmap->drawable, drawable, gc, ++ 0, 0, drawable->width, drawable->height, 0, 0); ++ FreeScratchGC(gc); ++ RADEONCPReleaseIndirect(pScrn); ++} ++ ++Bool ++radeon_dri2_screen_init(ScreenPtr pScreen) ++{ ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ DRI2InfoRec dri2_info; ++ int fd; ++ char *bus_id; ++ char *tmp_bus_id; ++ int cmp; ++ int i; ++ ++ if (!info->useEXA) { ++ xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 requires EXA\n"); ++ return FALSE; ++ } ++ ++ /* The whole drmOpen thing is a fiasco and we need to find a way ++ * back to just using open(2). 
For now, however, lets just make ++ * things worse with even more ad hoc directory walking code to ++ * discover the device file name. */ ++ bus_id = DRICreatePCIBusID(info->PciInfo); ++ for (i = 0; i < DRM_MAX_MINOR; i++) { ++ sprintf(info->dri2.device_name, DRM_DEV_NAME, DRM_DIR_NAME, i); ++ fd = open(info->dri2.device_name, O_RDWR); ++ if (fd < 0) ++ continue; ++ ++ tmp_bus_id = drmGetBusid(fd); ++ close(fd); ++ if (tmp_bus_id == NULL) ++ continue; ++ ++ cmp = strcmp(tmp_bus_id, bus_id); ++ drmFree(tmp_bus_id); ++ if (cmp == 0) ++ break; ++ } ++ xfree(bus_id); ++ ++ if (i == DRM_MAX_MINOR) { ++ xf86DrvMsg(pScrn->scrnIndex, X_WARNING, ++ "DRI2: failed to open drm device\n"); ++ return FALSE; ++ } ++ ++ if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { ++ dri2_info.driverName = R300_DRIVER_NAME; ++ } else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) { ++ dri2_info.driverName = R200_DRIVER_NAME; ++ } else { ++ dri2_info.driverName = RADEON_DRIVER_NAME; ++ } ++ dri2_info.fd = info->dri2.drm_fd; ++ dri2_info.deviceName = info->dri2.device_name; ++ dri2_info.version = 1; ++ dri2_info.CreateBuffers = radeon_dri2_create_buffers; ++ dri2_info.DestroyBuffers = radeon_dri2_destroy_buffers; ++ dri2_info.CopyRegion = radeon_dri2_copy_region; ++ info->dri2.enabled = DRI2ScreenInit(pScreen, &dri2_info); ++ return info->dri2.enabled; ++} ++ ++void ++radeon_dri2_close_screen(ScreenPtr pScreen) ++{ ++ DRI2CloseScreen(pScreen); ++} ++ ++#endif +diff --git a/src/radeon_dri2.h b/src/radeon_dri2.h +new file mode 100644 +index 0000000..9ad9cee +--- /dev/null ++++ b/src/radeon_dri2.h +@@ -0,0 +1,42 @@ ++/* ++ * Copyright 2008 Jerome Glisse ++ * ++ * All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining ++ * a copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation on the rights to use, copy, modify, merge, ++ * publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial ++ * portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR ++ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef RADEON_DRI2_H ++#define RADEON_DRI2_H ++ ++struct radeon_dri2 { ++ int drm_fd; ++ Bool enabled; ++ char device_name[64]; ++}; ++ ++#ifdef DRI2 ++#include "dri2.h" ++Bool radeon_dri2_screen_init(ScreenPtr pScreen); ++void radeon_dri2_close_screen(ScreenPtr pScreen); ++#endif ++ ++#endif diff --git a/src/radeon_dri_bufmgr.c b/src/radeon_dri_bufmgr.c new file mode 100644 index 0000000..f6154dc @@ -4429,7 +3935,7 @@ index 0000000..a19d7ec + +#endif diff --git a/src/radeon_driver.c b/src/radeon_driver.c -index d414854..6775372 100644 +index 7cac321..dbdae69 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -67,7 +67,7 @@ @@ -4441,7 +3947,7 @@ index d414854..6775372 100644 /* Driver data structures */ #include "radeon.h" #include "radeon_reg.h" -@@ -224,7 +224,10 @@ radeonShadowWindow(ScreenPtr screen, CARD32 row, CARD32 offset, int mode, +@@ -225,7 +225,10 @@ radeonShadowWindow(ScreenPtr screen, CARD32 row, CARD32 offset, int mode, stride = (pScrn->displayWidth * pScrn->bitsPerPixel) / 8; *size = stride; @@ -4453,7 +3959,7 @@ index d414854..6775372 100644 } static Bool RADEONCreateScreenResources (ScreenPtr pScreen) -@@ -1620,6 +1623,7 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) +@@ -1625,6 +1628,7 @@ static Bool RADEONPreInitVRAM(ScrnInfoPtr pScrn) } pScrn->videoRam &= ~1023; @@ -4461,7 +3967,7 @@ index d414854..6775372 100644 info->FbMapSize = pScrn->videoRam * 1024; /* if the card is PCI Express reserve the last 32k for the gart table */ -@@ -1750,58 +1754,64 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) +@@ -1755,58 +1759,64 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) } from = X_PROBED; @@ -4569,7 +4075,7 @@ index d414854..6775372 100644 #ifdef XF86DRI /* AGP/PCI */ -@@ -1994,6 +2004,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) +@@ -1998,6 +2008,9 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { int errmaj = 0, errmin = 0; @@ 
-4579,7 +4085,7 @@ index d414854..6775372 100644 from = X_DEFAULT; #if defined(USE_EXA) #if defined(USE_XAA) -@@ -2004,6 +2017,7 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) +@@ -2008,6 +2021,7 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) info->useEXA = TRUE; } else if (xf86NameCmp(optstr, "XAA") == 0) { from = X_CONFIG; @@ -4587,7 +4093,7 @@ index d414854..6775372 100644 } } #else /* USE_XAA */ -@@ -2107,15 +2121,9 @@ static Bool RADEONPreInitInt10(ScrnInfoPtr pScrn, xf86Int10InfoPtr *ppInt10) +@@ -2111,15 +2125,9 @@ static Bool RADEONPreInitInt10(ScrnInfoPtr pScrn, xf86Int10InfoPtr *ppInt10) return TRUE; } @@ -4604,7 +4110,7 @@ index d414854..6775372 100644 if (!(info->dri = xcalloc(1, sizeof(struct radeon_dri)))) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate dri rec!\n"); -@@ -2126,6 +2134,22 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) +@@ -2130,6 +2138,22 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR,"Unable to allocate cp rec!\n"); return FALSE; } @@ -4627,7 +4133,7 @@ index d414854..6775372 100644 info->cp->CPInUse = FALSE; info->cp->CPStarted = FALSE; info->cp->CPusecTimeout = RADEON_DEFAULT_CP_TIMEOUT; -@@ -2690,6 +2714,37 @@ static const xf86CrtcConfigFuncsRec RADEONCRTCResizeFuncs = { +@@ -2704,6 +2728,37 @@ static const xf86CrtcConfigFuncsRec RADEONCRTCResizeFuncs = { RADEONCRTCResize }; @@ -4665,7 +4171,7 @@ index d414854..6775372 100644 Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) { xf86CrtcConfigPtr xf86_config; -@@ -2710,6 +2765,8 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2724,6 +2779,8 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) info = RADEONPTR(pScrn); info->MMIO = NULL; @@ -4674,7 +4180,7 @@ index d414854..6775372 100644 info->IsSecondary = FALSE; info->IsPrimary = FALSE; -@@ -2744,62 +2801,63 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2758,62 +2815,63 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) } info->PciInfo = 
xf86GetPciInfoForEntity(info->pEnt->index); @@ -4709,11 +4215,11 @@ index d414854..6775372 100644 + info->PciTag = pciTag(PCI_DEV_BUS(info->PciInfo), + PCI_DEV_DEV(info->PciInfo), + PCI_DEV_FUNC(info->PciInfo)); -+ info->MMIOAddr = PCI_REGION_BASE(info->PciInfo, 2, REGION_MEM) & ~0xffULL; ++ info->MMIOAddr = PCI_REGION_BASE(info->PciInfo, 2, REGION_MEM) & ~0xffULL; + info->MMIOSize = PCI_REGION_SIZE(info->PciInfo, 2); + if (info->pEnt->device->IOBase) { + xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, -+ "MMIO address override, using 0x%08lx instead of 0x%08llx\n", ++ "MMIO address override, using 0x%08lx instead of 0x%016llx\n", + info->pEnt->device->IOBase, + info->MMIOAddr); + info->MMIOAddr = info->pEnt->device->IOBase; @@ -4721,8 +4227,8 @@ index d414854..6775372 100644 + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "No valid MMIO address\n"); + goto fail1; + } -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, -+ "MMIO registers at 0x%016llx: size %ldKB\n", info->MMIOAddr, info->MMIOSize / 1024); ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, ++ "MMIO registers at 0x%016llx: size %ldKB\n", info->MMIOAddr, info->MMIOSize / 1024); + + if(!RADEONMapMMIO(pScrn)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, @@ -4785,7 +4291,7 @@ index d414854..6775372 100644 if (xf86RegisterResources(info->pEnt->index, 0, ResExclusive)) goto fail; -@@ -2809,10 +2867,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2823,10 +2881,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) pScrn->racMemFlags = RAC_FB | RAC_COLORMAP | RAC_VIEWPORT | RAC_CURSOR; pScrn->monitor = pScrn->confScreen->monitor; @@ -4802,7 +4308,7 @@ index d414854..6775372 100644 if (!RADEONPreInitVisual(pScrn)) goto fail; -@@ -2826,136 +2886,197 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2840,136 +2900,199 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) memcpy(info->Options, RADEONOptions, sizeof(RADEONOptions)); xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, info->Options); @@ -5054,6 +4560,8 @@ index 
d414854..6775372 100644 - ErrorF("after xf86InitialConfiguration\n"); + info->drmmode.create_new_fb = radeon_create_new_fb; + info->dri->drmFD = info->drmmode.fd; ++ info->dri2.drm_fd = info->drmmode.fd; ++ info->dri2.enabled = FALSE; + xfree(bus_id); + + { @@ -5105,7 +4613,7 @@ index d414854..6775372 100644 /* Get ScreenInit function */ if (!xf86LoadSubModule(pScrn, "fb")) return FALSE; -@@ -2970,10 +3091,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) +@@ -2984,10 +3107,12 @@ Bool RADEONPreInit(ScrnInfoPtr pScrn, int flags) if (!RADEONPreInitXv(pScrn)) goto fail; } @@ -5122,7 +4630,17 @@ index d414854..6775372 100644 } if (pScrn->modes == NULL) { -@@ -3214,7 +3337,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3140,6 +3265,9 @@ static void RADEONBlockHandler(int i, pointer blockData, + + #ifdef USE_EXA + info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; ++ if (info->new_cs) { ++ RADEONCPReleaseIndirect(pScrn); ++ } + #endif + } + +@@ -3228,7 +3356,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, int subPixelOrder = SubPixelUnknown; char* s; #endif @@ -5131,7 +4649,7 @@ index d414854..6775372 100644 info->accelOn = FALSE; #ifdef USE_XAA -@@ -3234,52 +3357,55 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3248,58 +3376,61 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, "RADEONScreenInit %lx %ld\n", pScrn->memPhysBase, pScrn->fbOffset); #endif @@ -5155,6 +4673,17 @@ index d414854..6775372 100644 - info->crtc_on = FALSE; - info->crtc2_on = FALSE; ++ /* save the real front buffer size ++ * it changes with randr, rotation, etc. ++ */ ++ info->virtualX = pScrn->virtualX; ++ info->virtualY = pScrn->virtualY; + +- /* save the real front buffer size +- * it changes with randr, rotation, etc. 
+- */ +- info->virtualX = pScrn->virtualX; +- info->virtualY = pScrn->virtualY; + RADEONSave(pScrn); - RADEONSave(pScrn); @@ -5221,7 +4750,7 @@ index d414854..6775372 100644 /* Visual setup */ miClearVisualTypes(); if (!miSetVisualTypes(pScrn->depth, -@@ -3313,19 +3439,21 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3333,19 +3464,21 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, hasDRI = info->directRenderingEnabled; #endif /* XF86DRI */ @@ -5229,7 +4758,12 @@ index d414854..6775372 100644 - * we'll use later on for MC_FB_LOCATION & MC_AGP_LOCATION - */ - RADEONInitMemoryMap(pScrn); -- ++ if (!info->drm_mode_setting) { ++ /* Initialize the memory map, this basically calculates the values ++ * we'll use later on for MC_FB_LOCATION & MC_AGP_LOCATION ++ */ ++ RADEONInitMemoryMap(pScrn); + - /* empty the surfaces */ - if (info->ChipFamily < CHIP_FAMILY_R600) { - unsigned char *RADEONMMIO = info->MMIO; @@ -5238,12 +4772,6 @@ index d414854..6775372 100644 - OUTREG(RADEON_SURFACE0_INFO + 16 * j, 0); - OUTREG(RADEON_SURFACE0_LOWER_BOUND + 16 * j, 0); - OUTREG(RADEON_SURFACE0_UPPER_BOUND + 16 * j, 0); -+ if (!info->drm_mode_setting) { -+ /* Initialize the memory map, this basically calculates the values -+ * we'll use later on for MC_FB_LOCATION & MC_AGP_LOCATION -+ */ -+ RADEONInitMemoryMap(pScrn); -+ + /* empty the surfaces */ + if (info->ChipFamily < CHIP_FAMILY_R600) { + unsigned char *RADEONMMIO = info->MMIO; @@ -5256,24 +4784,19 @@ index d414854..6775372 100644 } } -@@ -3365,11 +3493,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3382,7 +3515,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, + #ifdef XF86DRI if (hasDRI) { - info->accelDFS = info->cardType != CARD_AGP; - -+ /* disable DFS by default */ -+ if (info->cardType != CARD_PCIE && info->drm_mm) -+ info->accelDFS = FALSE; + info->accelDFS = xf86ReturnOptValBool(info->Options, OPTION_ACCEL_DFS, +- info->cardType != CARD_AGP); ++ info->cardType != 
CARD_AGP); + - if (xf86GetOptValInteger(info->Options, OPTION_ACCEL_DFS, - &info->accelDFS)) { - from = X_CONFIG; - } ++ if (info->drm_mm) ++ info->accelDFS = FALSE; -+ /* Reserve approx. half of offscreen memory for local textures by * default, can be overridden with Option "FBTexPercent". - * Round down to a whole number of texture regions. -@@ -3398,7 +3531,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3408,7 +3544,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #endif #if defined(XF86DRI) && defined(USE_XAA) @@ -5282,7 +4805,7 @@ index d414854..6775372 100644 info->dri->textureSize = -1; if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT, &(info->dri->textureSize))) { -@@ -3416,7 +3549,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3426,7 +3562,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #endif #ifdef USE_XAA @@ -5291,7 +4814,7 @@ index d414854..6775372 100644 return FALSE; #endif -@@ -3437,7 +3570,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3447,7 +3583,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, info->CurrentLayout.pixel_bytes); int maxy = info->FbMapSize / width_bytes; @@ -5300,7 +4823,20 @@ index d414854..6775372 100644 xf86DrvMsg(scrnIndex, X_ERROR, "Static buffer allocation failed. 
Disabling DRI.\n"); xf86DrvMsg(scrnIndex, X_ERROR, -@@ -3451,15 +3584,42 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3457,19 +3593,54 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, + info->CurrentLayout.pixel_bytes * 3 + 1023) / 1024); + info->directRenderingEnabled = FALSE; + } else { +- info->directRenderingEnabled = RADEONDRIScreenInit(pScreen); ++ info->directRenderingEnabled = FALSE; ++#ifdef DRI2 ++ if (info->drm_mm) { ++ info->directRenderingEnabled = radeon_dri2_screen_init(pScreen); ++ } ++#endif ++ if (!info->directRenderingEnabled) { ++ info->directRenderingEnabled = RADEONDRIScreenInit(pScreen); ++ } } } @@ -5340,13 +4876,13 @@ index d414854..6775372 100644 } } + -+ if (info->directRenderingEnabled == TRUE) ++ if (info->directRenderingEnabled == TRUE && !info->dri2.enabled) + RADEONDRIDoMappings(pScreen); + #endif xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Initializing fb layer\n"); -@@ -3483,7 +3643,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3493,7 +3664,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, if (info->r600_shadow_fb == FALSE) { /* Init fb layer */ @@ -5355,7 +4891,7 @@ index d414854..6775372 100644 pScrn->virtualX, pScrn->virtualY, pScrn->xDpi, pScrn->yDpi, pScrn->displayWidth, pScrn->bitsPerPixel)) -@@ -3525,8 +3685,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3535,8 +3706,10 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, /* restore the memory map here otherwise we may get a hang when * initializing the drm below */ @@ -5368,7 +4904,7 @@ index d414854..6775372 100644 /* Backing store setup */ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -@@ -3536,7 +3698,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3546,7 +3719,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, /* DRI finalisation */ #ifdef XF86DRI @@ -5377,13 +4913,20 @@ index d414854..6775372 100644 
info->dri->pKernelDRMVersion->version_minor >= 19) { if (RADEONDRISetParam(pScrn, RADEON_SETPARAM_PCIGART_LOCATION, info->dri->pciGartOffset) < 0) -@@ -3555,11 +3717,17 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3562,14 +3735,24 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, + if (info->directRenderingEnabled) { + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, + "DRI Finishing init !\n"); ++ if (!info->dri2.enabled) { info->directRenderingEnabled = RADEONDRIFinishScreenInit(pScreen); } ++ } if (info->directRenderingEnabled) { + + if (info->drm_mm) ++ if (!info->dri2.enabled) { + radeon_update_dri_buffers(pScreen); ++ } + /* DRI final init might have changed the memory map, we need to adjust * our local image to make sure we restore them properly on mode @@ -5396,7 +4939,7 @@ index d414854..6775372 100644 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Direct rendering enabled\n"); -@@ -3657,10 +3825,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, +@@ -3665,10 +3848,16 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, return FALSE; } } @@ -5414,7 +4957,7 @@ index d414854..6775372 100644 /* Provide SaveScreen & wrap BlockHandler and CloseScreen */ /* Wrap CloseScreen */ -@@ -5141,7 +5315,7 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ -5245,7 +5434,7 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) #ifdef XF86DRI Bool CPStarted = info->cp->CPStarted; @@ -5423,7 +4966,7 @@ index d414854..6775372 100644 DRILock(pScrn->pScreen, 0); RADEONCP_STOP(pScrn, info); } -@@ -5164,8 +5338,10 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ -5268,8 +5457,10 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) #endif } @@ -5436,18 +4979,20 @@ index d414854..6775372 100644 ret = xf86SetSingleMode (pScrn, mode, RR_Rotate_0); -@@ -5177,15 +5353,18 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) +@@ 
-5281,16 +5472,19 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) /* xf86SetRootClip would do, but can't access that here */ } - if (info->accelOn) { - RADEON_SYNC(info, pScrn); -- RADEONEngineRestore(pScrn); +- if (info->ChipFamily < CHIP_FAMILY_R600) +- RADEONEngineRestore(pScrn); - } + if (!info->drm_mode_setting) + if (info->accelOn) { + RADEON_SYNC(info, pScrn); -+ RADEONEngineRestore(pScrn); ++ if (info->ChipFamily < CHIP_FAMILY_R600) ++ RADEONEngineRestore(pScrn); + } #ifdef XF86DRI @@ -5462,7 +5007,7 @@ index d414854..6775372 100644 } #endif -@@ -5383,6 +5562,11 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) +@@ -5488,6 +5682,11 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) xf86OutputPtr output = config->output[config->compat_output]; xf86CrtcPtr crtc = output->crtc; @@ -5471,10 +5016,10 @@ index d414854..6775372 100644 + return; + } + - #ifdef XF86DRI - if (info->cp->CPStarted && pScrn->pScreen) DRILock(pScrn->pScreen, 0); - #endif -@@ -5418,67 +5602,92 @@ Bool RADEONEnterVT(int scrnIndex, int flags) + /* not handled */ + if (IS_AVIVO_VARIANT) + return; +@@ -5527,76 +5726,103 @@ Bool RADEONEnterVT(int scrnIndex, int flags) xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "RADEONEnterVT\n"); @@ -5572,10 +5117,10 @@ index d414854..6775372 100644 + + if (info->drm_mode_setting) { + if (!drmmode_set_desired_modes(pScrn, &info->drmmode)) -+ return FALSE; + return FALSE; + } else { + if (!xf86SetDesiredModes(pScrn)) - return FALSE; ++ return FALSE; + } - if (info->ChipFamily < CHIP_FAMILY_R600) @@ -5588,20 +5133,35 @@ index d414854..6775372 100644 - if (info->cardType == CARD_PCIE && - info->dri->pKernelDRMVersion->version_minor >= 19 && - info->FbSecureSize) { ++ if (info->directRenderingEnabled) { ++ if (info->cardType == CARD_PCIE && ++ info->dri->pKernelDRMVersion->version_minor >= 19 && ++ info->FbSecureSize) { + #if X_BYTE_ORDER == X_BIG_ENDIAN +- unsigned char *RADEONMMIO = 
info->MMIO; +- unsigned int sctrl = INREG(RADEON_SURFACE_CNTL); ++ unsigned char *RADEONMMIO = info->MMIO; ++ unsigned int sctrl = INREG(RADEON_SURFACE_CNTL); + - /* we need to backup the PCIE GART TABLE from fb memory */ +- OUTREG(RADEON_SURFACE_CNTL, 0); ++ /* we need to backup the PCIE GART TABLE from fb memory */ ++ OUTREG(RADEON_SURFACE_CNTL, 0); + #endif - memcpy(info->FB + info->dri->pciGartOffset, info->dri->pciGartBackup, info->dri->pciGartSize); ++ ++ /* we need to backup the PCIE GART TABLE from fb memory */ ++ memcpy(info->FB + info->dri->pciGartOffset, info->dri->pciGartBackup, info->dri->pciGartSize); + #if X_BYTE_ORDER == X_BIG_ENDIAN +- OUTREG(RADEON_SURFACE_CNTL, sctrl); ++ OUTREG(RADEON_SURFACE_CNTL, sctrl); + #endif - } - - /* get the DRI back into shape after resume */ - RADEONDRISetVBlankInterrupt (pScrn, TRUE); - RADEONDRIResume(pScrn->pScreen); - RADEONAdjustMemMapRegisters(pScrn, info->ModeReg); -+ if (info->directRenderingEnabled) { -+ if (info->cardType == CARD_PCIE && -+ info->dri->pKernelDRMVersion->version_minor >= 19 && -+ info->FbSecureSize) { -+ /* we need to backup the PCIE GART TABLE from fb memory */ -+ memcpy(info->FB + info->dri->pciGartOffset, info->dri->pciGartBackup, info->dri->pciGartSize); + } + /* get the DRI back into shape after resume */ @@ -5612,8 +5172,8 @@ index d414854..6775372 100644 } #endif /* this will get XVideo going again, but only if XVideo was initialised -@@ -5490,7 +5699,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) - RADEONEngineRestore(pScrn); +@@ -5611,7 +5837,7 @@ Bool RADEONEnterVT(int scrnIndex, int flags) + info->accel_state->XInited3D = FALSE; #ifdef XF86DRI - if (info->directRenderingEnabled) { @@ -5621,7 +5181,7 @@ index d414854..6775372 100644 RADEONCP_START(pScrn, info); DRIUnlock(pScrn->pScreen); } -@@ -5513,17 +5722,18 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5634,26 +5860,28 @@ void RADEONLeaveVT(int scrnIndex, int flags) "RADEONLeaveVT\n"); #ifdef XF86DRI if 
(RADEONPTR(pScrn)->directRenderingInited) { @@ -5633,9 +5193,6 @@ index d414854..6775372 100644 - if (info->cardType == CARD_PCIE && - info->dri->pKernelDRMVersion->version_minor >= 19 && - info->FbSecureSize) { -- /* we need to backup the PCIE GART TABLE from fb memory */ -- memcpy(info->dri->pciGartBackup, (info->FB + info->dri->pciGartOffset), info->dri->pciGartSize); -- } + if (!info->drm_mode_setting) { + RADEONDRISetVBlankInterrupt (pScrn, FALSE); + DRILock(pScrn->pScreen, 0); @@ -5644,14 +5201,31 @@ index d414854..6775372 100644 + if (info->cardType == CARD_PCIE && + info->dri->pKernelDRMVersion->version_minor >= 19 && + info->FbSecureSize) { + #if X_BYTE_ORDER == X_BIG_ENDIAN +- unsigned char *RADEONMMIO = info->MMIO; +- unsigned int sctrl = INREG(RADEON_SURFACE_CNTL); ++ unsigned char *RADEONMMIO = info->MMIO; ++ unsigned int sctrl = INREG(RADEON_SURFACE_CNTL); + +- /* we need to backup the PCIE GART TABLE from fb memory */ +- OUTREG(RADEON_SURFACE_CNTL, 0); ++ /* we need to backup the PCIE GART TABLE from fb memory */ ++ OUTREG(RADEON_SURFACE_CNTL, 0); + #endif +- memcpy(info->dri->pciGartBackup, (info->FB + info->dri->pciGartOffset), info->dri->pciGartSize); + /* we need to backup the PCIE GART TABLE from fb memory */ + memcpy(info->dri->pciGartBackup, (info->FB + info->dri->pciGartOffset), info->dri->pciGartSize); + #if X_BYTE_ORDER == X_BIG_ENDIAN +- OUTREG(RADEON_SURFACE_CNTL, sctrl); ++ OUTREG(RADEON_SURFACE_CNTL, sctrl); + #endif +- } + } + } /* Make sure 3D clients will re-upload textures to video RAM */ if (info->dri->textureSize) { -@@ -5539,6 +5749,11 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5669,6 +5897,11 @@ void RADEONLeaveVT(int scrnIndex, int flags) i = list[i].next; } while (i != 0); } @@ -5663,7 +5237,7 @@ index d414854..6775372 100644 } #endif -@@ -5559,10 +5774,18 @@ void RADEONLeaveVT(int scrnIndex, int flags) +@@ -5695,10 +5928,18 @@ void RADEONLeaveVT(int scrnIndex, int flags) xf86_hide_cursors (pScrn); @@ -5673,19 
+5247,19 @@ index d414854..6775372 100644 + info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; + radeon_unbind_all_memory(pScrn); + } -+ -+ if (!info->drm_mode_setting) { -+ RADEONRestore(pScrn); - for (i = 0; i < config->num_crtc; i++) - radeon_crtc_modeset_ioctl(config->crtc[i], FALSE); ++ if (!info->drm_mode_setting) { ++ RADEONRestore(pScrn); ++ + for (i = 0; i < config->num_crtc; i++) + radeon_crtc_modeset_ioctl(config->crtc[i], FALSE); + } xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Ok, leaving now...\n"); -@@ -5607,7 +5830,8 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) +@@ -5752,7 +5993,8 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) #endif /* USE_XAA */ if (pScrn->vtSema) { @@ -5695,7 +5269,7 @@ index d414854..6775372 100644 } xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, -@@ -5642,6 +5866,12 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) +@@ -5787,6 +6029,12 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) info->DGAModes = NULL; xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, "Unmapping memory\n"); @@ -5709,7 +5283,7 @@ index d414854..6775372 100644 pScrn->vtSema = FALSE; diff --git a/src/radeon_drm.h b/src/radeon_drm.h -index 54bc234..9241a88 100644 +index 54bc234..06fbad3 100644 --- a/src/radeon_drm.h +++ b/src/radeon_drm.h @@ -303,7 +303,6 @@ typedef union { @@ -5755,7 +5329,7 @@ index 54bc234..9241a88 100644 #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) #define DRM_IOCTL_RADEON_CP_STOP DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_STOP, drm_radeon_cp_stop_t) -@@ -522,16 +542,27 @@ typedef struct { +@@ -522,16 +542,28 @@ typedef struct { #define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t) #define DRM_IOCTL_RADEON_SURF_FREE 
DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t) @@ -5776,9 +5350,9 @@ index 54bc234..9241a88 100644 RADEON_INIT_CP = 0x01, RADEON_CLEANUP_CP = 0x02, RADEON_INIT_R200_CP = 0x03, -- RADEON_INIT_R300_CP = 0x04, + RADEON_INIT_R300_CP = 0x04, - RADEON_INIT_R600_CP = 0x05, -+ RADEON_INIT_R300_CP = 0x04 ++ RADEON_INIT_R600_CP = 0x05 } func; unsigned long sarea_priv_offset; - int is_pci; /* for overriding only */ @@ -5786,19 +5360,7 @@ index 54bc234..9241a88 100644 int cp_mode; int gart_size; int ring_size; -@@ -543,9 +574,8 @@ typedef struct drm_radeon_init { - unsigned int depth_bpp; - unsigned int depth_offset, depth_pitch; - -- /* DEPRECATED commented out below to allow for -Werror build */ -- unsigned long fb_offset; /* deprecated, driver asks hardware */ -- unsigned long mmio_offset; /* deprecated, driver asks hardware */ -+ unsigned long fb_offset; -+ unsigned long mmio_offset; - unsigned long ring_offset; - unsigned long ring_rptr_offset; - unsigned long buffers_offset; -@@ -581,7 +611,7 @@ typedef struct drm_radeon_clear { +@@ -581,7 +613,7 @@ typedef struct drm_radeon_clear { unsigned int clear_depth; unsigned int color_mask; unsigned int depth_mask; /* misnamed field: should be stencil */ @@ -5807,7 +5369,7 @@ index 54bc234..9241a88 100644 } drm_radeon_clear_t; typedef struct drm_radeon_vertex { -@@ -607,9 +637,9 @@ typedef struct drm_radeon_vertex2 { +@@ -607,9 +639,9 @@ typedef struct drm_radeon_vertex2 { int idx; /* Index of vertex buffer */ int discard; /* Client finished with buffer? 
*/ int nr_states; @@ -5819,7 +5381,7 @@ index 54bc234..9241a88 100644 } drm_radeon_vertex2_t; /* v1.3 - obsoletes drm_radeon_vertex2 -@@ -624,15 +654,15 @@ typedef struct drm_radeon_vertex2 { +@@ -624,15 +656,15 @@ typedef struct drm_radeon_vertex2 { */ typedef struct drm_radeon_cmd_buffer { int bufsz; @@ -5838,7 +5400,7 @@ index 54bc234..9241a88 100644 } drm_radeon_tex_image_t; typedef struct drm_radeon_texture { -@@ -641,11 +671,11 @@ typedef struct drm_radeon_texture { +@@ -641,11 +673,11 @@ typedef struct drm_radeon_texture { int format; int width; /* Texture image coordinates */ int height; @@ -5852,7 +5414,7 @@ index 54bc234..9241a88 100644 } drm_radeon_stipple_t; typedef struct drm_radeon_indirect { -@@ -655,9 +685,6 @@ typedef struct drm_radeon_indirect { +@@ -655,9 +687,6 @@ typedef struct drm_radeon_indirect { int discard; } drm_radeon_indirect_t; @@ -5862,7 +5424,7 @@ index 54bc234..9241a88 100644 /* enum for card type parameters */ #define RADEON_CARD_PCI 0 #define RADEON_CARD_AGP 1 -@@ -683,10 +710,11 @@ typedef struct drm_radeon_indirect { +@@ -683,10 +712,11 @@ typedef struct drm_radeon_indirect { #define RADEON_PARAM_VBLANK_CRTC 13 /* VBLANK CRTC */ #define RADEON_PARAM_FB_LOCATION 14 /* FB location */ #define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */ @@ -5875,7 +5437,7 @@ index 54bc234..9241a88 100644 } drm_radeon_getparam_t; /* 1.6: Set up a memory manager for regions of shared memory: -@@ -698,7 +726,7 @@ typedef struct drm_radeon_mem_alloc { +@@ -698,7 +728,7 @@ typedef struct drm_radeon_mem_alloc { int region; int alignment; int size; @@ -5884,7 +5446,7 @@ index 54bc234..9241a88 100644 } drm_radeon_mem_alloc_t; typedef struct drm_radeon_mem_free { -@@ -715,7 +743,7 @@ typedef struct drm_radeon_mem_init_heap { +@@ -715,7 +745,7 @@ typedef struct drm_radeon_mem_init_heap { /* 1.6: Userspace can request & wait on irq's: */ typedef struct drm_radeon_irq_emit { @@ -5893,7 +5455,7 @@ index 54bc234..9241a88 100644 } drm_radeon_irq_emit_t; 
typedef struct drm_radeon_irq_wait { -@@ -734,10 +762,10 @@ typedef struct drm_radeon_setparam { +@@ -734,10 +764,10 @@ typedef struct drm_radeon_setparam { #define RADEON_SETPARAM_FB_LOCATION 1 /* determined framebuffer location */ #define RADEON_SETPARAM_SWITCH_TILING 2 /* enable/disable color tiling */ #define RADEON_SETPARAM_PCIGART_LOCATION 3 /* PCI Gart Location */ @@ -5905,7 +5467,7 @@ index 54bc234..9241a88 100644 /* 1.14: Clients can allocate/free a surface */ typedef struct drm_radeon_surface_alloc { -@@ -753,4 +781,106 @@ typedef struct drm_radeon_surface_free { +@@ -753,4 +783,107 @@ typedef struct drm_radeon_surface_free { #define DRM_RADEON_VBLANK_CRTC1 1 #define DRM_RADEON_VBLANK_CRTC2 2 @@ -5994,6 +5556,7 @@ index 54bc234..9241a88 100644 +/* New interface which obsolete all previous interface. + */ + ++ +#define RADEON_CHUNK_ID_RELOCS 0x01 +#define RADEON_CHUNK_ID_IB 0x02 + @@ -6013,674 +5576,228 @@ index 54bc234..9241a88 100644 + #endif diff --git a/src/radeon_exa.c b/src/radeon_exa.c -index c4bc1bb..22b3147 100644 +index ae68146..dcf4e96 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c -@@ -1,33 +1,33 @@ - /* -- * Copyright 2005 Eric Anholt -- * Copyright 2005 Benjamin Herrenschmidt -- * All Rights Reserved. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice (including the next -- * paragraph) shall be included in all copies or substantial portions of the -- * Software. 
-- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -- * SOFTWARE. -- * -- * Authors: -- * Eric Anholt -- * Zack Rusin -- * Benjamin Herrenschmidt -- * -- */ -+* Copyright 2005 Eric Anholt -+* Copyright 2005 Benjamin Herrenschmidt -+* All Rights Reserved. -+* -+* Permission is hereby granted, free of charge, to any person obtaining a -+* copy of this software and associated documentation files (the "Software"), -+* to deal in the Software without restriction, including without limitation -+* the rights to use, copy, modify, merge, publish, distribute, sublicense, -+* and/or sell copies of the Software, and to permit persons to whom the -+* Software is furnished to do so, subject to the following conditions: -+* -+* The above copyright notice and this permission notice (including the next -+* paragraph) shall be included in all copies or substantial portions of the -+* Software. -+* -+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+* SOFTWARE. 
-+* -+* Authors: -+* Eric Anholt -+* Zack Rusin -+* Benjamin Herrenschmidt -+* -+*/ - - #ifdef HAVE_CONFIG_H - #include "config.h" -@@ -43,11 +43,15 @@ +@@ -44,7 +44,11 @@ #include "radeon_version.h" #include "xf86.h" +#include "radeon_bufmgr_gem.h" -+ -+#define RADEON_PIXMAP_IS_FRONTBUFFER 1 ++#define RADEON_PIXMAP_IS_FRONTBUFFER 1 ++ +/* quick hacks lolz */ /***********************************************************************/ #define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn = xf86Screens[pScr->myNum]; \ -- RADEONInfoPtr info = RADEONPTR(pScrn) -+RADEONInfoPtr info = RADEONPTR(pScrn) - - #define RADEON_TRACE_FALL 0 - #define RADEON_TRACE_DRAW 0 -@@ -55,9 +59,9 @@ - #if RADEON_TRACE_FALL - #define RADEON_FALLBACK(x) \ - do { \ -- ErrorF("%s: ", __FUNCTION__); \ -- ErrorF x; \ -- return FALSE; \ -+ErrorF("%s: ", __FUNCTION__); \ -+ErrorF x; \ -+return FALSE; \ - } while (0) - #else - #define RADEON_FALLBACK(x) return FALSE -@@ -70,257 +74,386 @@ do { \ - #endif - - static struct { -- int rop; -- int pattern; -+int rop; -+int pattern; - } RADEON_ROP[] = { -- { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear */ -- { RADEON_ROP3_DSa, RADEON_ROP3_DPa }, /* Gxand */ -- { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse */ -- { RADEON_ROP3_S, RADEON_ROP3_P }, /* GXcopy */ -- { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted */ -- { RADEON_ROP3_D, RADEON_ROP3_D }, /* GXnoop */ -- { RADEON_ROP3_DSx, RADEON_ROP3_DPx }, /* GXxor */ -- { RADEON_ROP3_DSo, RADEON_ROP3_DPo }, /* GXor */ -- { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor */ -- { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv */ -- { RADEON_ROP3_Dn, RADEON_ROP3_Dn }, /* GXinvert */ -- { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse */ -- { RADEON_ROP3_Sn, RADEON_ROP3_Pn }, /* GXcopyInverted */ -- { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted */ -- { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand */ -- { RADEON_ROP3_ONE, RADEON_ROP3_ONE } /* GXset */ -+{ RADEON_ROP3_ZERO, 
RADEON_ROP3_ZERO }, /* GXclear */ -+{ RADEON_ROP3_DSa, RADEON_ROP3_DPa }, /* Gxand */ -+{ RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse */ -+{ RADEON_ROP3_S, RADEON_ROP3_P }, /* GXcopy */ -+{ RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted */ -+{ RADEON_ROP3_D, RADEON_ROP3_D }, /* GXnoop */ -+{ RADEON_ROP3_DSx, RADEON_ROP3_DPx }, /* GXxor */ -+{ RADEON_ROP3_DSo, RADEON_ROP3_DPo }, /* GXor */ -+{ RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor */ -+{ RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv */ -+{ RADEON_ROP3_Dn, RADEON_ROP3_Dn }, /* GXinvert */ -+{ RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse */ -+{ RADEON_ROP3_Sn, RADEON_ROP3_Pn }, /* GXcopyInverted */ -+{ RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted */ -+{ RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand */ -+{ RADEON_ROP3_ONE, RADEON_ROP3_ONE } /* GXset */ - }; - - /* Compute log base 2 of val. */ - static __inline__ int - RADEONLog2(int val) - { -- int bits; -+int bits; - #if (defined __i386__ || defined __x86_64__) && (defined __GNUC__) -- __asm volatile("bsrl %1, %0" -- : "=r" (bits) -- : "c" (val) -- ); -- return bits; -+__asm volatile("bsrl %1, %0" -+ : "=r" (bits) -+ : "c" (val) -+); -+return bits; - #else -- for (bits = 0; val != 0; val >>= 1, ++bits) -- ; -- return bits - 1; -+for (bits = 0; val != 0; val >>= 1, ++bits) -+ ; -+return bits - 1; - #endif - } - - static __inline__ uint32_t F_TO_DW(float val) - { -- union { -- float f; -- uint32_t l; -- } tmp; -- tmp.f = val; -- return tmp.l; -+union { -+float f; -+uint32_t l; -+} tmp; -+tmp.f = val; -+return tmp.l; - } - - /* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we -- * require src and dest datatypes to be equal. -- */ -+* require src and dest datatypes to be equal. 
-+*/ - Bool RADEONGetDatatypeBpp(int bpp, uint32_t *type) - { -- switch (bpp) { -- case 8: -- *type = ATI_DATATYPE_CI8; -- return TRUE; -- case 16: -- *type = ATI_DATATYPE_RGB565; -- return TRUE; -- case 24: -- *type = ATI_DATATYPE_CI8; -- return TRUE; -- case 32: -- *type = ATI_DATATYPE_ARGB8888; -- return TRUE; -- default: -- RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp)); -- return FALSE; -- } -+switch (bpp) { -+case 8: -+ *type = ATI_DATATYPE_CI8; -+ return TRUE; -+case 16: -+ *type = ATI_DATATYPE_RGB565; -+ return TRUE; -+case 24: -+ *type = ATI_DATATYPE_CI8; -+ return TRUE; -+case 32: -+ *type = ATI_DATATYPE_ARGB8888; -+ return TRUE; -+default: -+ RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp)); -+ return FALSE; -+} - } - - static Bool RADEONPixmapIsColortiled(PixmapPtr pPix) - { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -- -- /* This doesn't account for the back buffer, which we may want to wrap in -- * a pixmap at some point for the purposes of DRI buffer moves. -- */ -- if (info->tilingEnabled && exaGetPixmapOffset(pPix) == 0) -- return TRUE; -- else -- return FALSE; -+RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+ -+/* This doesn't account for the back buffer, which we may want to wrap in -+* a pixmap at some point for the purposes of DRI buffer moves. 
-+*/ -+if (info->tilingEnabled && exaGetPixmapOffset(pPix) == 0) -+return TRUE; -+else -+return FALSE; - } +@@ -182,12 +186,23 @@ Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset) + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + uint32_t pitch, offset; + int bpp; ++ struct radeon_exa_pixmap_priv *driver_priv; - static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset, -- unsigned int offset, unsigned int pitch) -+ unsigned int offset, unsigned int pitch) - { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+RINFO_FROM_SCREEN(pPix->drawable.pScreen); - -- if (pitch > 16320 || pitch % info->accel_state->exa->pixmapPitchAlign != 0) -- RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch)); -+if (pitch > 16320 || pitch % info->accel_state->exa->pixmapPitchAlign != 0) -+ RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch)); - -- if (offset % info->accel_state->exa->pixmapOffsetAlign != 0) -- RADEON_FALLBACK(("Bad offset 0x%08x\n", offset)); -+if (offset % info->accel_state->exa->pixmapOffsetAlign != 0) -+ RADEON_FALLBACK(("Bad offset 0x%08x\n", offset)); - -- pitch = pitch >> 6; -- *pitch_offset = (pitch << 22) | (offset >> 10); -+pitch = pitch >> 6; -+*pitch_offset = (pitch << 22) | (offset >> 10); - -- /* If it's the front buffer, we've got to note that it's tiled? */ -- if (RADEONPixmapIsColortiled(pPix)) -- *pitch_offset |= RADEON_DST_TILE_MACRO; -- return TRUE; -+/* If it's the front buffer, we've got to note that it's tiled? 
*/ -+if (RADEONPixmapIsColortiled(pPix)) -+ *pitch_offset |= RADEON_DST_TILE_MACRO; -+return TRUE; - } + bpp = pPix->drawable.bitsPerPixel; + if (bpp == 24) + bpp = 8; - Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset) - { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -- uint32_t pitch, offset; -- int bpp; -- -- bpp = pPix->drawable.bitsPerPixel; -- if (bpp == 24) -- bpp = 8; -- - offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; -- pitch = exaGetPixmapPitch(pPix); -+RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+uint32_t pitch, offset; -+int bpp; -+struct radeon_exa_pixmap_priv *driver_priv; -+ -+bpp = pPix->drawable.bitsPerPixel; -+if (bpp == 24) -+ bpp = 8; -+ -+ -+/* validate the pixmap somewhere */ -+if (info->new_cs) -+ offset = 0; -+else { -+ offset = exaGetPixmapOffset(pPix); -+ offset += info->fbLocation + pScrn->fbOffset; -+} -+pitch = exaGetPixmapPitch(pPix); ++ ++ /* validate the pixmap somewhere */ ++ if (info->new_cs) ++ offset = 0; ++ else { ++ if (driver_priv) ++ offset = driver_priv->bo->offset; ++ else ++ offset = exaGetPixmapOffset(pPix); ++ offset += info->fbLocation + pScrn->fbOffset; ++ } + pitch = exaGetPixmapPitch(pPix); -- return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); -+return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); + return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); +@@ -220,10 +235,27 @@ int RADEONBiggerCrtcArea(PixmapPtr pPix) + return crtc_num; } - /* -- * Used for vblank render stalling. -- * Ideally we'd have one pixmap per crtc. -- * syncing per-blit is unrealistic so, -- * we sync to whichever crtc has a larger area. -- */ -+* Used for vblank render stalling. -+* Ideally we'd have one pixmap per crtc. -+* syncing per-blit is unrealistic so, -+* we sync to whichever crtc has a larger area. 
-+*/ - int RADEONBiggerCrtcArea(PixmapPtr pPix) - { -- ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -- xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -- int c, crtc_num = -1, area = 0; -+ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; -+xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); -+int c, crtc_num = -1, area = 0; - -- for (c = 0; c < xf86_config->num_crtc; c++) { -- xf86CrtcPtr crtc = xf86_config->crtc[c]; -+for (c = 0; c < xf86_config->num_crtc; c++) { -+xf86CrtcPtr crtc = xf86_config->crtc[c]; - -- if (!crtc->enabled) -- continue; -+if (!crtc->enabled) -+ continue; - -- if ((crtc->mode.HDisplay * crtc->mode.VDisplay) > area) { -- area = crtc->mode.HDisplay * crtc->mode.VDisplay; -- crtc_num = c; -- } -- } -+if ((crtc->mode.HDisplay * crtc->mode.VDisplay) > area) { -+ area = crtc->mode.HDisplay * crtc->mode.VDisplay; -+ crtc_num = c; -+} ++Bool RADEONGetPixmapOffsetCS(PixmapPtr pPix, uint32_t *pitch_offset) ++{ ++ RINFO_FROM_SCREEN(pPix->drawable.pScreen); ++ uint32_t pitch, offset; ++ int bpp; ++ ++ bpp = pPix->drawable.bitsPerPixel; ++ if (bpp == 24) ++ bpp = 8; ++ ++ offset = exaGetPixmapOffset(pPix); ++ pitch = exaGetPixmapPitch(pPix); ++ return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); +} - -- return crtc_num; -+return crtc_num; - } - ++ #if X_BYTE_ORDER == X_BIG_ENDIAN - static unsigned long swapper_surfaces[3]; + static unsigned long swapper_surfaces[6]; +#endif + static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -- unsigned char *RADEONMMIO = info->MMIO; -- uint32_t offset = exaGetPixmapOffset(pPix); -- int bpp, soff; -- uint32_t size, flags; -- -- /* Front buffer is always set with proper swappers */ -- if (offset == 0) -- return TRUE; -- -- /* If same bpp as front buffer, just do nothing as the main -- * swappers will apply -- */ -- bpp = pPix->drawable.bitsPerPixel; -- if (bpp == pScrn->bitsPerPixel) -- return 
TRUE; -- -- /* We need to setup a separate swapper, let's request a -- * surface. We need to align the size first -- */ -- size = exaGetPixmapSize(pPix); -- size = (size + RADEON_BUFFER_ALIGN) & ~(RADEON_BUFFER_ALIGN); -- -- /* Set surface to tiling disabled with appropriate swapper */ -- switch (bpp) { -- case 16: -- flags = RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; -- break; -- case 32: -- flags = RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; -- break; -- default: -- flags = 0; -+RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+unsigned char *RADEONMMIO = info->MMIO; -+uint32_t offset = exaGetPixmapOffset(pPix); -+int bpp, soff; -+uint32_t size, flags; -+struct radeon_exa_pixmap_priv *driver_priv; -+ -+driver_priv = exaGetPixmapDriverPrivate(pPix); -+if (driver_priv) { -+ -+if (driver_priv->bo) { -+ int ret; -+ -+ if (radeon_bufmgr_gem_has_references(driver_priv->bo)) -+ RADEONCPFlushIndirect(pScrn, 0); -+ -+ radeon_bufmgr_gem_wait_rendering(driver_priv->bo); + RINFO_FROM_SCREEN(pPix->drawable.pScreen); +@@ -231,7 +263,31 @@ static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) + uint32_t offset = exaGetPixmapOffset(pPix); + int bpp, soff; + uint32_t size, flags; ++ struct radeon_exa_pixmap_priv *driver_priv; ++ ++ driver_priv = exaGetPixmapDriverPrivate(pPix); ++ if (driver_priv) { + -+ /* flush IB */ -+ ret = dri_bo_map(driver_priv->bo, 1); -+ if (ret) { -+ FatalError("failed to map pixmap %d\n", ret); -+ return FALSE; - } ++ if (driver_priv->bo) { ++ int ret; + -+ pPix->devPrivate.ptr = driver_priv->bo->virtual; -+} -+} ++ if (radeon_bufmgr_gem_has_references(driver_priv->bo)) ++ RADEONCPFlushIndirect(pScrn, 0); + -+#if X_BYTE_ORDER == X_BIG_ENDIAN -+/* Front buffer is always set with proper swappers */ -+if (offset == 0) -+return TRUE; ++ radeon_bufmgr_gem_wait_rendering(driver_priv->bo); + -+/* If same bpp as front buffer, just do nothing as the main -+* swappers will apply -+*/ -+bpp = pPix->drawable.bitsPerPixel; -+if (bpp == 
pScrn->bitsPerPixel) -+return TRUE; ++ /* flush IB */ ++ ret = dri_bo_map(driver_priv->bo, 1); ++ if (ret) { ++ FatalError("failed to map pixmap %d\n", ret); ++ return FALSE; ++ } + -+/* We need to setup a separate swapper, let's request a -+* surface. We need to align the size first -+*/ -+size = exaGetPixmapSize(pPix); -+size = (size + RADEON_BUFFER_ALIGN) & ~(RADEON_BUFFER_ALIGN); -+ -+/* Set surface to tiling disabled with appropriate swapper */ -+switch (bpp) { -+case 16: -+flags = RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; -+break; -+case 32: -+flags = RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; -+break; -+default: -+flags = 0; -+} - #if defined(XF86DRI) -- if (info->directRenderingEnabled && info->allowColorTiling) { -- struct drm_radeon_surface_alloc drmsurfalloc; -- int rc; -- -- drmsurfalloc.address = offset; -- drmsurfalloc.size = size; -- drmsurfalloc.flags = flags | 1; /* bogus pitch to please DRM */ -- -- rc = drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_ALLOC, -- &drmsurfalloc, sizeof(drmsurfalloc)); -- if (rc < 0) { -- xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -- "drm: could not allocate surface for access" -- " swapper, err: %d!\n", rc); -- return FALSE; -- } -- swapper_surfaces[index] = offset; -+if (info->directRenderingEnabled && info->allowColorTiling) { -+struct drm_radeon_surface_alloc drmsurfalloc; -+int rc; -+ -+drmsurfalloc.address = offset; -+drmsurfalloc.size = size; -+drmsurfalloc.flags = flags | 1; /* bogus pitch to please DRM */ -+ -+rc = drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_ALLOC, -+ &drmsurfalloc, sizeof(drmsurfalloc)); -+if (rc < 0) { -+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, -+ "drm: could not allocate surface for access" -+ " swapper, err: %d!\n", rc); -+ return FALSE; -+} -+swapper_surfaces[index] = offset; ++ pPix->devPrivate.ptr = driver_priv->bo->virtual; ++ } ++ } -- return TRUE; -- } -+return TRUE; -+} - #endif -- soff = (index + 1) * 0x10; -- OUTREG(RADEON_SURFACE0_INFO + soff, flags); 
-- OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset); -- OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1); -- swapper_surfaces[index] = offset; -- return TRUE; -+soff = (index + 1) * 0x10; -+OUTREG(RADEON_SURFACE0_INFO + soff, flags); -+OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset); -+OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1); -+swapper_surfaces[index] = offset; ++#if X_BYTE_ORDER == X_BIG_ENDIAN + /* Front buffer is always set with proper swappers */ + if (offset == 0) + return TRUE; +@@ -287,6 +343,7 @@ static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) + OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset); + OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1); + swapper_surfaces[index] = offset; +#endif -+return TRUE; + return TRUE; } - static void RADEONFinishAccess(PixmapPtr pPix, int index) - { -- RINFO_FROM_SCREEN(pPix->drawable.pScreen); -- unsigned char *RADEONMMIO = info->MMIO; -- uint32_t offset = exaGetPixmapOffset(pPix); -- int soff; -+RINFO_FROM_SCREEN(pPix->drawable.pScreen); -+unsigned char *RADEONMMIO = info->MMIO; -+uint32_t offset = exaGetPixmapOffset(pPix); -+int soff; -+struct radeon_exa_pixmap_priv *driver_priv; -+ -+driver_priv = exaGetPixmapDriverPrivate(pPix); - -- /* Front buffer is always set with proper swappers */ -- if (offset == 0) -- return; -+if (driver_priv) { -+dri_bo_unmap(driver_priv->bo); -+pPix->devPrivate.ptr = NULL; -+} +@@ -296,7 +353,17 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) + unsigned char *RADEONMMIO = info->MMIO; + uint32_t offset = exaGetPixmapOffset(pPix); + int soff; ++ struct radeon_exa_pixmap_priv *driver_priv; + ++ driver_priv = exaGetPixmapDriverPrivate(pPix); + ++ if (driver_priv) { ++ dri_bo_unmap(driver_priv->bo); ++ pPix->devPrivate.ptr = NULL; ++ } ++ + +#if X_BYTE_ORDER == X_BIG_ENDIAN -+/* Front buffer is always set with proper swappers */ -+if (offset == 0) -+return; - -- if (swapper_surfaces[index] == 0) -- return; -+if 
(swapper_surfaces[index] == 0) -+return; - #if defined(XF86DRI) -- if (info->directRenderingEnabled && info->allowColorTiling) { -- struct drm_radeon_surface_free drmsurffree; -- -- drmsurffree.address = offset; -- drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_FREE, -- &drmsurffree, sizeof(drmsurffree)); -- swapper_surfaces[index] = 0; -- return; -- } -+if (info->directRenderingEnabled && info->allowColorTiling) { -+struct drm_radeon_surface_free drmsurffree; -+ -+drmsurffree.address = offset; -+drmCommandWrite(info->dri->drmFD, DRM_RADEON_SURF_FREE, -+ &drmsurffree, sizeof(drmsurffree)); -+swapper_surfaces[index] = 0; -+return; -+} - #endif -- soff = (index + 1) * 0x10; -- OUTREG(RADEON_SURFACE0_INFO + soff, 0); -- OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 0); -- OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0); -- swapper_surfaces[index] = 0; -+soff = (index + 1) * 0x10; -+OUTREG(RADEON_SURFACE0_INFO + soff, 0); -+OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 0); -+OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0); -+swapper_surfaces[index] = 0; + /* Front buffer is always set with proper swappers */ + if (offset == 0) + return; +@@ -319,9 +386,93 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) + OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 0); + OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0); + swapper_surfaces[index] = 0; +#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ } -#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ +void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align) +{ -+ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+RADEONInfoPtr info = RADEONPTR(pScrn); -+struct radeon_exa_pixmap_priv *new_priv; -+ -+new_priv = xcalloc(1, sizeof(struct radeon_exa_pixmap_priv)); -+if (!new_priv) -+return NULL; -+ -+if (size == 0) -+return new_priv; -+ -+new_priv->bo = dri_bo_alloc(info->bufmgr, "exa pixmap", size, -+ align, 0); -+if (!new_priv->bo) { -+xfree(new_priv); -+ErrorF("Failed to alloc memory\n"); -+return NULL; -+} ++ ScrnInfoPtr pScrn = 
xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_exa_pixmap_priv *new_priv; + -+return new_priv; ++ new_priv = xcalloc(1, sizeof(struct radeon_exa_pixmap_priv)); ++ if (!new_priv) ++ return NULL; ++ ++ if (size == 0) ++ return new_priv; ++ ++ new_priv->bo = dri_bo_alloc(info->bufmgr, "exa pixmap", size, ++ align, 0); ++ if (!new_priv->bo) { ++ xfree(new_priv); ++ ErrorF("Failed to alloc memory\n"); ++ return NULL; ++ } ++ ++ return new_priv; + +} + +static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) +{ -+ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+RADEONInfoPtr info = RADEONPTR(pScrn); -+struct radeon_exa_pixmap_priv *driver_priv = driverPriv; -+int ret; ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_exa_pixmap_priv *driver_priv = driverPriv; ++ int ret; + -+dri_bo_unreference(driver_priv->bo); -+xfree(driverPriv); ++ dri_bo_unreference(driver_priv->bo); ++ xfree(driverPriv); +} + +static Bool RADEONEXAModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, -+ int depth, int bitsPerPixel, int devKind, -+ pointer pPixData) ++ int depth, int bitsPerPixel, int devKind, ++ pointer pPixData) +{ -+ScreenPtr pScreen = pPixmap->drawable.pScreen; -+ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; -+RADEONInfoPtr info = RADEONPTR(pScrn); -+struct radeon_exa_pixmap_priv *driver_priv; ++ ScreenPtr pScreen = pPixmap->drawable.pScreen; ++ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; ++ RADEONInfoPtr info = RADEONPTR(pScrn); ++ struct radeon_exa_pixmap_priv *driver_priv; + -+driver_priv = exaGetPixmapDriverPrivate(pPixmap); -+if (!driver_priv) -+return FALSE; ++ driver_priv = exaGetPixmapDriverPrivate(pPixmap); ++ if (!driver_priv) ++ return FALSE; + + -+if (info->drm_mode_setting && drmmode_is_rotate_pixmap(pScrn, pPixData, &driver_priv->bo)){ -+dri_bo_unmap(driver_priv->bo); -+dri_bo_reference(driver_priv->bo); 
-+miModifyPixmapHeader(pPixmap, width, height, depth, -+ bitsPerPixel, devKind, NULL); ++ if (info->drm_mode_setting && drmmode_is_rotate_pixmap(pScrn, pPixData, &driver_priv->bo)){ ++ dri_bo_unmap(driver_priv->bo); ++ dri_bo_reference(driver_priv->bo); ++ miModifyPixmapHeader(pPixmap, width, height, depth, ++ bitsPerPixel, devKind, NULL); + -+return TRUE; -+} ++ return TRUE; ++ } + -+if (pPixData == info->mm.front_buffer->map) { -+driver_priv->flags |= RADEON_PIXMAP_IS_FRONTBUFFER; ++ if (pPixData == info->mm.front_buffer->map) { ++ driver_priv->flags |= RADEON_PIXMAP_IS_FRONTBUFFER; + -+driver_priv->bo = radeon_bo_gem_create_from_name(info->bufmgr, "front", -+ radeon_name_buffer(pScrn, info->mm.front_buffer)); ++ driver_priv->bo = radeon_bo_gem_create_from_name(info->bufmgr, "front", ++ radeon_name_buffer(pScrn, info->mm.front_buffer)); + -+miModifyPixmapHeader(pPixmap, width, height, depth, -+ bitsPerPixel, devKind, NULL); -+return TRUE; -+} -+return FALSE; ++ miModifyPixmapHeader(pPixmap, width, height, depth, ++ bitsPerPixel, devKind, NULL); ++ return TRUE; ++ } ++ return FALSE; +} + +static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) +{ -+struct radeon_exa_pixmap_priv *driver_priv; ++ struct radeon_exa_pixmap_priv *driver_priv; + -+driver_priv = exaGetPixmapDriverPrivate(pPix); ++ driver_priv = exaGetPixmapDriverPrivate(pPix); + -+if (!driver_priv) -+return FALSE; -+if (driver_priv->bo) -+return TRUE; -+return FALSE; ++ if (!driver_priv) ++ return FALSE; ++ if (driver_priv->bo) ++ return TRUE; ++ return FALSE; +} #define ENTER_DRAW(x) TRACE #define LEAVE_DRAW(x) TRACE -@@ -331,6 +464,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) +@@ -332,6 +483,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val)) @@ -6688,7 +5805,7 @@ index c4bc1bb..22b3147 100644 #define FINISH_ACCEL() 
#ifdef RENDER -@@ -344,6 +478,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) +@@ -345,6 +497,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) #undef OUT_ACCEL_REG #undef OUT_ACCEL_REG_F #undef FINISH_ACCEL @@ -6696,7 +5813,7 @@ index c4bc1bb..22b3147 100644 #ifdef XF86DRI -@@ -354,6 +489,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) +@@ -355,6 +508,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) #define BEGIN_ACCEL(n) BEGIN_RING(2*(n)) #define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) #define FINISH_ACCEL() ADVANCE_RING() @@ -6704,7 +5821,7 @@ index c4bc1bb..22b3147 100644 #define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) -@@ -371,6 +507,8 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) +@@ -372,6 +526,8 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index) #endif /* XF86DRI */ @@ -6713,7 +5830,7 @@ index c4bc1bb..22b3147 100644 /* * Once screen->off_screen_base is set, this function * allocates the remaining memory appropriately -@@ -392,122 +530,126 @@ Bool RADEONSetupMemEXA (ScreenPtr pScreen) +@@ -393,122 +549,126 @@ Bool RADEONSetupMemEXA (ScreenPtr pScreen) if (info->accel_state->exa == NULL) return FALSE; @@ -6943,36 +6060,43 @@ index c4bc1bb..22b3147 100644 return TRUE; } -@@ -520,10 +662,21 @@ extern void ExaOffscreenMarkUsed(PixmapPtr); - unsigned long long - RADEONTexOffsetStart(PixmapPtr pPix) +@@ -523,14 +683,23 @@ RADEONTexOffsetStart(PixmapPtr pPix) { + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + unsigned long long offset; - exaMoveInPixmap(pPix); +- ExaOffscreenMarkUsed(pPix); +- +- offset = exaGetPixmapOffset(pPix); + struct radeon_exa_pixmap_priv *driver_priv; -+ uint32_t offset; ++ + driver_priv = exaGetPixmapDriverPrivate(pPix); + + if (driver_priv) { + //offset = dri_bo_get_handle(driver_priv->bo); + offset = driver_priv->bo->offset; -+ + } else { + exaMoveInPixmap(pPix); + offset = exaGetPixmapOffset(pPix); -+ offset += 
RADEONPTR(xf86Screens[pPix->drawable.pScreen->myNum])->fbLocation; ++ if (offset > info->FbMapSize) ++ return ~0ULL; ++ else ++ offset += info->fbLocation; ++ ExaOffscreenMarkUsed(pPix); + } - ExaOffscreenMarkUsed(pPix); -- return RADEONPTR(xf86Screens[pPix->drawable.pScreen->myNum])->fbLocation + -- exaGetPixmapOffset(pPix); +- if (offset > info->FbMapSize) +- return ~0ULL; +- else +- return info->fbLocation + offset; + return offset; } #endif diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c -index 55710ff..2ea4fc9 100644 +index dec0285..6e9c212 100644 --- a/src/radeon_exa_funcs.c +++ b/src/radeon_exa_funcs.c -@@ -74,21 +74,73 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) +@@ -72,21 +72,73 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); @@ -7047,7 +6171,7 @@ index 55710ff..2ea4fc9 100644 ACCEL_PREAMBLE(); TRACE; -@@ -97,25 +149,58 @@ FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) +@@ -95,25 +147,58 @@ FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) RADEON_FALLBACK(("24bpp unsupported\n")); if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype)) RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); @@ -7121,7 +6245,7 @@ index 55710ff..2ea4fc9 100644 return TRUE; } -@@ -145,6 +230,7 @@ FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) +@@ -144,6 +229,7 @@ FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) TRACE; @@ -7129,7 +6253,7 @@ index 55710ff..2ea4fc9 100644 BEGIN_ACCEL(2); OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -@@ -152,6 +238,7 @@ FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) +@@ -151,6 +237,7 @@ FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) FINISH_ACCEL(); } @@ -7137,7 +6261,7 @@ index 55710ff..2ea4fc9 100644 void FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset, uint32_t dst_pitch_offset, uint32_t datatype, int 
rop, -@@ -162,23 +249,28 @@ FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset, +@@ -161,23 +248,28 @@ FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset, RADEON_SWITCH_TO_2D(); @@ -7183,7 +6307,7 @@ index 55710ff..2ea4fc9 100644 } static Bool -@@ -189,9 +281,46 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, +@@ -188,9 +280,46 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, { RINFO_FROM_SCREEN(pDst->drawable.pScreen); uint32_t datatype, src_pitch_offset, dst_pitch_offset; @@ -7231,7 +6355,7 @@ index 55710ff..2ea4fc9 100644 info->accel_state->xdir = xdir; info->accel_state->ydir = ydir; -@@ -199,10 +328,11 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, +@@ -198,10 +327,11 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, RADEON_FALLBACK(("24bpp unsupported")); if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype)) RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); @@ -7253,27 +6377,29 @@ index 55710ff..2ea4fc9 100644 BEGIN_ACCEL(2); OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); OUT_ACCEL_REG(RADEON_WAIT_UNTIL, -@@ -264,6 +395,8 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h, - uint8_t *dst = info->FB + exaGetPixmapOffset(pDst); - unsigned int dst_pitch = exaGetPixmapPitch(pDst); +@@ -259,12 +390,15 @@ FUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst) + + #ifdef ACCEL_CP + ++#if 0 + static Bool + RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, + char *src, int src_pitch) + { + RINFO_FROM_SCREEN(pDst->drawable.pScreen); unsigned int bpp = pDst->drawable.bitsPerPixel; + int ret; + struct radeon_exa_pixmap_priv *driver_priv; - #ifdef ACCEL_CP unsigned int hpass; uint32_t buf_pitch, dst_pitch_off; -@@ -280,10 +413,50 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h, + +@@ -273,9 +407,45 @@ RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, 
if (bpp < 8) return FALSE; -+ if (info->new_cs) -+ dst = info->mm.front_buffer->map + exaGetPixmapOffset(pDst); -+ - #ifdef ACCEL_CP - if (info->directRenderingEnabled && - RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) { - uint8_t *buf; -+ + if (info->new_cs){ + + if (info->drm_mm) { @@ -7316,21 +6442,12 @@ index 55710ff..2ea4fc9 100644 int cpp = bpp / 8; ACCEL_PREAMBLE(); -@@ -301,9 +474,10 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h, - - exaMarkSync(pDst->drawable.pScreen); - return TRUE; -- } --#endif -+ } +@@ -298,17 +468,24 @@ RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h, -+ fallback: + return FALSE; + } +#endif - /* Do we need that sync here ? probably not .... */ - exaWaitSync(pDst->drawable.pScreen); -@@ -342,14 +516,20 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h, - #ifdef ACCEL_CP /* Emit blit with arbitrary source and destination offsets and pitches */ static void -RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset, @@ -7352,7 +6469,7 @@ index 55710ff..2ea4fc9 100644 OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, RADEON_GMC_DST_PITCH_OFFSET_CNTL | RADEON_GMC_SRC_PITCH_OFFSET_CNTL | -@@ -360,8 +540,14 @@ RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset, +@@ -319,8 +496,14 @@ RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset, RADEON_DP_SRC_SOURCE_MEMORY | RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); @@ -7367,11 +6484,10 @@ index 55710ff..2ea4fc9 100644 OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX); OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); -@@ -372,6 +558,174 @@ RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset, - RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); +@@ -332,6 +515,173 @@ RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset, 
FINISH_ACCEL(); } -+ + +static Bool +RADEON_DFS_CS2(PixmapPtr pSrc, int x, int y, int w, int h, + char *dst, int dst_pitch) @@ -7539,14 +6655,13 @@ index 55710ff..2ea4fc9 100644 + dri_bo_unreference(scratch_bo[1]); + return FALSE; +} - #endif static Bool -@@ -396,12 +750,18 @@ FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h, + RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h, +@@ -345,12 +695,17 @@ RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h, + TRACE; - #ifdef ACCEL_CP -+ + if (info->new_cs) { + return RADEON_DFS_CS(pSrc, x, y, w, h, dst, dst_pitch); + } @@ -7556,13 +6671,13 @@ index 55710ff..2ea4fc9 100644 * blitting the bits to one half while copying them out of the other one and * then swapping the halves. */ -- if (info->accelDFS && bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) && +- if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) && + if (!info->drm_mm && info->accelDFS && bpp != 24 && + RADEONGetDatatypeBpp(bpp, &datatype) && RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) && (scratch = RADEONCPGetBuffer(pScrn))) { -@@ -416,7 +776,8 @@ FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h, +@@ -365,7 +720,8 @@ RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h, RADEON_SWITCH_TO_2D(); /* Kick the first blit as early as possible */ @@ -7572,7 +6687,7 @@ index 55710ff..2ea4fc9 100644 x, y, 0, 0, w, hpass); FLUSH_RING(); -@@ -443,7 +804,8 @@ FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h, +@@ -392,7 +748,8 @@ RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h, /* Prepare next blit if anything's left */ if (hpass) { scratch_off = scratch->total/2 - scratch_off; @@ -7582,9 +6697,15 @@ index 55710ff..2ea4fc9 100644 x, y, 0, 0, w, hpass); } -@@ -550,14 +912,17 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) - info->accel_state->exa->UploadToScreen = 
FUNC_NAME(RADEONUploadToScreen); - info->accel_state->exa->DownloadFromScreen = FUNC_NAME(RADEONDownloadFromScreen); +@@ -466,19 +823,22 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) + info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync); + info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync); + #ifdef ACCEL_CP +- info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP; ++ // info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP; + if (info->accelDFS) + info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP; + #endif -#if X_BYTE_ORDER == X_BIG_ENDIAN info->accel_state->exa->PrepareAccess = RADEONPrepareAccess; @@ -7603,7 +6724,7 @@ index 55710ff..2ea4fc9 100644 #ifdef RENDER if (info->RenderAccel) { -@@ -567,7 +932,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) +@@ -488,7 +848,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) else if (IS_R300_3D || IS_R500_3D) { if ((info->ChipFamily < CHIP_FAMILY_RS400) #ifdef XF86DRI @@ -7612,7 +6733,7 @@ index 55710ff..2ea4fc9 100644 #endif ) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " -@@ -602,6 +967,16 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) +@@ -523,6 +883,16 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) } #endif @@ -7630,7 +6751,7 @@ index 55710ff..2ea4fc9 100644 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c -index 55e55be..b8e3095 100644 +index 571204a..ca46505 100644 --- a/src/radeon_exa_render.c +++ b/src/radeon_exa_render.c @@ -54,6 +54,10 @@ @@ -7644,8 +6765,8 @@ index 55e55be..b8e3095 100644 /* Only include the following (generic) bits once. 
*/ #ifdef ONLY_ONCE -@@ -355,12 +359,14 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - Bool repeat = pPict->repeat && +@@ -365,12 +369,14 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad && !(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y)); int i; + struct radeon_exa_pixmap_priv *driver_priv; @@ -7661,9 +6782,9 @@ index 55e55be..b8e3095 100644 RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); if ((txpitch & 0x1f) != 0) RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); -@@ -402,23 +408,43 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - if (repeat) - txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP; +@@ -426,23 +432,43 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + } + } - BEGIN_ACCEL(5); + qwords = info->new_cs ? 6 : 5; @@ -7708,7 +6829,7 @@ index 55e55be..b8e3095 100644 } FINISH_ACCEL(); -@@ -523,10 +549,52 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, +@@ -551,10 +577,52 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, uint32_t dst_format, dst_offset, dst_pitch, colorpitch; uint32_t pp_cntl, blendcntl, cblend, ablend; int pixel_shift; @@ -7761,7 +6882,7 @@ index 55e55be..b8e3095 100644 if (!RADEONGetDestFormat(pDstPicture, &dst_format)) return FALSE; -@@ -540,19 +608,18 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, +@@ -568,19 +636,18 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, pixel_shift = pDst->drawable.bitsPerPixel >> 4; @@ -7784,7 +6905,7 @@ index 55e55be..b8e3095 100644 if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) return FALSE; -@@ -570,10 +637,18 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, +@@ -598,10 +665,18 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, info->accel_state->is_transform[1] = FALSE; } @@ -7805,8 +6926,8 @@ index 
55e55be..b8e3095 100644 OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); /* IN operator: Multiply src by mask components or mask alpha. -@@ -669,13 +744,17 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - Bool repeat = pPict->repeat && +@@ -701,13 +776,17 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad && !(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y)); int i; + struct radeon_exa_pixmap_priv *driver_priv; @@ -7826,9 +6947,9 @@ index 55e55be..b8e3095 100644 if ((txpitch & 0x1f) != 0) RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); -@@ -718,7 +797,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, - if (repeat) - txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP; +@@ -764,7 +843,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + } + } - BEGIN_ACCEL(6); + qwords = info->new_cs ? 
7 : 6; @@ -7836,7 +6957,7 @@ index 55e55be..b8e3095 100644 if (unit == 0) { OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); -@@ -726,7 +806,15 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -772,7 +852,15 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) | ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32); @@ -7853,7 +6974,7 @@ index 55e55be..b8e3095 100644 } else { OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat); -@@ -734,7 +822,17 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -780,7 +868,17 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) | ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32); @@ -7872,7 +6993,7 @@ index 55e55be..b8e3095 100644 } FINISH_ACCEL(); -@@ -823,10 +921,52 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -873,10 +971,52 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, uint32_t dst_format, dst_offset, dst_pitch; uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch; int pixel_shift; @@ -7925,7 +7046,7 @@ index 55e55be..b8e3095 100644 if (!RADEONGetDestFormat(pDstPicture, &dst_format)) return FALSE; -@@ -840,13 +980,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -890,13 +1030,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, pixel_shift = pDst->drawable.bitsPerPixel >> 4; @@ -7941,7 +7062,7 @@ index 55e55be..b8e3095 100644 RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); if (((dst_pitch >> pixel_shift) & 0x7) != 0) RADEON_FALLBACK(("Bad destination pitch 
0x%x\n", (int)dst_pitch)); -@@ -868,11 +1008,22 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -918,11 +1058,22 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, info->accel_state->is_transform[1] = FALSE; } @@ -7966,7 +7087,7 @@ index 55e55be..b8e3095 100644 OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); if (pMask) -@@ -941,6 +1092,10 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, +@@ -991,6 +1142,10 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, int unit, Bool is_r500) { @@ -7977,7 +7098,7 @@ index 55e55be..b8e3095 100644 int w = pPict->pDrawable->width; int h = pPict->pDrawable->height; int i; -@@ -966,8 +1121,17 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, +@@ -1016,8 +1171,17 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, RADEON_FALLBACK(("Unsupported picture format 0x%x\n", (int)pPict->format)); @@ -7996,7 +7117,7 @@ index 55e55be..b8e3095 100644 if (pPict->filter != PictFilterNearest && pPict->filter != PictFilterBilinear) -@@ -999,15 +1163,19 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -1049,15 +1213,19 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, int w = pPict->pDrawable->width; int h = pPict->pDrawable->height; int i, pixel_shift; @@ -8019,7 +7140,7 @@ index 55e55be..b8e3095 100644 if ((txpitch & 0x1f) != 0) RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); -@@ -1068,13 +1236,26 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, +@@ -1139,13 +1307,26 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix, RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); } @@ -8048,7 +7169,7 @@ index 55e55be..b8e3095 100644 if (!pPict->repeat) OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0); FINISH_ACCEL(); -@@ -1173,6 +1354,7 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP +@@ -1244,6 
+1425,7 @@ static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP return TRUE; } @@ -8056,7 +7177,7 @@ index 55e55be..b8e3095 100644 #endif /* ONLY_ONCE */ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, -@@ -1184,10 +1366,51 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1255,10 +1437,51 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, uint32_t txenable, colorpitch; uint32_t blendcntl; int pixel_shift; @@ -8109,7 +7230,7 @@ index 55e55be..b8e3095 100644 if (!R300GetDestFormat(pDstPicture, &dst_format)) return FALSE; -@@ -1198,7 +1421,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1269,7 +1492,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, pixel_shift = pDst->drawable.bitsPerPixel >> 4; @@ -8118,7 +7239,7 @@ index 55e55be..b8e3095 100644 dst_pitch = exaGetPixmapPitch(pDst); colorpitch = dst_pitch >> pixel_shift; -@@ -1207,7 +1430,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1278,7 +1501,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, colorpitch |= dst_format; @@ -8127,7 +7248,7 @@ index 55e55be..b8e3095 100644 RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); if (((dst_pitch >> pixel_shift) & 0x7) != 0) RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); -@@ -1216,6 +1439,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1287,6 +1510,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, return FALSE; RADEON_SWITCH_TO_3D(); @@ -8137,7 +7258,7 @@ index 55e55be..b8e3095 100644 if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0)) return FALSE; -@@ -1868,9 +2094,18 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1939,9 +2165,18 @@ static Bool 
FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, (8191 << R300_SCISSOR_Y_SHIFT))); FINISH_ACCEL(); @@ -8158,7 +7279,7 @@ index 55e55be..b8e3095 100644 OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch); blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); -@@ -1888,7 +2123,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, +@@ -1959,7 +2194,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, return TRUE; } @@ -8166,19 +7287,7 @@ index 55e55be..b8e3095 100644 #ifdef ACCEL_CP #define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ -@@ -2082,9 +2316,10 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, - xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0], xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]); - } - -- if (IS_R300_3D || IS_R500_3D) -+ if (IS_R300_3D || IS_R500_3D) { - /* flushing is pipelined, free/finish is not */ - OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D); -+ } - - #ifdef ACCEL_CP - ADVANCE_RING(); -@@ -2167,14 +2402,23 @@ static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst) +@@ -2245,14 +2479,23 @@ static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst) if (IS_R300_3D || IS_R500_3D) { BEGIN_ACCEL(3); @@ -8204,7 +7313,7 @@ index 55e55be..b8e3095 100644 #undef ONLY_ONCE diff --git a/src/radeon_legacy_memory.c b/src/radeon_legacy_memory.c -index 2a9ee94..0423571 100644 +index 861fd97..c436faf 100644 --- a/src/radeon_legacy_memory.c +++ b/src/radeon_legacy_memory.c @@ -21,6 +21,19 @@ radeon_legacy_allocate_memory(ScrnInfoPtr pScrn, @@ -8227,24 +7336,25 @@ index 2a9ee94..0423571 100644 #ifdef USE_EXA if (info->useEXA) { ExaOffscreenArea *area = *mem_struct; -@@ -96,6 +109,11 @@ radeon_legacy_free_memory(ScrnInfoPtr pScrn, - ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; +@@ -94,6 +107,12 @@ radeon_legacy_free_memory(ScrnInfoPtr pScrn, + void *mem_struct) + { RADEONInfoPtr info = 
RADEONPTR(pScrn); - ++ + if (info->new_cs) { + dri_bo *bo = mem_struct; + dri_bo_unreference(bo); + return; + } #ifdef USE_EXA - if (info->useEXA) { - ExaOffscreenArea *area = mem_struct; + ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex]; + diff --git a/src/radeon_memory.c b/src/radeon_memory.c new file mode 100644 -index 0000000..766f977 +index 0000000..2b6bc4d --- /dev/null +++ b/src/radeon_memory.c -@@ -0,0 +1,418 @@ +@@ -0,0 +1,423 @@ + +#include +#include @@ -8551,11 +7661,11 @@ index 0000000..766f977 + + if (info->directRenderingEnabled) { + info->dri->backPitch = pScrn->displayWidth; -+ info->mm.back_buffer = radeon_allocate_memory(pScrn, RADEON_POOL_VRAM, screen_size, 0, 1, "Back Buffer", 1); ++ info->mm.back_buffer = radeon_allocate_memory(pScrn, RADEON_POOL_VRAM, screen_size, 0, 1, "Back Buffer", 0); + if (!info->mm.back_buffer) { + return FALSE; + } -+ radeon_bind_memory(pScrn, info->mm.back_buffer); ++ // radeon_bind_memory(pScrn, info->mm.back_buffer); + total_size_bytes += screen_size; + + info->dri->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32); @@ -8563,11 +7673,11 @@ index 0000000..766f977 + int depthCpp = (info->dri->depthBits - 8) / 4; + int depth_size = RADEON_ALIGN(pScrn->virtualY, 16) * info->dri->depthPitch * depthCpp; + depth_size = RADEON_ALIGN(depth_size, pagesize); -+ info->mm.depth_buffer = radeon_allocate_memory(pScrn, RADEON_POOL_VRAM, depth_size, 0, 1, "Depth Buffer", 1); ++ info->mm.depth_buffer = radeon_allocate_memory(pScrn, RADEON_POOL_VRAM, depth_size, 0, 1, "Depth Buffer", 0); + if (!info->mm.depth_buffer) { + return FALSE; + } -+ radeon_bind_memory(pScrn, info->mm.depth_buffer); ++ // radeon_bind_memory(pScrn, info->mm.depth_buffer); + total_size_bytes += depth_size; + } + } @@ -8575,6 +7685,8 @@ index 0000000..766f977 + /* work out from the mm size what the exa / tex sizes need to be */ + remain_size_bytes = info->mm.vram_size - total_size_bytes; + ++ info->dri->textureSize = 0; ++#if 0 + if 
(info->dri->textureSize > 0) + info->dri->textureSize = (remain_size_bytes / 100) * info->dri->textureSize; + else @@ -8583,9 +7695,11 @@ index 0000000..766f977 + info->dri->textureSize = RADEON_ALIGN(info->dri->textureSize, pagesize); + + remain_size_bytes -= info->dri->textureSize; ++#endif + + ErrorF("texture size is %dK, exa is %dK\n", info->dri->textureSize / 1024, remain_size_bytes/1024); + ++ + fb_size_bytes = screen_size; + + ErrorF("fb size is %dK %dK\n", fb_size_bytes / 1024, total_size_bytes / 1024); @@ -8601,7 +7715,7 @@ index 0000000..766f977 + } + info->dri->frontPitch = pScrn->displayWidth; + -+ if (info->directRenderingEnabled) { ++ if (info->directRenderingEnabled && info->dri->textureSize) { + info->mm.texture_buffer = radeon_allocate_memory(pScrn, RADEON_POOL_VRAM, info->dri->textureSize, 0, 1, "Texture Buffer", 1); + if (!info->mm.texture_buffer) { + return FALSE; @@ -8616,7 +7730,8 @@ index 0000000..766f977 + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Front buffer size: %dK at 0x%08x\n", info->mm.front_buffer->size/1024, info->mm.front_buffer->offset); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Back buffer size: %dK at 0x%08x\n", info->mm.back_buffer->size/1024, info->mm.back_buffer->offset); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Depth buffer size: %dK at 0x%08x\n", info->mm.depth_buffer->size/1024, info->mm.depth_buffer->offset); -+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Texture size: %dK at 0x%08x\n", info->mm.texture_buffer->size/1024, info->mm.texture_buffer->offset); ++ if (info->mm.texture_buffer) ++ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Texture size: %dK at 0x%08x\n", info->mm.texture_buffer->size/1024, info->mm.texture_buffer->offset); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Remaining VRAM size (used for pixmaps): %dK\n", remain_size_bytes/1024); + + /* set the emit limit at 90% of VRAM */ @@ -8631,7 +7746,7 @@ index 0000000..766f977 + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + -+ ++#if 0 + 
info->mm.gart_texture_buffer = + radeon_allocate_memory(pScrn, RADEON_POOL_GART, + info->dri->gartTexMapSize, @@ -8642,7 +7757,7 @@ index 0000000..766f977 + } + + radeon_bind_memory(pScrn, info->mm.gart_texture_buffer); -+ ++#endif + return TRUE; +} + @@ -8664,10 +7779,10 @@ index 0000000..766f977 +} + diff --git a/src/radeon_probe.h b/src/radeon_probe.h -index 5cd610c..907c52b 100644 +index 49044e3..9a01a24 100644 --- a/src/radeon_probe.h +++ b/src/radeon_probe.h -@@ -183,6 +183,27 @@ typedef struct +@@ -142,6 +142,27 @@ typedef struct Bool hw_capable; } RADEONI2CBusRec, *RADEONI2CBusPtr; @@ -8695,20 +7810,20 @@ index 5cd610c..907c52b 100644 typedef struct _RADEONCrtcPrivateRec { void *crtc_rotate_mem; void *cursor_mem; -@@ -195,6 +216,8 @@ typedef struct _RADEONCrtcPrivateRec { - uint32_t crtc_offset; +@@ -155,6 +176,8 @@ typedef struct _RADEONCrtcPrivateRec { int can_tile; Bool enabled; + Bool initialized; + struct radeon_memory *cursor; + } RADEONCrtcPrivateRec, *RADEONCrtcPrivatePtr; - typedef struct { + typedef struct _radeon_encoder { diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c -index 7712344..b147495 100644 +index cbedb7e..da60494 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c -@@ -39,6 +39,7 @@ +@@ -40,6 +40,7 @@ #include "radeon_macros.h" #include "radeon_probe.h" #include "radeon_video.h" @@ -8716,7 +7831,7 @@ index 7712344..b147495 100644 #include #include "fourcc.h" -@@ -111,6 +112,7 @@ static __inline__ uint32_t F_TO_24(float val) +@@ -124,6 +125,7 @@ static __inline__ uint32_t F_TO_24(float val) #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val)) @@ -8724,7 +7839,7 @@ index 7712344..b147495 100644 #define FINISH_ACCEL() #include "radeon_textured_videofuncs.c" -@@ -120,6 +122,7 @@ static __inline__ uint32_t F_TO_24(float val) +@@ -133,6 +135,7 @@ static __inline__ uint32_t F_TO_24(float 
val) #undef BEGIN_ACCEL #undef OUT_ACCEL_REG #undef OUT_ACCEL_REG_F @@ -8732,7 +7847,7 @@ index 7712344..b147495 100644 #undef FINISH_ACCEL #ifdef XF86DRI -@@ -133,6 +136,7 @@ static __inline__ uint32_t F_TO_24(float val) +@@ -146,6 +149,7 @@ static __inline__ uint32_t F_TO_24(float val) #define OUT_ACCEL_REG_F(reg, val) OUT_ACCEL_REG(reg, F_TO_DW(val)) #define FINISH_ACCEL() ADVANCE_RING() #define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) @@ -8740,18 +7855,8 @@ index 7712344..b147495 100644 #include "radeon_textured_videofuncs.c" -@@ -214,7 +218,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - break; - } - -- dstPitch = (dstPitch + 63) & ~63; -+ /* always align to 64 bytes - fixes issue with non-CP code */ -+ dstPitch = (dstPitch + 63) & ~63; - - if (pPriv->video_memory != NULL && size != pPriv->size) { - radeon_legacy_free_memory(pScrn, pPriv->video_memory); -@@ -227,6 +232,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, - size * 2, 64); +@@ -298,6 +302,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, + size * 2, 64); if (pPriv->video_offset == 0) return BadAlloc; + @@ -8760,7 +7865,7 @@ index 7712344..b147495 100644 } /* Bicubic filter setup */ -@@ -250,6 +258,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, +@@ -321,6 +328,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, pPriv->bicubic_src_offset = pPriv->bicubic_offset + info->fbLocation + pScrn->fbOffset; if (pPriv->bicubic_offset == 0) pPriv->bicubic_enabled = FALSE; @@ -8770,12 +7875,15 @@ index 7712344..b147495 100644 } if (pDraw->type == DRAWABLE_WINDOW) -@@ -279,8 +290,19 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, +@@ -350,11 +360,22 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, left = (x1 >> 16) & ~1; npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; - pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; -- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); +- if (info->ChipFamily >= CHIP_FAMILY_R600) +- pPriv->src_addr = (uint8_t *)(info->FB + 
pPriv->video_offset); +- else +- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); + pPriv->src_offset = pPriv->video_offset; + if (info->new_cs) { + int ret; @@ -8787,12 +7895,15 @@ index 7712344..b147495 100644 + } else if (info->drm_mm) { + pPriv->src_addr = (uint8_t *)(info->mm.front_buffer->map + pPriv->video_offset + (top * dstPitch)); + } else { -+ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); ++ if (info->ChipFamily >= CHIP_FAMILY_R600) ++ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset); ++ else ++ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); + } pPriv->src_pitch = dstPitch; pPriv->size = size; pPriv->pDraw = pDraw; -@@ -321,8 +343,24 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, +@@ -417,8 +438,24 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, } /* Upload bicubic filter tex */ @@ -8819,20 +7930,22 @@ index 7712344..b147495 100644 /* update cliplist */ if (!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes)) { -@@ -339,8 +377,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, +@@ -435,10 +472,12 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, pPriv->w = width; pPriv->h = height; + if (info->new_cs) + dri_bo_unmap(pPriv->src_bo); #ifdef XF86DRI -- if (info->directRenderingEnabled) + if (IS_R600_3D) + R600DisplayTexturedVideo(pScrn, pPriv); +- else if (info->directRenderingEnabled) + if (info->directRenderingEnabled || info->drm_mode_setting) RADEONDisplayTexturedVideoCP(pScrn, pPriv); else #endif diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c -index 410430c..b51637a 100644 +index f55ae12..5d23ad9 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c @@ -92,6 +92,11 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv @@ -8908,6 +8021,15 @@ index 410430c..b51637a 100644 dst_pitch = pPixmap->devKind; } +@@ -143,7 +186,7 @@ 
FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv + RADEON_WAIT_DMA_GUI_IDLE); + FINISH_ACCEL(); + +- if (!info->accel_state->XInited3D) ++ if (!info->accel_state->XInited3D && !info->drm_mm) + RADEONInit3DEngine(pScrn); + } + @@ -213,13 +256,20 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv txoffset = pPriv->src_offset; @@ -9006,7 +8128,7 @@ index 410430c..b51637a 100644 RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO | diff --git a/src/radeon_video.c b/src/radeon_video.c -index 423ea28..ae62549 100644 +index 2fb5fcc..33a4fd6 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -287,7 +287,7 @@ void RADEONInitVideo(ScreenPtr pScreen) diff --git a/radeon.xinf b/radeon.xinf index f2e49b4..e8597ff 100644 --- a/radeon.xinf +++ b/radeon.xinf @@ -263,9 +263,15 @@ alias pcivideo:v00001002d00009452sv*sd*bc*sc*i* radeon # AMD FireStream 9250 alias pcivideo:v00001002d00009456sv*sd*bc*sc*i* radeon # ATI FirePro V8700 (FireGL) alias pcivideo:v00001002d0000945Asv*sd*bc*sc*i* radeon # ATI Mobility RADEON HD 4870 alias pcivideo:v00001002d0000945Bsv*sd*bc*sc*i* radeon # ATI Mobility RADEON M98 +alias pcivideo:v00001002d0000946Asv*sd*bc*sc*i* radeon # ATI FirePro M7750 +alias pcivideo:v00001002d0000946Bsv*sd*bc*sc*i* radeon # ATI M98 +alias pcivideo:v00001002d0000947Asv*sd*bc*sc*i* radeon # ATI M98 +alias pcivideo:v00001002d0000947Bsv*sd*bc*sc*i* radeon # ATI M98 alias pcivideo:v00001002d00009487sv*sd*bc*sc*i* radeon # ATI Radeon RV730 (AGP) +alias pcivideo:v00001002d00009489sv*sd*bc*sc*i* radeon # ATI FirePro M5750 alias pcivideo:v00001002d0000948Fsv*sd*bc*sc*i* radeon # ATI Radeon RV730 (AGP) alias pcivideo:v00001002d00009490sv*sd*bc*sc*i* radeon # ATI RV730XT [Radeon HD 4670] +alias pcivideo:v00001002d00009491sv*sd*bc*sc*i* radeon # ATI RADEON E4600 alias pcivideo:v00001002d00009498sv*sd*bc*sc*i* radeon # ATI RV730 PRO [Radeon HD 4650] alias pcivideo:v00001002d0000949Csv*sd*bc*sc*i* radeon # ATI 
FirePro V7750 (FireGL) alias pcivideo:v00001002d0000949Esv*sd*bc*sc*i* radeon # ATI FirePro V5700 (FireGL) @@ -281,19 +287,27 @@ alias pcivideo:v00001002d000094C8sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD alias pcivideo:v00001002d000094C9sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 2400 alias pcivideo:v00001002d000094CBsv*sd*bc*sc*i* radeon # ATI RADEON E2400 alias pcivideo:v00001002d000094CCsv*sd*bc*sc*i* radeon # ATI RV610 +alias pcivideo:v00001002d000094CDsv*sd*bc*sc*i* radeon # ATI FireMV 2260 alias pcivideo:v00001002d00009500sv*sd*bc*sc*i* radeon # ATI RV670 alias pcivideo:v00001002d00009501sv*sd*bc*sc*i* radeon # ATI Radeon HD3870 +alias pcivideo:v00001002d00009504sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3850 alias pcivideo:v00001002d00009505sv*sd*bc*sc*i* radeon # ATI Radeon HD3850 +alias pcivideo:v00001002d00009506sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3850 X2 alias pcivideo:v00001002d00009507sv*sd*bc*sc*i* radeon # ATI RV670 +alias pcivideo:v00001002d00009508sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3870 +alias pcivideo:v00001002d00009509sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3870 X2 alias pcivideo:v00001002d0000950Fsv*sd*bc*sc*i* radeon # ATI Radeon HD3870 X2 alias pcivideo:v00001002d00009511sv*sd*bc*sc*i* radeon # ATI FireGL V7700 alias pcivideo:v00001002d00009515sv*sd*bc*sc*i* radeon # ATI Radeon HD3850 +alias pcivideo:v00001002d00009517sv*sd*bc*sc*i* radeon # ATI Radeon HD3690 +alias pcivideo:v00001002d00009519sv*sd*bc*sc*i* radeon # AMD Firestream 9170 alias pcivideo:v00001002d00009540sv*sd*bc*sc*i* radeon # ATI Radeon HD 4550 alias pcivideo:v00001002d00009541sv*sd*bc*sc*i* radeon # ATI Radeon RV710 alias pcivideo:v00001002d0000954Esv*sd*bc*sc*i* radeon # ATI Radeon RV710 alias pcivideo:v00001002d0000954Fsv*sd*bc*sc*i* radeon # ATI Radeon HD 4350 alias pcivideo:v00001002d00009552sv*sd*bc*sc*i* radeon # ATI Mobility Radeon 4300 Series alias pcivideo:v00001002d00009553sv*sd*bc*sc*i* radeon # ATI Mobility Radeon 4500 
Series +alias pcivideo:v00001002d00009555sv*sd*bc*sc*i* radeon # ATI Mobility Radeon 4500 Series alias pcivideo:v00001002d00009580sv*sd*bc*sc*i* radeon # ATI RV630 alias pcivideo:v00001002d00009581sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 2600 alias pcivideo:v00001002d00009583sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 2600 XT @@ -306,12 +320,16 @@ alias pcivideo:v00001002d0000958Bsv*sd*bc*sc*i* radeon # ATI Gemini Mobility Rad alias pcivideo:v00001002d0000958Csv*sd*bc*sc*i* radeon # ATI FireGL V5600 alias pcivideo:v00001002d0000958Dsv*sd*bc*sc*i* radeon # ATI FireGL V3600 alias pcivideo:v00001002d0000958Esv*sd*bc*sc*i* radeon # ATI Radeon HD 2600 LE -alias pcivideo:v00001002d00009592sv*sd*bc*sc*i* radeon # ATI Radeon RV710 +alias pcivideo:v00001002d0000958Fsv*sd*bc*sc*i* radeon # ATI Mobility FireGL Graphics Processor +alias pcivideo:v00001002d00009542sv*sd*bc*sc*i* radeon # ATI Radeon RV710 alias pcivideo:v00001002d000095C0sv*sd*bc*sc*i* radeon # ATI Radeon HD 3470 alias pcivideo:v00001002d000095C2sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3430 alias pcivideo:v00001002d000095C4sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3400 Series alias pcivideo:v00001002d000095C5sv*sd*bc*sc*i* radeon # ATI Radeon HD 3450 +alias pcivideo:v00001002d000095C6sv*sd*bc*sc*i* radeon # ATI Radeon HD 3450 alias pcivideo:v00001002d000095C7sv*sd*bc*sc*i* radeon # ATI Radeon HD 3430 +alias pcivideo:v00001002d000095C9sv*sd*bc*sc*i* radeon # ATI Radeon HD 3450 +alias pcivideo:v00001002d000095CCsv*sd*bc*sc*i* radeon # ATI FirePro V3700 alias pcivideo:v00001002d000095CDsv*sd*bc*sc*i* radeon # ATI FireMV 2450 alias pcivideo:v00001002d000095CEsv*sd*bc*sc*i* radeon # ATI FireMV 2260 alias pcivideo:v00001002d000095CFsv*sd*bc*sc*i* radeon # ATI FireMV 2260 @@ -322,6 +340,8 @@ alias pcivideo:v00001002d00009598sv*sd*bc*sc*i* radeon # ATI Radeon HD 3600 XT alias pcivideo:v00001002d00009599sv*sd*bc*sc*i* radeon # ATI Radeon HD 3600 PRO alias pcivideo:v00001002d00009591sv*sd*bc*sc*i* 
radeon # ATI Mobility Radeon HD 3650 alias pcivideo:v00001002d00009593sv*sd*bc*sc*i* radeon # ATI Mobility Radeon HD 3670 +alias pcivideo:v00001002d00009595sv*sd*bc*sc*i* radeon # ATI Mobility FireGL V5700 +alias pcivideo:v00001002d0000959Bsv*sd*bc*sc*i* radeon # ATI Mobility FireGL V5725 alias pcivideo:v00001002d00009610sv*sd*bc*sc*i* radeon # ATI Radeon HD 3200 Graphics alias pcivideo:v00001002d00009611sv*sd*bc*sc*i* radeon # ATI Radeon 3100 Graphics alias pcivideo:v00001002d00009612sv*sd*bc*sc*i* radeon # ATI Radeon HD 3200 Graphics diff --git a/sources b/sources index b496622..f4a5a58 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -6445815b12ffa4ccd1aafd43640f466d xf86-video-ati-6.10.0.tar.bz2 +b4120aa6a3bb22a7a3bc83cba060675a xf86-video-ati-6.11.0.tar.bz2 diff --git a/xorg-x11-drv-ati.spec b/xorg-x11-drv-ati.spec index 47a4020..1104248 100644 --- a/xorg-x11-drv-ati.spec +++ b/xorg-x11-drv-ati.spec @@ -4,8 +4,8 @@ Summary: Xorg X11 ati video driver Name: xorg-x11-drv-ati -Version: 6.10.0 -Release: 4%{?dist} +Version: 6.11.0 +Release: 1%{?dist} URL: http://www.x.org License: MIT Group: User Interface/X Hardware Support @@ -14,13 +14,11 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) Source0: http://www.x.org/pub/individual/driver/%{tarball}-%{version}.tar.bz2 Source1: radeon.xinf +Patch0: radeon-6.11.0-git.patch Patch1: radeon-modeset.patch -Patch2: radeon-modeset-fix-legacy-vt.patch #Patch4: radeon-6.9.0-remove-limit-heuristics.patch Patch5: radeon-6.9.0-panel-size-sanity.patch Patch6: radeon-6.9.0-bgnr-enable.patch -Patch7: radeon-mode-fix-rotate.patch -Patch8: radeon-fix-rs780-mm.patch ExcludeArch: s390 s390x @@ -42,13 +40,11 @@ X.Org X11 ati video driver. 
%prep %setup -q -n %{tarball}-%{version} +%patch0 -p1 -b .git %patch1 -p1 -b .modeset -%patch2 -p1 -b .modeset-fix-vt #patch4 -p1 -b .remove-limit-heuristics -%patch5 -p1 -b .panel-size +#%patch5 -p1 -b .panel-size %patch6 -p1 -b .bgnr -%patch7 -p1 -b .fix-rotate -%patch8 -p1 -b .rs780-fix %build autoreconf -iv @@ -80,6 +76,10 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man4/radeon.4* %changelog +* Fri Feb 27 2009 Dave Airlie 6.11.0-1 +- rebase to latest upstream +- enable R600 acceleration for EXA and Xv. + * Thu Feb 26 2009 Fedora Release Engineering - 6.10.0-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild