Commit 8af77b87 authored by Xiang, Haihao's avatar Xiang, Haihao

i965_drv_video: [H.264] add support for ILDB

parent 0c70b6b6
...@@ -41,7 +41,8 @@ i965_drv_video_la_SOURCES = \ ...@@ -41,7 +41,8 @@ i965_drv_video_la_SOURCES = \
i965_render.c \ i965_render.c \
i965_drv_video.c \ i965_drv_video.c \
i965_avc_bsd.c \ i965_avc_bsd.c \
i965_avc_hw_scoreboard.c i965_avc_hw_scoreboard.c\
i965_avc_ildb.c
noinst_HEADERS = \ noinst_HEADERS = \
object_heap.h \ object_heap.h \
...@@ -57,4 +58,5 @@ noinst_HEADERS = \ ...@@ -57,4 +58,5 @@ noinst_HEADERS = \
i965_defines.h \ i965_defines.h \
i965_structs.h \ i965_structs.h \
i965_avc_bsd.h \ i965_avc_bsd.h \
i965_avc_hw_scoreboard.h i965_avc_hw_scoreboard.h\
i965_avc_ildb.h
...@@ -934,6 +934,9 @@ i965_avc_bsd_frame_store_index(VADriverContextP ctx, ...@@ -934,6 +934,9 @@ i965_avc_bsd_frame_store_index(VADriverContextP ctx,
void void
i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state) i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state)
{ {
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_media_state *media_state = &i965->media_state;
struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
int i, j; int i, j;
VAPictureParameterBufferH264 *pic_param; VAPictureParameterBufferH264 *pic_param;
VASliceParameterBufferH264 *slice_param; VASliceParameterBufferH264 *slice_param;
...@@ -946,6 +949,7 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state) ...@@ -946,6 +949,7 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state)
i965_avc_bsd_img_state(ctx, decode_state); i965_avc_bsd_img_state(ctx, decode_state);
i965_avc_bsd_qm_state(ctx, decode_state); i965_avc_bsd_qm_state(ctx, decode_state);
i965_h264_context->enable_avc_ildb = 0;
for (j = 0; j < decode_state->num_slice_params; j++) { for (j = 0; j < decode_state->num_slice_params; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
...@@ -964,6 +968,10 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state) ...@@ -964,6 +968,10 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state)
i965_avc_bsd_slice_state(ctx, pic_param, slice_param); i965_avc_bsd_slice_state(ctx, pic_param, slice_param);
i965_avc_bsd_buf_base_state(ctx, pic_param, slice_param); i965_avc_bsd_buf_base_state(ctx, pic_param, slice_param);
i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param); i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param);
if (slice_param->disable_deblocking_filter_idc != 1)
i965_h264_context->enable_avc_ildb = 1;
slice_param++; slice_param++;
} }
......
...@@ -52,8 +52,8 @@ enum { ...@@ -52,8 +52,8 @@ enum {
}; };
static unsigned long avc_hw_scoreboard_kernel_offset[] = { static unsigned long avc_hw_scoreboard_kernel_offset[] = {
SETHWSCOREBOARD_IP_GEN5, SETHWSCOREBOARD_IP_GEN5 * INST_UNIT_GEN5,
SETHWSCOREBOARD_MBAFF_IP_GEN5 SETHWSCOREBOARD_MBAFF_IP_GEN5 * INST_UNIT_GEN5
}; };
static unsigned int avc_hw_scoreboard_constants[] = { static unsigned int avc_hw_scoreboard_constants[] = {
...@@ -301,11 +301,9 @@ i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state) ...@@ -301,11 +301,9 @@ i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state)
struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_media_state *media_state = &i965->media_state; struct i965_media_state *media_state = &i965->media_state;
struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context; struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context;
if (i965_h264_context->use_avc_hw_scoreboard) { if (i965_h264_context->use_avc_hw_scoreboard) {
assert(i965_h264_context != NULL); struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs; avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs;
avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs; avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs;
...@@ -333,12 +331,10 @@ i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx) ...@@ -333,12 +331,10 @@ i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx)
struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_media_state *media_state = &i965->media_state; struct i965_media_state *media_state = &i965->media_state;
struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context; struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context;
dri_bo *bo;
if (i965_h264_context->use_avc_hw_scoreboard) { if (i965_h264_context->use_avc_hw_scoreboard) {
assert(i965_h264_context != NULL); struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context; dri_bo *bo;
dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo); dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo);
bo = dri_bo_alloc(i965->intel.bufmgr, bo = dri_bo_alloc(i965->intel.bufmgr,
......
This diff is collapsed.
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_ILDB_H__
#define __I965_AVC_ILDB_H__
/*
 * Surface slots used by the AVC in-loop deblocking (ILDB) stage.
 * NUM_AVC_ILDB_SURFACES sizes the surface[] array in
 * struct i965_avc_ildb_context; the SURFACE_* values are presumably the
 * indices into that array (and into the binding table) -- TODO confirm
 * against i965_avc_ildb.c.
 */
#define SURFACE_EDGE_CONTROL_DATA 0
#define SURFACE_SRC_Y 1
#define SURFACE_SRC_UV 2
#define SURFACE_DEST_Y 3
#define SURFACE_DEST_UV 4
#define NUM_AVC_ILDB_SURFACES 5
/*
 * Size of one edge-control-data record, expressed in dwords and in bytes
 * (16 dwords * 4 bytes = 64 bytes). Presumably one record per macroblock
 * -- TODO confirm.
 * NOTE(review): "BTYES" is a misspelling of "BYTES"; it is kept as-is
 * because users of this macro are outside this header.
 */
#define EDGE_CONTROL_DATA_IN_DWS 16
#define EDGE_CONTROL_DATA_IN_BTYES 64
/*
 * Per-decoder state for the AVC in-loop deblocking (ILDB) stage.
 *
 * Groups the buffer objects (dri_bo) and URB layout values used by the
 * ILDB stage. How each member is filled in is defined by the
 * implementation file, which is not visible from this header; the member
 * notes below are therefore hedged where they go beyond the declarations.
 */
struct i965_avc_ildb_context
{
/* Buffer object for the CURBE (presumably constant data for the kernels). */
struct {
dri_bo *bo;
} curbe;
/*
 * One descriptor per surface slot; the array holds NUM_AVC_ILDB_SURFACES
 * entries (presumably indexed by the SURFACE_* constants -- TODO confirm).
 */
struct {
dri_bo *ss_bo; /* presumably the surface-state buffer object -- confirm */
dri_bo *s_bo; /* presumably the buffer object backing the surface data -- confirm */
unsigned long offset; /* presumably a byte offset into s_bo -- confirm */
int surface_type;
int width;
int height;
int depth;
int pitch;
int format;
int vert_line_stride;
int vert_line_stride_ofs;
int is_target; /* presumably non-zero when the surface is written to -- confirm */
} surface[NUM_AVC_ILDB_SURFACES];
/* Buffer object for the binding table. */
struct {
dri_bo *bo;
} binding_table;
/* Buffer object for the IDRT (presumably the interface descriptor remap table -- confirm). */
struct {
dri_bo *bo;
} idrt;
/* Buffer object for the VFE state. */
struct {
dri_bo *bo;
} vfe_state;
/* URB layout: start, entry count and entry size for the VFE and CS sections. */
struct {
unsigned int vfe_start;
unsigned int cs_start;
unsigned int num_vfe_entries;
unsigned int num_cs_entries;
unsigned int size_vfe_entry;
unsigned int size_cs_entry;
} urb;
/* Picture-level values; NOTE(review): value encoding of picture_type is not visible here. */
int picture_type;
int mbs_per_picture;
};
/*
 * Entry points of the AVC ILDB stage. The implementations live in the
 * corresponding .c file, which is not visible from this header, so the
 * notes below describe only what the prototypes show.
 */
/* Takes the driver context and the current decode state; presumably runs the deblocking pass for the picture -- confirm. */
void i965_avc_ildb(VADriverContextP, struct decode_state *);
/* Takes the driver context; presumably (re)initialises ILDB state before decoding -- confirm. */
void i965_avc_ildb_decode_init(VADriverContextP);
/*
 * Takes an ILDB context and returns a Bool; presumably tears the context
 * down and releases its buffer objects -- confirm.
 * NOTE(review): "ternimate" is a misspelling of "terminate"; it is kept
 * as-is because callers are outside this header.
 */
Bool i965_avc_ildb_ternimate(struct i965_avc_ildb_context *);
#endif /* __I965_AVC_ILDB_H__ */
This diff is collapsed.
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
#include "i965_avc_bsd.h" #include "i965_avc_bsd.h"
#include "i965_avc_hw_scoreboard.h" #include "i965_avc_hw_scoreboard.h"
#include "i965_avc_ildb.h"
#define INST_UNIT_GEN4 16
#define INST_UNIT_GEN5 8
#define MB_CMD_IN_BYTES 64 #define MB_CMD_IN_BYTES 64
#define MB_CMD_IN_DWS 16 #define MB_CMD_IN_DWS 16
...@@ -35,6 +39,7 @@ struct i965_h264_context ...@@ -35,6 +39,7 @@ struct i965_h264_context
int mbaff_frame_flag; int mbaff_frame_flag;
} picture; } picture;
int enable_avc_ildb;
int use_avc_hw_scoreboard; int use_avc_hw_scoreboard;
int use_hw_w128; int use_hw_w128;
...@@ -47,6 +52,8 @@ struct i965_h264_context ...@@ -47,6 +52,8 @@ struct i965_h264_context
struct i965_avc_bsd_context i965_avc_bsd_context; struct i965_avc_bsd_context i965_avc_bsd_context;
struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context; struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context;
struct i965_avc_ildb_context avc_ildb_context;
struct { struct {
VASurfaceID surface_id; VASurfaceID surface_id;
int frame_store_id; int frame_store_id;
......
...@@ -239,7 +239,7 @@ ...@@ -239,7 +239,7 @@
#define FilterSampleFlag r28.0 // :uw, #define FilterSampleFlag r28.0 // :uw,
.declare A Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w .declare A Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare B Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w .declare BB Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare TempRow3 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w .declare TempRow3 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow3B Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub .declare TempRow3B Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
ILDB_LABEL(AVC_ILDB_CHILD_UV): ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
#if defined(_DEBUG) #if defined(_DEBUG)
...@@ -85,9 +85,9 @@ ILDB_LABEL(RE_ENTRY_UV): // for bootom field ...@@ -85,9 +85,9 @@ ILDB_LABEL(RE_ENTRY_UV): // for bootom field
// Load current MB control data // Load current MB control data
#if defined(DEV_CL) #if defined(DEV_CL)
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline #include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else #else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond #include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif #endif
// Init addr register for vertical control data // Init addr register for vertical control data
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
ILDB_LABEL(AVC_ILDB_CHILD_Y): ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
#if defined(_DEBUG) #if defined(_DEBUG)
...@@ -99,9 +99,9 @@ RE_ENTRY: // for bootom field ...@@ -99,9 +99,9 @@ RE_ENTRY: // for bootom field
// Load current MB control data // Load current MB control data
#if defined(DEV_CL) #if defined(DEV_CL)
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline #include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else #else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond #include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif #endif
// Init addr register for vertical control data // Init addr register for vertical control data
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
ILDB_LABEL(AVC_ILDB_CHILD_UV): ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
#if defined(_DEBUG) #if defined(_DEBUG)
...@@ -71,12 +71,12 @@ ILDB_LABEL(AVC_ILDB_CHILD_UV): ...@@ -71,12 +71,12 @@ ILDB_LABEL(AVC_ILDB_CHILD_UV):
// Load current MB control data // Load current MB control data
#if defined(DEV_CL) #if defined(DEV_CL)
#if defined(_APPLE) #if defined(_APPLE)
#include "load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only #include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else #else
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline #include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif #endif
#else #else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond #include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif #endif
// Check loaded control data // Check loaded control data
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
ILDB_LABEL(AVC_ILDB_CHILD_Y): ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
#if defined(_DEBUG) #if defined(_DEBUG)
...@@ -58,12 +58,12 @@ ILDB_LABEL(AVC_ILDB_CHILD_Y): ...@@ -58,12 +58,12 @@ ILDB_LABEL(AVC_ILDB_CHILD_Y):
// Load current MB control data // Load current MB control data
#if defined(DEV_CL) #if defined(DEV_CL)
#if defined(_APPLE) #if defined(_APPLE)
#include "load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only #include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else #else
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline #include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif #endif
#else #else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond #include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif #endif
// Check loaded control data // Check loaded control data
......
...@@ -19,4 +19,4 @@ ...@@ -19,4 +19,4 @@
// 19:16 Response length = 0 // 19:16 Response length = 0
// 1:0 SubFuncID = 01 for CloseGateway // 1:0 SubFuncID = 01 for CloseGateway
// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001 // Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001
send (8) null:ud m7 r0.0:ud MSG_GW CGWMSGDSC send (8) null:ud m7 r0.0<0;1,0>:ud MSG_GW CGWMSGDSC
...@@ -38,7 +38,7 @@ mov (1) GatewayPayloadKey:uw 0x1212:uw { NoDDChk } // Key ...@@ -38,7 +38,7 @@ mov (1) GatewayPayloadKey:uw 0x1212:uw { NoDDChk } // Key
// Write back one byte (value = 0xFF) to root thread GRF to indicate this child thread is finished // Write back one byte (value = 0xFF) to root thread GRF to indicate this child thread is finished
// All lower 4 bytes must be assigned to the same byte value. // All lower 4 bytes must be assigned to the same byte value.
add (1) Temp1_W:w MaxThreads:uw -OutstandingThreads:uw add (1) Temp1_W:w MaxThreads:uw -OutstandingThreads:uw
mov (4) GatewayPayload<1>:ub Temp1_B:ub mov (4) GatewayPayload<1>:ub Temp1_B<0;1,0>:ub
send (8) GatewayResponse:ud m0 GatewayPayload<8;8,1>:ud MSG_GW FWDMSGDSC send (8) GatewayResponse:ud m0 GatewayPayload<8;8,1>:ud MSG_GW FWDMSGDSC
......
...@@ -172,28 +172,28 @@ FILTER_Y: ...@@ -172,28 +172,28 @@ FILTER_Y:
// B = p2 + (p1 + p0 + q0) + 4 = p2 + A + 4 // B = p2 + (p1 + p0 + q0) + 4 = p2 + A + 4
// add (16) acc0.0<1>:w P2 4:w // p2 + 4 // add (16) acc0.0<1>:w P2 4:w // p2 + 4
// add (16) B(0)<1> acc0.0<16;16,1>:w A(0) // B = p2 + A + 4 // add (16) BB(0)<1> acc0.0<16;16,1>:w A(0) // B = p2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // p2 + 4 add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // p2 + 4
add (16) B(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4 add (16) BB(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4
// Now acc0 = B // Now acc0 = B
// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3 // p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
// mov (16) acc0.0<1>:w B(0) // mov (16) acc0.0<1>:w BB(0)
mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2 // p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w B(0) -2:w add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3 // p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
add (16) acc0.0<1>:w Q1 A(0) // B + A add (16) acc0.0<1>:w Q1 A(0) // B + A
add (16) acc0.0<1>:w acc0.0<16;16,1>:w B(0) // B + A + q1 add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0) // B + A + q1
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3 shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3
// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3 // p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
// mov (16) acc0.0<1>:w B(0) // mov (16) acc0.0<1>:w BB(0)
// mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w // mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w
// shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w // shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
...@@ -246,22 +246,22 @@ Y_ENDIF3: ...@@ -246,22 +246,22 @@ Y_ENDIF3:
// B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4 // B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // q2 + 4 add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // q2 + 4
add (16) B(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4 add (16) BB(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4
// Acc0 = B // Acc0 = B
// q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3 // q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3
// mov (16) acc0.0<1>:w B(0) // mov (16) acc0.0<1>:w BB(0)
mac (16) acc0.0<1>:w Q2_plus_Q3(0) 2:w mac (16) acc0.0<1>:w Q2_plus_Q3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2 // q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w B(0) -2:w add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3 // q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
add (16) acc0.0<1>:w p1(0) A(0) add (16) acc0.0<1>:w p1(0) A(0)
add (16) acc0.0<1>:w acc0.0<16;16,1>:w B(0) add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0)
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w
mov (16) NewQ2 TempRow3B(0) // q2' mov (16) NewQ2 TempRow3B(0) // q2'
......
...@@ -170,7 +170,7 @@ FILTER_Y_MBAFF: ...@@ -170,7 +170,7 @@ FILTER_Y_MBAFF:
// B = p2 + p1 + p0 + q0 + 4 = p2 + A + 4 // B = p2 + p1 + p0 + q0 + 4 = p2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // p2 + 4 add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // p2 + 4
add (16) B(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4 add (16) BB(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4
// Now acc0 = B // Now acc0 = B
...@@ -179,12 +179,12 @@ FILTER_Y_MBAFF: ...@@ -179,12 +179,12 @@ FILTER_Y_MBAFF:
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2 // p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w B(0) -2:w add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3 // p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
add (16) acc0.0<1>:w Q1 A(0) // B + A add (16) acc0.0<1>:w Q1 A(0) // B + A
add (16) acc0.0<1>:w acc0.0<16;16,1>:w B(0) // B + A + q1 add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0) // B + A + q1
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3 shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3
mov (16) NewP2 TempRow3B(0) // p2' mov (16) NewP2 TempRow3B(0) // p2'
...@@ -227,7 +227,7 @@ MBAFF_Y_ENDIF3: ...@@ -227,7 +227,7 @@ MBAFF_Y_ENDIF3:
// B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4 // B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // q2 + 4 add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // q2 + 4
add (16) B(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4 add (16) BB(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4
// Acc0 = B // Acc0 = B
...@@ -236,12 +236,12 @@ MBAFF_Y_ENDIF3: ...@@ -236,12 +236,12 @@ MBAFF_Y_ENDIF3:
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2 // q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w B(0) -2:w add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3 // q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
add (16) acc0.0<1>:w p1(0) A(0) add (16) acc0.0<1>:w p1(0) A(0)
add (16) acc0.0<1>:w acc0.0<16;16,1>:w B(0) add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0)
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w
mov (16) NewQ2 TempRow3B(0) // q2' mov (16) NewQ2 TempRow3B(0) // q2'
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
ILDB_LABEL(AVC_ILDB_ROOT_UV): ILDB_LABEL(AVC_ILDB_ROOT_UV):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
ILDB_LABEL(AVC_ILDB_ROOT_Y): ILDB_LABEL(AVC_ILDB_ROOT_Y):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
ILDB_LABEL(AVC_ILDB_ROOT_UV): ILDB_LABEL(AVC_ILDB_ROOT_UV):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
#if defined(_DEBUG) #if defined(_DEBUG)
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
ILDB_LABEL(AVC_ILDB_ROOT_Y): ILDB_LABEL(AVC_ILDB_ROOT_Y):
#endif #endif
#include "setupVPKernel.asm" #include "SetupVPKernel.asm"
#include "AVC_ILDB.inc" #include "AVC_ILDB.inc"
///////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////
......
...@@ -86,12 +86,12 @@ ...@@ -86,12 +86,12 @@
#define RETURN_REG r62 // Return pointer for all sub-routine calls (type DWORD) #define RETURN_REG r62 // Return pointer for all sub-routine calls (type DWORD)
#define CALL(subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud 1+skipInst*INST_SIZE \n\ #define CALL(subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud (1+skipInst)*INST_SIZE \n\
jmpi (1) subFunc jmpi (1) subFunc
#define RETURN mov (1) ip:ud RETURN_REG<0;1,0>:ud // Return to calling module #define RETURN mov (1) ip:ud RETURN_REG<0;1,0>:ud // Return to calling module
#define PRED_CALL(flag, subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud 1+skipInst*INST_SIZE \n\ #define PRED_CALL(flag, subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud (1+skipInst)*INST_SIZE \n\
(flag) jmpi (1) subFunc (flag) jmpi (1) subFunc
......
...@@ -33,8 +33,8 @@ ...@@ -33,8 +33,8 @@
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8) mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8)
mov (16) MSGPAYLOADD(0)<1> SRC_UD(0) // Compressed inst mov (16) MSGPAYLOADD(0)<1> SRC_UD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_UD(2) mov (16) MSGPAYLOADD(2)<1> SRC_UD(2)
#if defined(_PROGRESSIVE) #if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud
......
...@@ -34,9 +34,9 @@ ...@@ -34,9 +34,9 @@
// Pack Y // Pack Y
mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_YD(2) mov (16) MSGPAYLOADD(2)<1> SRC_YD(2)
mov (16) MSGPAYLOADD(4)<1> SRC_YD(4) mov (16) MSGPAYLOADD(4)<1> SRC_YD(4)
mov (16) MSGPAYLOADD(6)<1> SRC_YD(6) mov (16) MSGPAYLOADD(6)<1> SRC_YD(6)
#if defined(_PROGRESSIVE) #if defined(_PROGRESSIVE)
......
...@@ -34,5 +34,5 @@ shr (1) MSGSRC.0:uw URBOffset:uw 1:w // divide by 2, because URB entry is count ...@@ -34,5 +34,5 @@ shr (1) MSGSRC.0:uw URBOffset:uw 1:w // divide by 2, because URB entry is count
//mov (1) MSGSRC.1:ud 0:ud // Reset Handle 1 //mov (1) MSGSRC.1:ud 0:ud // Reset Handle 1
send null:uw m0 MSGSRC:uw URBWRITE URBWriteMsgDesc:ud // URB write send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE URBWriteMsgDesc:ud // URB write
//send null:ud MRF0 null:ud URBWriteMsgDesc:ud // URB write //send null:ud MRF0 null:ud URBWriteMsgDesc:ud // URB write
...@@ -36,4 +36,4 @@ shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is coun ...@@ -36,4 +36,4 @@ shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is coun
// Current MB offset is in URBOffset, use it as write origin // Current MB offset is in URBOffset, use it as write origin
// Add 2 to offset to store data be be passed to the right MB // Add 2 to offset to store data be be passed to the right MB
send null:uw m0 MSGSRC:uw URBWRITE MSG_LEN(1)+URBWMSGDSC+0x20 // URB write send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE MSG_LEN(1)+URBWMSGDSC+0x20 // URB write
...@@ -37,4 +37,4 @@ shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is coun ...@@ -37,4 +37,4 @@ shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is coun
// Add 2 to offset to store data be be passed to the right MB // Add 2 to offset to store data be be passed to the right MB
//mov (1) URBWriteMsgDesc:ud 0x06300020:ud //mov (1) URBWriteMsgDesc:ud 0x06300020:ud
send null:uw m0 MSGSRC:uw URBWRITE MSG_LEN(2)+URBWMSGDSC+0x20 // URB write send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE MSG_LEN(2)+URBWMSGDSC+0x20 // URB write
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
// ---------------------------------------------------- // ----------------------------------------------------
#define COMBINED_KERNEL #define COMBINED_KERNEL
//#define ENABLE_ILDB #define ENABLE_ILDB
// WA for *Stim tool issue, should be removed later // WA for *Stim tool issue, should be removed later
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment