Commit 57c6d5cb authored by Zhou Chang's avatar Zhou Chang Committed by Xiang, Haihao

add inter frame support in vme.

parent 60b39eb7
......@@ -220,11 +220,11 @@ gen6_mfc_avc_img_state(VADriverContextP ctx)
OUT_BCS_BATCH(ctx, 0); /*Mainly about MB rate control and debug, just ignoring*/
OUT_BCS_BATCH(ctx, /*Inter and Intra Conformance Max size limit*/
(0xBB8 << 16) | /*InterMbMaxSz*/
(0xBB8) ); /*IntraMbMaxSz*/
(0xEE8) ); /*IntraMbMaxSz*/
OUT_BCS_BATCH(ctx, 0); /*Reserved*/
OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
OUT_BCS_BATCH(ctx, 0x80200000);
OUT_BCS_BATCH(ctx, 0x8C000000);
OUT_BCS_BATCH(ctx, 0x00010000);
OUT_BCS_BATCH(ctx, 0);
......@@ -412,7 +412,7 @@ gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, in
return len_in_dwords;
}
static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, dri_bo *bo)
static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, dri_bo *bo, unsigned int offset)
{
int len_in_dwords = 11;
......@@ -420,14 +420,14 @@ static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int
OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
OUT_BCS_BATCH(ctx, 32); /* 1 MV : SKIP*/
OUT_BCS_BATCH(ctx, 32); /* 32 MV*/
OUT_BCS_RELOC(ctx, bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
offset);
OUT_BCS_BATCH(ctx,
(1 << 24) | /* PackedMvNum, Debug*/
(4 << 20) | /* 8 MV*/
(4 << 20) | /* 8 MV, SNB don't use it*/
(1 << 19) | /* CbpDcY */
(1 << 18) | /* CbpDcU */
(1 << 17) | /* CbpDcV */
......@@ -525,7 +525,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
struct gen6_media_state *media_state = &i965->gen6_media_state;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
unsigned int *msg;
unsigned int *msg, offset;
int emit_new_state = 1, object_len_in_bytes;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
......@@ -535,8 +535,10 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
msg = (unsigned int *)media_state->vme_output.bo->virtual;
if ( is_intra == 0) { /*TODO: simulate VME result, [0,0] MVs*/
memset(media_state->vme_output.bo->virtual, 0, 128);
//memset(media_state->vme_output.bo->virtual, 0, 128);
//printf("msgs = %08x, %08x, %08x, %08x \n", msg[0], msg[1], msg[2], msg[3]);
dri_bo_unmap(media_state->vme_output.bo);
offset = 0;
}
for (y = 0; y < height_in_mbs; y++) {
......@@ -564,7 +566,8 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, void *obj)
if ( is_intra ) {
object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg);
} else {
object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, media_state->vme_output.bo);
object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, media_state->vme_output.bo,offset);
offset += 64;
}
msg += 4;
......
......@@ -51,6 +51,11 @@ static uint32_t gen6_vme_intra_frame[][4] = {
{0,0,0,0}
};
/*
 * Kernel binary for the "VME inter Frame" shader, stored as rows of four
 * 32-bit words. The rows are pulled in verbatim from the generated file
 * shaders/vme/inter_frame.g6b and are followed by one all-zero row.
 */
static uint32_t gen6_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g6b"
{0,0,0,0}
};
static struct media_kernel gen6_vme_kernels[] = {
{
"VME Intra Frame",
......@@ -58,10 +63,17 @@ static struct media_kernel gen6_vme_kernels[] = {
gen6_vme_intra_frame,
sizeof(gen6_vme_intra_frame),
NULL
},
{
"VME inter Frame",
VME_INTER_SHADER,
gen6_vme_inter_frame,
sizeof(gen6_vme_inter_frame),
NULL
}
};
#define GEN6_VME_KERNEL_NUMBER ARRAY_ELEMS(gen6_vme_kernels)
#define GEN6_VME_KERNEL_NUMBER 2
static void
gen6_vme_set_source_surface_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
......@@ -114,6 +126,7 @@ static void gen6_vme_source_surface_state(VADriverContextP ctx,
ss->ss0.surface_base_address = obj_surface->bo->offset;
ss->ss1.cbcr_pixel_offset_v_direction = 2;
ss->ss1.width = w - 1;
ss->ss1.height = h - 1;
......@@ -121,6 +134,7 @@ static void gen6_vme_source_surface_state(VADriverContextP ctx,
ss->ss2.interleave_chroma = 1;
ss->ss2.pitch = w_pitch - 1;
ss->ss2.half_pitch_for_chroma = 0;
gen6_vme_set_source_surface_tiling(ss, tiling);
/* UV offset for interleave mode */
......@@ -154,8 +168,14 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
int width_in_mbs = ALIGN(obj_context->picture_width, 16) / 16;
int height_in_mbs = ALIGN(obj_context->picture_height, 16) / 16;
int num_entries;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs;
if ( is_intra ) {
media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs;
} else {
media_state->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
}
media_state->vme_output.size_block = 16; /* an OWORD */
media_state->vme_output.pitch = ALIGN(media_state->vme_output.size_block, 16);
bo = dri_bo_alloc(i965->intel.bufmgr,
......@@ -199,7 +219,8 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
VAContextID context,
struct mfc_encode_state *encode_state)
struct mfc_encode_state *encode_state,
int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
......@@ -214,14 +235,16 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
obj_surface = SURFACE(encode_state->current_render_target);
assert(obj_surface);
gen6_vme_source_surface_state(ctx, 0, obj_surface);
/* reference 0, FIXME: must check it is valid or not */
obj_surface = SURFACE(pPicParameter->reference_picture);
assert(obj_surface);
// gen6_vme_source_surface_state(ctx, 1, obj_surface)
/* reference 1, FIXME: */
// obj_surface = SURFACE(pPicParameter->reference_picture);
// assert(obj_surface);
// gen6_vme_source_surface_state(ctx, 2, obj_surface);
if ( ! is_intra ) {
/* reference 0 */
obj_surface = SURFACE(pPicParameter->reference_picture);
assert(obj_surface);
gen6_vme_source_surface_state(ctx, 1, obj_surface);
/* reference 1, FIXME: */
// obj_surface = SURFACE(pPicParameter->reference_picture);
// assert(obj_surface);
//gen6_vme_source_surface_state(ctx, 2, obj_surface);
}
/* VME output */
gen6_vme_output_buffer_setup(ctx, context, encode_state, 3);
......@@ -250,7 +273,8 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
VAContextID context,
struct mfc_encode_state *encode_state)
struct mfc_encode_state *encode_state,
int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
......@@ -263,9 +287,14 @@ static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
assert(bo->virtual);
desc = bo->virtual;
for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
/*Load kernel into GPU memory*/
struct media_kernel *kernel = &gen6_vme_kernels[i];
for (i = 0; i < 1; i++) {
/*Load kernel into GPU memory*/
struct media_kernel *kernel;
if ( is_intra) {
kernel = &gen6_vme_kernels[0];
} else {
kernel = &gen6_vme_kernels[1];
}
/*Setup the descritor table*/
memset(desc, 0, sizeof(*desc));
......@@ -292,7 +321,7 @@ static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
/*binding table*/
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
1, //One Entry
4, //One Entry
offsetof(struct gen6_interface_descriptor_data, desc3),
media_state->binding_table.bo);
desc++;
......@@ -321,7 +350,7 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID context, struct mfc_encode_state *encode_state)
static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID context, struct mfc_encode_state *encode_state, int is_intra)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_media_state *media_state = &i965->gen6_media_state;
......@@ -334,7 +363,7 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, VAContextID conte
vme_state_message = (unsigned int *)media_state->vme_state.bo->virtual;
for(i = 0;i < 32; i++) {
vme_state_message[i] = 0x00;
vme_state_message[i] = 0x11;
}
vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra
......@@ -428,7 +457,7 @@ static void gen6_vme_idrt(VADriverContextP ctx)
ADVANCE_BATCH(ctx);
}
static int gen6_vme_media_object(VADriverContextP ctx,
static int gen6_vme_media_object_intra(VADriverContextP ctx,
VAContextID context,
struct mfc_encode_state *encode_state,
int mb_x, int mb_y)
......@@ -561,6 +590,139 @@ static int gen6_vme_media_object(VADriverContextP ctx,
return len_in_dowrds * 4;
}
/*
 * Pack the four horizontally consecutive samples row[first .. first + 3]
 * into one dword, row[first] in bits 7:0 and row[first + 3] in bits 31:24.
 * Returns 0 when row is NULL (the neighbour row is not readable).
 */
static unsigned int gen6_vme_pack_row4(const unsigned char *row, int first)
{
    if (row == NULL)
        return 0;

    /* Widen before shifting: an unsigned char promoted to int and shifted
     * by 24 overflows a signed int for samples >= 128. */
    return ((unsigned int)row[first + 3] << 24) |
           ((unsigned int)row[first + 2] << 16) |
           ((unsigned int)row[first + 1] << 8) |
            (unsigned int)row[first];
}

/*
 * Pack four vertically adjacent samples, taken from rows first .. first + 3
 * of a column whose rows are `pitch` bytes apart, into one dword. The sample
 * of row `first` lands in bits 7:0, the one of row first + 3 in bits 31:24.
 * Returns 0 when col is NULL (the neighbour column is not readable).
 */
static unsigned int gen6_vme_pack_col4(const unsigned char *col, int pitch, int first)
{
    if (col == NULL)
        return 0;

    return ((unsigned int)col[(first + 3) * pitch] << 24) |
           ((unsigned int)col[(first + 2) * pitch] << 16) |
           ((unsigned int)col[(first + 1) * pitch] << 8) |
            (unsigned int)col[first * pitch];
}

/*
 * Emit one MEDIA_OBJECT command for the macroblock at (mb_x, mb_y) of an
 * inter frame.
 *
 * The command carries 6 header dwords followed by 40 dwords of inline data:
 * four 8-dword groups (M0..M3 in the comments below) and an 8-dword write
 * message header. M2/M3 hold luma samples read by the CPU from the row above
 * and the column left of the macroblock in the current render target.
 *
 * ctx          - driver context used for batch emission.
 * context      - not used by this function.
 * encode_state - supplies current_render_target, the surface being encoded.
 * mb_x, mb_y   - macroblock coordinates, in macroblock units.
 *
 * Returns the size of the emitted command in bytes.
 */
static int gen6_vme_media_object_inter(VADriverContextP ctx,
                                       VAContextID context,
                                       struct mfc_encode_state *encode_state,
                                       int mb_x, int mb_y)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE( encode_state->current_render_target);
    const unsigned char *pixels;
    const unsigned char *top = NULL;    /* corner sample of the row above the MB */
    const unsigned char *left = NULL;   /* sample left of the MB's first row */
    int neighbor_mask = 0;
    int mapped;
    int pitch;
    int mb_width;
    int i;
    int len_in_dwords = 6 + 32 + 8;

    assert(obj_surface);
    pitch = obj_surface->width;
    mb_width = ALIGN(obj_surface->orig_width, 16) / 16;

    BEGIN_BATCH(ctx, len_in_dwords);

    OUT_BATCH(ctx, CMD_MEDIA_OBJECT | (len_in_dwords - 2));
    OUT_BATCH(ctx, VME_INTRA_SHADER);               /*Interface Descriptor Offset*/
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    /*inline data */
    OUT_BATCH(ctx, ((mb_y << 20)) | ((mb_x<<4)) );  /*M0.0 Reference0 X,Y*/
    OUT_BATCH(ctx, 0x00000000);                     /*M0.1 Reference1 X,Y, not used in P frame*/
    OUT_BATCH(ctx, (mb_y<<20) |
                   (mb_x<<4));                      /*M0.2 Source X,Y*/
    OUT_BATCH(ctx, 0x00A03000);                     /*M0.3 16x16 Source, 1/4 pixel, Haar*/
    OUT_BATCH(ctx, 0x00000000);                     /*M0.4 Ignored*/
    OUT_BATCH(ctx, 0x20200000);                     /*M0.5 Reference Width&Height, 32x32*/
    OUT_BATCH(ctx, 0x00000000);                     /*M0.6 Debug*/
    OUT_BATCH(ctx, 0x00000000);                     /*M0.7 Debug*/

    OUT_BATCH(ctx, 0x00000000);                     /*M1.0 Default value*/
    OUT_BATCH(ctx, 0x0C000020);                     /*M1.1 Default value MAX 32 MVs*/
    OUT_BATCH(ctx, 0x00000000);                     /*M1.2 Default value*/
    OUT_BATCH(ctx, 0x00000000);                     /*M1.3 Default value*/
    OUT_BATCH(ctx, 0x00000000);                     /*M1.4 Default value*/
    OUT_BATCH(ctx, 0x00000000);                     /*M1.5 Default value*/
    OUT_BATCH(ctx, 0x00000000);                     /*M1.6 Default value*/

    /* Neighbour flags: a bit is set only when that neighbour lies inside
     * the picture. */
    if ( mb_x > 0)
        neighbor_mask |= 0x60;
    if ( mb_y > 0)
        neighbor_mask |= 0x10;
    if ( mb_x > 0 && mb_y > 0)
        neighbor_mask |= 0x08;
    if ( mb_x > 0 && mb_y > 0 && mb_x < (mb_width - 1) )
        neighbor_mask |= 0x04;
    OUT_BATCH(ctx, (neighbor_mask << 8) | 6 );      /*M1.7 Neighbor MBS and Intra mode masks*/

    /* Map the surface to read the neighbour samples. If the mapping fails
     * the sample pointers stay NULL and zeros are emitted instead of
     * dereferencing an invalid address. */
    mapped = (drm_intel_gem_bo_map_gtt( obj_surface->bo ) == 0);
    pixels = mapped ? (const unsigned char *)obj_surface->bo->virtual : NULL;

    if (pixels != NULL) {
        /* Only form pointers that stay inside the mapping: the row above
         * does not exist for mb_y == 0 and the column to the left does not
         * exist for mb_x == 0. Both cases are flagged unavailable in M1.7. */
        if (mb_y > 0)
            top = pixels + (mb_x * 16 - 1 + (mb_y * 16 - 1) * pitch);
        if (mb_x > 0)
            left = pixels + (mb_x * 16 - 1 + (mb_y * 16) * pitch);
    }

    OUT_BATCH(ctx, 0);                              /*M2.0 MBZ*/
    OUT_BATCH(ctx, top ? ((unsigned int)top[0] << 24) : 0); /*M2.1 Corner Neighbor*/

    /*M2.2 .. M2.7: the 24 samples of the row above, following the corner*/
    for (i = 0; i < 6; i++)
        OUT_BATCH(ctx, gen6_vme_pack_row4(top, 1 + 4 * i));

    /*M3.0 .. M3.3: the 16 samples of the column to the left, top to bottom.
     * M3.3 covers rows 12..15 of the macroblock; its top byte previously
     * repeated the corner sample instead of the last left-column sample. */
    for (i = 0; i < 4; i++)
        OUT_BATCH(ctx, gen6_vme_pack_col4(left, pitch, 4 * i));

    OUT_BATCH(ctx, 0x11111111);                     /*M3.4*/
    OUT_BATCH(ctx, 0x00000000);                     /*M3.5*/
    OUT_BATCH(ctx, 0x00000000);                     /*M3.6*/
    OUT_BATCH(ctx, 0x00000000);                     /*M3.7*/

    OUT_BATCH(ctx, 0);                              /*Write Message Header M0.0*/
    OUT_BATCH(ctx, 0);                              /*Write Message Header M0.1*/
    /* Four units per macroblock; presumably the OWord offset of this MB's
     * record in the VME output buffer - TODO confirm against the kernel. */
    OUT_BATCH(ctx, (mb_y * mb_width + mb_x) * 4);   /*Write Message Header M0.2*/
    OUT_BATCH(ctx, 0x00000000);                     /*Write Message Header M0.3*/

    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, 0x00000000);

    if (mapped)
        drm_intel_gem_bo_unmap_gtt( obj_surface->bo );

    ADVANCE_BATCH(ctx);

    return len_in_dwords * 4;
}
static void gen6_vme_media_init(VADriverContextP ctx)
{
int i;
......@@ -627,6 +789,8 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
int height_in_mbs = (obj_context->picture_height + 15) / 16;
int x, y;
int emit_new_state = 1, object_len_in_bytes;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
intel_batchbuffer_start_atomic(ctx, 0x1000);
......@@ -643,7 +807,7 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
ADVANCE_BATCH(ctx);
/*Step2: State command PIPELINE_SELECT*/
gen6_vme_pipeline_select(ctx);
gen6_vme_pipeline_select(ctx);
/*Step3: State commands configuring pipeline states*/
gen6_vme_state_base_address(ctx);
......@@ -655,7 +819,11 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
}
/*Step4: Primitive commands*/
object_len_in_bytes = gen6_vme_media_object(ctx, context, encode_state, x, y);
if ( is_intra ) {
object_len_in_bytes = gen6_vme_media_object_intra(ctx, context, encode_state, x, y);
} else {
object_len_in_bytes = gen6_vme_media_object_inter(ctx, context, encode_state, x, y);
}
if (intel_batchbuffer_check_free_space(ctx, object_len_in_bytes) == 0) {
intel_batchbuffer_end_atomic(ctx);
......@@ -674,12 +842,14 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx,
struct mfc_encode_state *encode_state)
{
VAStatus vaStatus = VA_STATUS_SUCCESS;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
/*Setup all the memory object*/
gen6_vme_surface_setup(ctx, context, encode_state);
gen6_vme_interface_setup(ctx, context, encode_state);
gen6_vme_surface_setup(ctx, context, encode_state, is_intra);
gen6_vme_interface_setup(ctx, context, encode_state, is_intra);
gen6_vme_constant_setup(ctx, context, encode_state);
gen6_vme_vme_state_setup(ctx, context, encode_state);
gen6_vme_vme_state_setup(ctx, context, encode_state, is_intra);
/*Programing media pipeline*/
gen6_vme_pipeline_programing(ctx, context, encode_state);
......
INTEL_G6B = intra_frame.g6b
INTEL_G6B = intra_frame.g6b inter_frame.g6b
EXTRA_DIST = $(INTEL_G6B)
......
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: IntraFrame.asm
//
// Make intra prediction estimation for Intra frame
//
//
// Now, begin source code....
//
include(`vme_header.inc')
/*inline input data: r5~r11*/
/* Copy the byte at offset 20 of r0 into the same byte offset of r5, the
 * first inline-data register, before r5 is used as message payload. */
mov(1) r5.20<1>:UB r0.20<1,1,0>:UB {align1} ;
/* Build the 4-register VME message m0..m3 from inline data r5..r8. */
mov(8) m0.0<1>:UD r5.0<8,8,1>:UD {align1};
mov(8) m1.0<1>:UD r6.0<8,8,1>:UD {align1};
mov(8) m2.0<1>:UD r7.0<8,8,1>:UD {align1};
mov(8) m3.0<1>:UD r8.0<8,8,1>:UD {align1};
/* Send the 4-register message to the VME unit; the reply is 4 registers
 * long with r12 as destination (presumably filling r12..r15 - TODO confirm). */
send(8) 0 r12 null vme(0,0,0,1) mlen 4 rlen 4 {align1};
/* Same byte copy from r0 as above, this time into r9, which then becomes
 * the header (m0) of the write message. */
mov(1) r9.20<1>:UB r0.20<1,1,0>:UB {align1} ;
mov(8) m0.0<1>:UD r9.0<8,8,1>:UD {align1 mask_disable};
/*
mov(8) m1.0<1>:UD r13.0<8,8,1>:UD {align1 mask_disable};
mov(8) m2.0<1>:UD r14.0<8,8,1>:UD {align1 mask_disable};
*/
/* Widen the first two bytes of r13 to two words at the start of r9. r9 is
 * reused as scratch here; its previous contents were already copied to m0. */
mov(2) r9.0<1>:UW r13.0<2,2,1>:UB {align1 mask_disable};
/* Source region <8,8,0> has a horizontal stride of 0, so m1 and m2 are
 * presumably filled with the first dword of r9 repeated - TODO confirm. */
mov(8) m1.0<1>:UD r9.0<8,8,0>:UD {align1 mask_disable};
mov(8) m2.0<1>:UD r9.0<8,8,0>:UD {align1 mask_disable};
/*
mov(16) m1.2<8:8:2>:UW r13.1<0,0,0>:UB {align1};
*/
/* bind index 3, write 2 oword, msg type: 8(OWord Block Write) */
/* The message is 3 registers long: header m0 plus payload m1 and m2. */
send (16) 0 r13 null write(3, 3, 8, 1) mlen 3 rlen 1 {align1};
/* Final message: copy r0 into m0 and send it to the thread spawner with
 * EOT set, which ends this thread. */
mov (8) m0.0<1>:UD r0<8,8,1>:UD {align1};
send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
{ 0x00000001, 0x20b40231, 0x00200014, 0x00000000 },
{ 0x00600001, 0x20000022, 0x008d00a0, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d00c0, 0x00000000 },
{ 0x00600001, 0x20400022, 0x008d00e0, 0x00000000 },
{ 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
{ 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
{ 0x00000001, 0x21340231, 0x00200014, 0x00000000 },
{ 0x00600201, 0x20000022, 0x008d0120, 0x00000000 },
{ 0x00200201, 0x21200229, 0x004501a0, 0x00000000 },
{ 0x00600201, 0x20200022, 0x008c0120, 0x00000000 },
{ 0x00600201, 0x20400022, 0x008c0120, 0x00000000 },
{ 0x05800031, 0x21a01cdd, 0x00000000, 0x061b0303 },
{ 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment