Commit bffbec25 authored by Xiang, Haihao's avatar Xiang, Haihao

i965_drv_video: add support for H264 on Clarkdale/Arrandale

parent 11f69c84
...@@ -122,6 +122,8 @@ AC_OUTPUT([ ...@@ -122,6 +122,8 @@ AC_OUTPUT([
dummy_drv_video/Makefile dummy_drv_video/Makefile
i965_drv_video/Makefile i965_drv_video/Makefile
i965_drv_video/shaders/Makefile i965_drv_video/shaders/Makefile
i965_drv_video/shaders/h264/Makefile
i965_drv_video/shaders/h264/mc/Makefile
i965_drv_video/shaders/mpeg2/Makefile i965_drv_video/shaders/mpeg2/Makefile
i965_drv_video/shaders/mpeg2/vld/Makefile i965_drv_video/shaders/mpeg2/vld/Makefile
i965_drv_video/shaders/render/Makefile i965_drv_video/shaders/render/Makefile
......
...@@ -32,21 +32,29 @@ i965_drv_video_la_LIBADD = ../va/libva-x11.la -lpthread ...@@ -32,21 +32,29 @@ i965_drv_video_la_LIBADD = ../va/libva-x11.la -lpthread
i965_drv_video_la_SOURCES = \ i965_drv_video_la_SOURCES = \
object_heap.c \ object_heap.c \
intel_batchbuffer.c \ intel_batchbuffer.c \
intel_batchbuffer_dump.c\
intel_memman.c \ intel_memman.c \
intel_driver.c \ intel_driver.c \
i965_media.c \ i965_media.c \
i965_media_mpeg2.c \ i965_media_mpeg2.c \
i965_media_h264.c \
i965_render.c \ i965_render.c \
i965_drv_video.c i965_drv_video.c \
i965_avc_bsd.c \
i965_avc_hw_scoreboard.c
noinst_HEADERS = \ noinst_HEADERS = \
object_heap.h \ object_heap.h \
intel_batchbuffer.h \ intel_batchbuffer.h \
intel_batchbuffer_dump.h\
intel_memman.h \ intel_memman.h \
intel_driver.h \ intel_driver.h \
i965_media.h \ i965_media.h \
i965_media_mpeg2.h \ i965_media_mpeg2.h \
i965_media_h264.h \
i965_render.h \ i965_render.h \
i965_drv_video.h \ i965_drv_video.h \
i965_defines.h \ i965_defines.h \
i965_structs.h i965_structs.h \
i965_avc_bsd.h \
i965_avc_hw_scoreboard.h
This diff is collapsed.
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_BSD_H__
#define __I965_AVC_BSD_H__
/*
 * State for the AVC BSD code: a collection of buffer-object (dri_bo)
 * handles, each wrapped in its own named sub-struct.
 *
 * An instance of this struct is embedded in struct i965_h264_context.
 *
 * NOTE(review): the purpose of each buffer is not visible in this header;
 * the per-member remarks below are inferred from the member names and
 * should be confirmed against i965_avc_bsd.c.
 */
struct i965_avc_bsd_context
{
/* NOTE(review): presumably the BSD raw (row) store scratch buffer -- confirm. */
struct {
dri_bo *bo;
} bsd_raw_store;
/* NOTE(review): presumably the MPR row store scratch buffer -- confirm. */
struct {
dri_bo *bo;
} mpr_row_store;
/*
 * struct i965_h264_context has a member with the same name.
 * NOTE(review): presumably both refer to the same buffer -- confirm.
 */
struct {
dri_bo *bo;
} avc_it_command_mb_info;
/*
 * Buffer plus an offset into it.
 * NOTE(review): write_offset is presumably the current write position
 * inside bo -- confirm units and who updates it.
 */
struct {
dri_bo *bo;
long write_offset;
} avc_it_data;
/* NOTE(review): presumably data for the in-loop deblocking (ILDB) kernels -- confirm. */
struct {
dri_bo *bo;
} ildb_data;
};
/*
 * Entry points of the AVC BSD code (i965_avc_bsd.c is listed in the
 * driver's Makefile sources; its contents are not shown here).
 *
 * NOTE(review): "ternimate" is a misspelling of "terminate". The spelling is
 * kept because it is the public name and matches the other *_ternimate
 * functions in this driver (i965_media_mpeg2_ternimate,
 * i965_media_h264_ternimate).
 */
void i965_avc_bsd_pipeline(VADriverContextP, struct decode_state *);
void i965_avc_bsd_decode_init(VADriverContextP);
Bool i965_avc_bsd_ternimate(struct i965_avc_bsd_context *);
#endif /* __I965_AVC_BSD_H__ */
This diff is collapsed.
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_HW_SCOREBOARD_H__
#define __I965_AVC_HW_SCOREBOARD_H__
/*
 * State for the AVC hardware-scoreboard code: a few scalar parameters, a set
 * of buffer-object (dri_bo) handles and a URB layout description.
 *
 * An instance of this struct is embedded in struct i965_h264_context.
 *
 * NOTE(review): the per-member remarks below are inferred from member names
 * only; confirm against i965_avc_hw_scoreboard.c.
 */
struct i965_avc_hw_scoreboard_context
{
/* NOTE(review): presumably the values passed to the kernel as inline data -- confirm. */
struct {
unsigned int num_mb_cmds;
unsigned int starting_mb_number;
unsigned int pic_width_in_mbs;
} inline_data;
/*
 * Two buffer objects plus a macroblock count.
 * NOTE(review): ss_bo is presumably the surface-state object and s_bo the
 * surface itself -- confirm.
 */
struct {
dri_bo *ss_bo;
dri_bo *s_bo;
unsigned int total_mbs;
} surface;
struct {
dri_bo *bo;
} binding_table;
/* NOTE(review): presumably the interface descriptor remap table -- confirm. */
struct {
dri_bo *bo;
} idrt;
struct {
dri_bo *bo;
} vfe_state;
struct {
dri_bo *bo;
} curbe;
/* Kernel buffer object plus an offset (NOTE(review): presumably the kernel start within bo -- confirm). */
struct {
dri_bo *bo;
unsigned long offset;
} hw_kernel;
/* URB partitioning: start, entry count and entry size for the VFE and CS sections. */
struct {
unsigned int vfe_start;
unsigned int cs_start;
unsigned int num_vfe_entries;
unsigned int num_cs_entries;
unsigned int size_vfe_entry;
unsigned int size_cs_entry;
} urb;
};
/*
 * Entry points of the AVC hardware-scoreboard code (i965_avc_hw_scoreboard.c
 * is listed in the driver's Makefile sources; its contents are not shown
 * here).
 *
 * NOTE(review): "ternimate" is a misspelling of "terminate"; the spelling is
 * kept because it is the public name used throughout the driver.
 */
void i965_avc_hw_scoreboard(VADriverContextP, struct decode_state *);
void i965_avc_hw_scoreboard_decode_init(VADriverContextP);
Bool i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *);
#endif /* __I965_AVC_HW_SCOREBOARD_H__ */
...@@ -20,6 +20,13 @@ ...@@ -20,6 +20,13 @@
#define CMD_MEDIA_OBJECT CMD(2, 1, 0) #define CMD_MEDIA_OBJECT CMD(2, 1, 0)
#define CMD_MEDIA_OBJECT_EX CMD(2, 1, 1) #define CMD_MEDIA_OBJECT_EX CMD(2, 1, 1)
/*
 * BSD commands. All of them are built as CMD(2, 4, n); only the third
 * argument differs (0-4 and 8). The same numbers appear in
 * intel_batchbuffer_dump.h as GFXPIPE_BSD (2), OPCODE_BSD_AVC (4) and the
 * SUBOPCODE_BSD_* values.
 */
#define CMD_AVC_BSD_IMG_STATE CMD(2, 4, 0)
#define CMD_AVC_BSD_QM_STATE CMD(2, 4, 1)
#define CMD_AVC_BSD_SLICE_STATE CMD(2, 4, 2)
#define CMD_AVC_BSD_BUF_BASE_STATE CMD(2, 4, 3)
#define CMD_BSD_IND_OBJ_BASE_ADDR CMD(2, 4, 4)
#define CMD_AVC_BSD_OBJECT CMD(2, 4, 8)
#define CMD_PIPELINED_POINTERS CMD(3, 0, 0) #define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1) #define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
#define CMD_VERTEX_BUFFERS CMD(3, 0, 8) #define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
...@@ -321,6 +328,32 @@ ...@@ -321,6 +328,32 @@
#define I965_TILEWALK_XMAJOR 0 #define I965_TILEWALK_XMAJOR 0
#define I965_TILEWALK_YMAJOR 1 #define I965_TILEWALK_YMAJOR 1
#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \ #define SCAN_RASTER_ORDER 0
#define SCAN_SPECIAL_ORDER 1
#define ENTROPY_CAVLD 0
#define ENTROPY_CABAC 1
#define SLICE_TYPE_P 0
#define SLICE_TYPE_B 1
#define SLICE_TYPE_I 2
#define SLICE_TYPE_SP 3
#define SLICE_TYPE_SI 4
#define PRESENT_REF_LIST0 (1 << 0)
#define PRESENT_REF_LIST1 (1 << 1)
#define PRESENT_WEIGHT_OFFSET_L0 (1 << 2)
#define PRESENT_WEIGHT_OFFSET_L1 (1 << 3)
#define RESIDUAL_DATA_OFFSET 48
#define PRESENT_NOMV 0
#define PRESENT_NOWO 1
#define PRESENT_MV_WO 3
#define SCOREBOARD_STALLING 0
#define SCOREBOARD_NON_STALLING 1
#define URB_SIZE(intel) (IS_IRONLAKE(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256) IS_G4X(intel->device_id) ? 384 : 256)
#endif /* _I965_DEFINES_H_ */ #endif /* _I965_DEFINES_H_ */
...@@ -99,6 +99,9 @@ i965_QueryConfigProfiles(VADriverContextP ctx, ...@@ -99,6 +99,9 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
profile_list[i++] = VAProfileMPEG2Simple; profile_list[i++] = VAProfileMPEG2Simple;
profile_list[i++] = VAProfileMPEG2Main; profile_list[i++] = VAProfileMPEG2Main;
profile_list[i++] = VAProfileH264Baseline;
profile_list[i++] = VAProfileH264Main;
profile_list[i++] = VAProfileH264High;
/* If the assert fails then I965_MAX_PROFILES needs to be bigger */ /* If the assert fails then I965_MAX_PROFILES needs to be bigger */
assert(i <= I965_MAX_PROFILES); assert(i <= I965_MAX_PROFILES);
...@@ -122,6 +125,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, ...@@ -122,6 +125,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
entrypoint_list[0] = VAEntrypointVLD; entrypoint_list[0] = VAEntrypointVLD;
break; break;
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
*num_entrypoints = 1;
entrypoint_list[0] = VAEntrypointVLD;
break;
default: default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
*num_entrypoints = 0; *num_entrypoints = 0;
...@@ -217,6 +227,17 @@ i965_CreateConfig(VADriverContextP ctx, ...@@ -217,6 +227,17 @@ i965_CreateConfig(VADriverContextP ctx,
} }
break; break;
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
if (VAEntrypointVLD == entrypoint) {
vaStatus = VA_STATUS_SUCCESS;
} else {
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
}
break;
default: default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
break; break;
...@@ -305,6 +326,10 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj) ...@@ -305,6 +326,10 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
dri_bo_unreference(obj_surface->bo); dri_bo_unreference(obj_surface->bo);
obj_surface->bo = NULL; obj_surface->bo = NULL;
dri_bo_unreference(obj_surface->direct_mv_wr_top_bo);
obj_surface->direct_mv_wr_top_bo = NULL;
dri_bo_unreference(obj_surface->direct_mv_wr_bottom_bo);
obj_surface->direct_mv_wr_bottom_bo = NULL;
object_heap_free(heap, obj); object_heap_free(heap, obj);
} }
...@@ -344,12 +369,17 @@ i965_CreateSurfaces(VADriverContextP ctx, ...@@ -344,12 +369,17 @@ i965_CreateSurfaces(VADriverContextP ctx,
"vaapi surface", "vaapi surface",
obj_surface->size, obj_surface->size,
64); 64);
assert(obj_surface->bo); assert(obj_surface->bo);
if (NULL == obj_surface->bo) { obj_surface->direct_mv_wr_top_bo = dri_bo_alloc(i965->intel.bufmgr,
vaStatus = VA_STATUS_ERROR_UNKNOWN; "direct mv wr top",
break; 0x90000,
} 64);
assert(obj_surface->direct_mv_wr_top_bo);
obj_surface->direct_mv_wr_bottom_bo = dri_bo_alloc(i965->intel.bufmgr,
"direct mv wr bottom",
0x90000,
64);
assert(obj_surface->direct_mv_wr_bottom_bo);
} }
/* Error recovery */ /* Error recovery */
...@@ -881,6 +911,12 @@ i965_BeginPicture(VADriverContextP ctx, ...@@ -881,6 +911,12 @@ i965_BeginPicture(VADriverContextP ctx,
vaStatus = VA_STATUS_SUCCESS; vaStatus = VA_STATUS_SUCCESS;
break; break;
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
vaStatus = VA_STATUS_SUCCESS;
break;
default: default:
assert(0); assert(0);
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
...@@ -1014,6 +1050,7 @@ VAStatus ...@@ -1014,6 +1050,7 @@ VAStatus
i965_EndPicture(VADriverContextP ctx, VAContextID context) i965_EndPicture(VADriverContextP ctx, VAContextID context)
{ {
struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_render_state *render_state = &i965->render_state;
struct object_context *obj_context = CONTEXT(context); struct object_context *obj_context = CONTEXT(context);
struct object_config *obj_config; struct object_config *obj_config;
VAContextID config; VAContextID config;
...@@ -1026,6 +1063,18 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) ...@@ -1026,6 +1063,18 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context)
config = obj_context->config_id; config = obj_context->config_id;
obj_config = CONFIG(config); obj_config = CONFIG(config);
assert(obj_config); assert(obj_config);
switch (obj_config->profile) {
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
render_state->interleaved_uv = 1;
break;
default:
render_state->interleaved_uv = 0;
}
i965_media_decode_picture(ctx, obj_config->profile, &obj_context->decode_state); i965_media_decode_picture(ctx, obj_config->profile, &obj_context->decode_state);
obj_context->decode_state.current_render_target = -1; obj_context->decode_state.current_render_target = -1;
obj_context->decode_state.num_slices = 0; obj_context->decode_state.num_slices = 0;
...@@ -1131,7 +1180,7 @@ i965_Init(VADriverContextP ctx) ...@@ -1131,7 +1180,7 @@ i965_Init(VADriverContextP ctx)
return VA_STATUS_ERROR_UNKNOWN; return VA_STATUS_ERROR_UNKNOWN;
if (!IS_G4X(i965->intel.device_id) && if (!IS_G4X(i965->intel.device_id) &&
!IS_IGDNG(i965->intel.device_id)) !IS_IRONLAKE(i965->intel.device_id))
return VA_STATUS_ERROR_UNKNOWN; return VA_STATUS_ERROR_UNKNOWN;
if (i965_media_init(ctx) == False) if (i965_media_init(ctx) == False)
......
...@@ -97,6 +97,9 @@ struct object_surface ...@@ -97,6 +97,9 @@ struct object_surface
int height; int height;
int size; int size;
dri_bo *bo; dri_bo *bo;
/* FIXME: only for H.264 */
dri_bo *direct_mv_wr_top_bo;
dri_bo *direct_mv_wr_bottom_bo;
}; };
struct object_buffer struct object_buffer
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "i965_defines.h" #include "i965_defines.h"
#include "i965_media_mpeg2.h" #include "i965_media_mpeg2.h"
#include "i965_media_h264.h"
#include "i965_media.h" #include "i965_media.h"
#include "i965_drv_video.h" #include "i965_drv_video.h"
...@@ -72,13 +73,21 @@ static void ...@@ -72,13 +73,21 @@ static void
i965_media_state_base_address(VADriverContextP ctx) i965_media_state_base_address(VADriverContextP ctx)
{ {
struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_media_state *media_state = &i965->media_state;
if (IS_IGDNG(i965->intel.device_id)) { if (IS_IRONLAKE(i965->intel.device_id)) {
BEGIN_BATCH(ctx, 8); BEGIN_BATCH(ctx, 8);
OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
if (media_state->indirect_object.bo) {
OUT_RELOC(ctx, media_state->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
media_state->indirect_object.offset | BASE_ADDRESS_MODIFY);
} else {
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
}
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
...@@ -89,7 +98,14 @@ i965_media_state_base_address(VADriverContextP ctx) ...@@ -89,7 +98,14 @@ i965_media_state_base_address(VADriverContextP ctx)
OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4); OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
if (media_state->indirect_object.bo) {
OUT_RELOC(ctx, media_state->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
media_state->indirect_object.offset | BASE_ADDRESS_MODIFY);
} else {
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
}
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
ADVANCE_BATCH(ctx); ADVANCE_BATCH(ctx);
...@@ -222,6 +238,12 @@ i965_media_decode_init(VADriverContextP ctx, VAProfile profile) ...@@ -222,6 +238,12 @@ i965_media_decode_init(VADriverContextP ctx, VAProfile profile)
i965_media_mpeg2_decode_init(ctx); i965_media_mpeg2_decode_init(ctx);
break; break;
case VAProfileH264Baseline:
case VAProfileH264Main:
case VAProfileH264High:
i965_media_h264_decode_init(ctx);
break;
default: default:
assert(0); assert(0);
break; break;
...@@ -237,8 +259,8 @@ i965_media_decode_picture(VADriverContextP ctx, ...@@ -237,8 +259,8 @@ i965_media_decode_picture(VADriverContextP ctx,
struct i965_media_state *media_state = &i965->media_state; struct i965_media_state *media_state = &i965->media_state;
i965_media_decode_init(ctx, profile); i965_media_decode_init(ctx, profile);
assert(media_state->states_setup); assert(media_state->media_states_setup);
media_state->states_setup(ctx, decode_state); media_state->media_states_setup(ctx, decode_state);
i965_media_pipeline_setup(ctx, decode_state); i965_media_pipeline_setup(ctx, decode_state);
intel_batchbuffer_flush(ctx); intel_batchbuffer_flush(ctx);
} }
...@@ -247,6 +269,7 @@ Bool ...@@ -247,6 +269,7 @@ Bool
i965_media_init(VADriverContextP ctx) i965_media_init(VADriverContextP ctx)
{ {
i965_media_mpeg2_init(ctx); i965_media_mpeg2_init(ctx);
i965_media_h264_init(ctx);
return True; return True;
} }
...@@ -277,7 +300,11 @@ i965_media_terminate(VADriverContextP ctx) ...@@ -277,7 +300,11 @@ i965_media_terminate(VADriverContextP ctx)
dri_bo_unreference(media_state->curbe.bo); dri_bo_unreference(media_state->curbe.bo);
media_state->curbe.bo = NULL; media_state->curbe.bo = NULL;
dri_bo_unreference(media_state->indirect_object.bo);
media_state->indirect_object.bo = NULL;
i965_media_mpeg2_ternimate(ctx); i965_media_mpeg2_ternimate(ctx);
i965_media_h264_ternimate(ctx);
return True; return True;
} }
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "i965_structs.h" #include "i965_structs.h"
#define MAX_INTERFACE_DESC 16 #define MAX_INTERFACE_DESC 16
#define MAX_MEDIA_SURFACES 32 #define MAX_MEDIA_SURFACES 34
#define MPEG_TOP_FIELD 1 #define MPEG_TOP_FIELD 1
#define MPEG_BOTTOM_FIELD 2 #define MPEG_BOTTOM_FIELD 2
...@@ -82,6 +82,11 @@ struct i965_media_state ...@@ -82,6 +82,11 @@ struct i965_media_state
dri_bo *bo; dri_bo *bo;
} curbe; } curbe;
struct {
dri_bo *bo;
unsigned long offset;
} indirect_object;
struct { struct {
unsigned int vfe_start; unsigned int vfe_start;
unsigned int cs_start; unsigned int cs_start;
...@@ -93,7 +98,8 @@ struct i965_media_state ...@@ -93,7 +98,8 @@ struct i965_media_state
unsigned int size_cs_entry; unsigned int size_cs_entry;
} urb; } urb;
void (*states_setup)(VADriverContextP ctx, struct decode_state *decode_state); void *private_context;
void (*media_states_setup)(VADriverContextP ctx, struct decode_state *decode_state);
void (*media_objects)(VADriverContextP ctx, struct decode_state *decode_state); void (*media_objects)(VADriverContextP ctx, struct decode_state *decode_state);
}; };
......
This diff is collapsed.
#ifndef _I965_MEDIA_H264_H_
#define _I965_MEDIA_H264_H_
#include "i965_avc_bsd.h"
#include "i965_avc_hw_scoreboard.h"
/*
 * The same quantity in three units: 64 bytes = 16 * 4 bytes = 4 * 16 bytes.
 * NOTE(review): "DWS" and "OWS" presumably mean dwords (4 bytes) and owords
 * (16 bytes), and the quantity is presumably the size of one macroblock
 * command -- confirm against i965_media_h264.c.
 */
#define MB_CMD_IN_BYTES 64
#define MB_CMD_IN_DWS 16
#define MB_CMD_IN_OWS 4
/*
 * Consecutive identifiers starting at 0 (H264_AVC_NULL is therefore 1).
 * NOTE(review): presumably indices into the table of H.264 media kernels --
 * confirm against i965_media_h264.c.
 */
enum {
H264_AVC_COMBINED = 0,
H264_AVC_NULL
};
/*
 * State for H.264 decoding. It bundles two buffer objects, picture
 * dimensions, a few feature flags and weight values, and embeds the AVC BSD
 * and hardware-scoreboard contexts by value.
 *
 * NOTE(review): the per-member remarks below are inferred from member names
 * only; confirm against i965_media_h264.c.
 */
struct i965_h264_context
{
/* Buffer object plus a macroblock count (NOTE(review): presumably the number of MBs it holds -- confirm). */
struct {
dri_bo *bo;
unsigned int mbs;
} avc_it_command_mb_info;
/* Buffer object plus an offset (NOTE(review): presumably the current write position -- confirm). */
struct {
dri_bo *bo;
long write_offset;
} avc_it_data;
/* Picture size in macroblocks and the MBAFF frame flag of the current picture. */
struct {
unsigned int width_in_mbs;
unsigned int height_in_mbs;
int mbaff_frame_flag;
} picture;
/* NOTE(review): presumably non-zero when the hardware scoreboard path is enabled -- confirm. */
int use_avc_hw_scoreboard;
/* NOTE(review): presumably non-zero when the hardware handles a weight of 128 -- confirm. */
int use_hw_w128;
unsigned int weight128_luma_l0;
unsigned int weight128_luma_l1;
unsigned int weight128_chroma_l0;
unsigned int weight128_chroma_l1;
char weight128_offset0_flag;
short weight128_offset0;
/* Sub-contexts, stored by value (not as pointers). */
struct i965_avc_bsd_context i965_avc_bsd_context;
struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context;
};
/*
 * H.264 hooks used by the generic media code in i965_media.c:
 *  - i965_media_h264_init() is called from i965_media_init().
 *  - i965_media_h264_ternimate() is called from i965_media_terminate().
 *  - i965_media_h264_decode_init() is called from i965_media_decode_init()
 *    for the VAProfileH264Baseline/Main/High profiles.
 *
 * NOTE(review): "ternimate" is a misspelling of "terminate"; the spelling is
 * kept because it is the public name and matches
 * i965_media_mpeg2_ternimate().
 */
Bool i965_media_h264_init(VADriverContextP ctx);
Bool i965_media_h264_ternimate(VADriverContextP ctx);
void i965_media_h264_decode_init(VADriverContextP ctx);
#endif /* _I965_MEDIA_H264_H_ */
...@@ -280,7 +280,7 @@ static struct media_kernel mpeg2_vld_kernels_gen4[] = { ...@@ -280,7 +280,7 @@ static struct media_kernel mpeg2_vld_kernels_gen4[] = {
} }
}; };
/* On IGDNG */ /* On IRONLAKE */
static uint32_t frame_intra_kernel_gen5[][4] = { static uint32_t frame_intra_kernel_gen5[][4] = {
#include "shaders/mpeg2/vld/frame_intra.g4b.gen5" #include "shaders/mpeg2/vld/frame_intra.g4b.gen5"
}; };
...@@ -886,6 +886,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx) ...@@ -886,6 +886,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx)
dri_bo *bo; dri_bo *bo;
media_state->extended_state.enabled = 1; media_state->extended_state.enabled = 1;
media_state->indirect_object.bo = NULL;
dri_bo_unreference(media_state->extended_state.bo); dri_bo_unreference(media_state->extended_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr, bo = dri_bo_alloc(i965->intel.bufmgr,
"vld state", "vld state",
...@@ -907,7 +908,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx) ...@@ -907,7 +908,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx)
media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel))); media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
/* hook functions */ /* hook functions */
media_state->states_setup = i965_media_mpeg2_states_setup; media_state->media_states_setup = i965_media_mpeg2_states_setup;
media_state->media_objects = i965_media_mpeg2_objects; media_state->media_objects = i965_media_mpeg2_objects;
} }
...@@ -923,7 +924,7 @@ i965_media_mpeg2_init(VADriverContextP ctx) ...@@ -923,7 +924,7 @@ i965_media_mpeg2_init(VADriverContextP ctx)
sizeof(mpeg2_vld_kernels_gen5[0]))); sizeof(mpeg2_vld_kernels_gen5[0])));
assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC); assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC);
if (IS_IGDNG(i965->intel.device_id)) if (IS_IRONLAKE(i965->intel.device_id))
mpeg2_vld_kernels = mpeg2_vld_kernels_gen5; mpeg2_vld_kernels = mpeg2_vld_kernels_gen5;
else else
mpeg2_vld_kernels = mpeg2_vld_kernels_gen4; mpeg2_vld_kernels = mpeg2_vld_kernels_gen4;
......
This diff is collapsed.
...@@ -60,6 +60,11 @@ struct i965_render_state ...@@ -60,6 +60,11 @@ struct i965_render_state
dri_bo *viewport; dri_bo *viewport;
} cc; } cc;
struct {
dri_bo *bo;
} curbe;
int interleaved_uv;
struct intel_region *draw_region; struct intel_region *draw_region;
}; };
......
...@@ -34,6 +34,7 @@ struct i965_vfe_state_ex ...@@ -34,6 +34,7 @@ struct i965_vfe_state_ex
unsigned int obj_id:24; unsigned int obj_id:24;
} vfex0; } vfex0;
union {
struct { struct {
unsigned int residual_grf_offset:5; unsigned int residual_grf_offset:5;
unsigned int pad0:3; unsigned int pad0:3;
...@@ -41,9 +42,12 @@ struct i965_vfe_state_ex ...@@ -41,9 +42,12 @@ struct i965_vfe_state_ex
unsigned int pad1:3; unsigned int pad1:3;
unsigned int residual_data_offset:8; unsigned int residual_data_offset:8;
unsigned int sub_field_present_flag:2; unsigned int sub_field_present_flag:2;
unsigned int residual_data_fix_offset:1; unsigned int residual_data_fix_offset_flag:1;
unsigned int pad2:5; unsigned int pad2:5;
}vfex1; } avc;
unsigned int vc1;
} vfex1;
struct { struct {
unsigned int remap_index_0:4; unsigned int remap_index_0:4;
...@@ -68,18 +72,32 @@ struct i965_vfe_state_ex ...@@ -68,18 +72,32 @@ struct i965_vfe_state_ex
} remap_table1; } remap_table1;
struct { struct {
unsigned int scoreboard_mask:8; unsigned int mask:8;
unsigned int pad:22; unsigned int pad:22;
unsigned int type:1; unsigned int type:1;
unsigned int enable:1; unsigned int enable:1;
} scoreboard0; } scoreboard0;
struct { struct {
unsigned int ignore; int delta_x0:4;
int delta_y0:4;
int delta_x1:4;
int delta_y1:4;
int delta_x2:4;
int delta_y2:4;
int delta_x3:4;
int delta_y3:4;
} scoreboard1; } scoreboard1;
struct { struct {
unsigned int ignore; int delta_x4:4;
int delta_y4:4;
int delta_x5:4;
int delta_y5:4;
int delta_x6:4;
int delta_y6:4;
int delta_x7:4;
int delta_y7:4;
} scoreboard2; } scoreboard2;
unsigned int pad; unsigned int pad;
...@@ -177,8 +195,9 @@ struct i965_surface_state ...@@ -177,8 +195,9 @@ struct i965_surface_state
unsigned int cube_neg_y:1; unsigned int cube_neg_y:1;
unsigned int cube_pos_x:1; unsigned int cube_pos_x:1;
unsigned int cube_neg_x:1; unsigned int cube_neg_x:1;
unsigned int pad:3; unsigned int pad:2;
unsigned int render_cache_read_mode:1; unsigned int render_cache_read_mode:1;
unsigned int cube_map_corner_mode:1;
unsigned int mipmap_layout_mode:1; unsigned int mipmap_layout_mode:1;
unsigned int vert_line_stride_ofs:1; unsigned int vert_line_stride_ofs:1;
unsigned int vert_line_stride:1; unsigned int vert_line_stride:1;
......
This diff is collapsed.
...@@ -16,10 +16,16 @@ struct intel_batchbuffer ...@@ -16,10 +16,16 @@ struct intel_batchbuffer
unsigned char *map; unsigned char *map;
unsigned char *ptr; unsigned char *ptr;
int atomic; int atomic;
int flag;
int (*run)(drm_intel_bo *bo, int used,
drm_clip_rect_t *cliprects, int num_cliprects,
int DR4, int ring_flag);
}; };
Bool intel_batchbuffer_init(struct intel_driver_data *intel); Bool intel_batchbuffer_init(struct intel_driver_data *intel);
Bool intel_batchbuffer_terminate(struct intel_driver_data *intel); Bool intel_batchbuffer_terminate(struct intel_driver_data *intel);
void intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x); void intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x);
void intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo, void intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo,
uint32_t read_domains, uint32_t write_domains, uint32_t read_domains, uint32_t write_domains,
...@@ -31,6 +37,17 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size); ...@@ -31,6 +37,17 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size);
void intel_batchbuffer_end_atomic(VADriverContextP ctx); void intel_batchbuffer_end_atomic(VADriverContextP ctx);
Bool intel_batchbuffer_flush(VADriverContextP ctx); Bool intel_batchbuffer_flush(VADriverContextP ctx);
/*
 * "_bcs" counterparts of the batchbuffer functions declared above; names and
 * signatures mirror the non-_bcs versions one for one.
 * NOTE(review): presumably these operate on intel_driver_data::batch_bcs
 * instead of ::batch -- confirm in intel_batchbuffer.c.
 */
void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x);
void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo,
uint32_t read_domains, uint32_t write_domains,
uint32_t delta);
void intel_batchbuffer_require_space_bcs(VADriverContextP ctx, unsigned int size);
void intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size);
void intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx);
void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size);
void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx);
Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
#define BEGIN_BATCH(ctx, n) do { \ #define BEGIN_BATCH(ctx, n) do { \
intel_batchbuffer_require_space(ctx, (n) * 4); \ intel_batchbuffer_require_space(ctx, (n) * 4); \
} while (0) } while (0)
...@@ -48,4 +65,21 @@ Bool intel_batchbuffer_flush(VADriverContextP ctx); ...@@ -48,4 +65,21 @@ Bool intel_batchbuffer_flush(VADriverContextP ctx);
#define ADVANCE_BATCH(ctx) do { \ #define ADVANCE_BATCH(ctx) do { \
} while (0) } while (0)
/*
 * Convenience macros wrapping the *_bcs batchbuffer functions. Each one is a
 * do { ... } while (0) statement.
 */

/* Request room for n dwords (n * 4 bytes) in the batch. */
#define BEGIN_BCS_BATCH(ctx, n) do { \
intel_batchbuffer_require_space_bcs(ctx, (n) * 4); \
} while (0)
/* Emit the single dword d. */
#define OUT_BCS_BATCH(ctx, d) do { \
intel_batchbuffer_emit_dword_bcs(ctx, d); \
} while (0)
/* Emit a relocation to bo; asserts that delta is non-negative first. */
#define OUT_BCS_RELOC(ctx, bo, read_domains, write_domain, delta) do { \
assert((delta) >= 0); \
intel_batchbuffer_emit_reloc_bcs(ctx, bo, \
read_domains, write_domain, delta); \
} while (0)
/* Intentionally empty: expands to a no-op statement. */
#define ADVANCE_BCS_BATCH(ctx) do { \
} while (0)
#endif /* _INTEL_BATCHBUFFER_H_ */ #endif /* _INTEL_BATCHBUFFER_H_ */
This diff is collapsed.
#ifndef _INTEL_BATCHBUFFER_DUMP_H_
#define _INTEL_BATCHBUFFER_DUMP_H_
/*
 * Field masks/shifts used to pick apart a command dword, plus the field
 * values the dumper distinguishes.
 */

/* Command type: bits 31:29 of the dword. */
#define MASK_CMD_TYPE 0xE0000000
#define SHIFT_CMD_TYPE 29
#define CMD_TYPE_GFXPIPE 3
#define CMD_TYPE_BLT 2
#define CMD_TYPE_MI 0
/*
 * GFXPIPE commands: sub-type in bits 28:27, opcode in bits 26:24,
 * sub-opcode in bits 23:16 and length in bits 15:0.
 */
#define MASK_GFXPIPE_SUBTYPE 0x18000000
#define MASK_GFXPIPE_OPCODE 0x07000000
#define MASK_GFXPIPE_SUBOPCODE 0x00FF0000
#define MASK_GFXPIPE_LENGTH 0x0000FFFF
#define SHIFT_GFXPIPE_SUBTYPE 27
#define SHIFT_GFXPIPE_OPCODE 24
#define SHIFT_GFXPIPE_SUBOPCODE 16
#define SHIFT_GFXPIPE_LENGTH 0
/* GFXPIPE sub-type value for 3D commands. */
#define GFXPIPE_3D 3
/*
 * GFXPIPE sub-type, opcode and sub-opcode values for the AVC BSD commands;
 * these match the CMD(2, 4, n) encodings of the BSD commands in
 * i965_defines.h.
 */
#define GFXPIPE_BSD 2
#define OPCODE_BSD_AVC 4
#define SUBOPCODE_BSD_IMG 0
#define SUBOPCODE_BSD_QM 1
#define SUBOPCODE_BSD_SLICE 2
#define SUBOPCODE_BSD_BUF_BASE 3
#define SUBOPCODE_BSD_IND_OBJ 4
#define SUBOPCODE_BSD_OBJECT 8
/*
 * MI commands: opcode in bits 28:23. The two opcode values match MI_FLUSH
 * (0x4 << 23) and MI_BATCH_BUFFER_END (0xA << 23) in intel_driver.h.
 */
#define MASK_MI_OPCODE 0x1F800000
#define SHIFT_MI_OPCODE 23
#define OPCODE_MI_FLUSH 0x04
#define OPCODE_MI_BATCH_BUFFER_END 0x0A
/*
 * Dump the commands found in data.
 * NOTE(review): the units of offset/count (bytes vs. dwords), the meaning of
 * device (presumably the PCI device id) and the return value are not visible
 * from this header -- confirm in intel_batchbuffer_dump.c.
 */
int intel_batchbuffer_dump(unsigned int *data, unsigned int offset, int count, unsigned int device);
#endif /* _INTEL_BATCHBUFFER_DUMP_H_ */
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#define INLINE #define INLINE
#endif #endif
#define BATCH_SIZE 0x10000 #define BATCH_SIZE 0x100000
#define BATCH_RESERVED 0x10 #define BATCH_RESERVED 0x10
#define CMD_MI (0x0 << 29) #define CMD_MI (0x0 << 29)
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#define MI_NOOP (CMD_MI | 0) #define MI_NOOP (CMD_MI | 0)
#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23)) #define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
#define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23))
#define MI_FLUSH (CMD_MI | (0x4 << 23)) #define MI_FLUSH (CMD_MI | (0x4 << 23))
#define STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0) #define STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0)
...@@ -85,6 +86,7 @@ struct intel_driver_data ...@@ -85,6 +86,7 @@ struct intel_driver_data
int locked; int locked;
struct intel_batchbuffer *batch; struct intel_batchbuffer *batch;
struct intel_batchbuffer *batch_bcs;
dri_bufmgr *bufmgr; dri_bufmgr *bufmgr;
}; };
...@@ -118,8 +120,8 @@ struct intel_region ...@@ -118,8 +120,8 @@ struct intel_region
#define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_IGDNG_D_G 0x0042 #define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IGDNG_M_G 0x0046 #define PCI_CHIP_IRONLAKE_M_G 0x0046
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_Q45_G || \
...@@ -128,8 +130,8 @@ struct intel_region ...@@ -128,8 +130,8 @@ struct intel_region
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G) #define IS_IRONLAKE_D(devid) (devid == PCI_CHIP_IRONLAKE_D_G)
#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G) #define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G)
#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid)) #define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
#endif /* _INTEL_DRIVER_H_ */ #endif /* _INTEL_DRIVER_H_ */
...@@ -50,7 +50,6 @@ static int object_heap_expand( object_heap_p heap ) ...@@ -50,7 +50,6 @@ static int object_heap_expand( object_heap_p heap )
{ {
return -1; /* Out of memory */ return -1; /* Out of memory */
} }
memset(new_heap_index + heap->heap_size*heap->object_size, 0, heap->heap_increment * new_heap_size);
heap->heap_index = new_heap_index; heap->heap_index = new_heap_index;
next_free = heap->next_free; next_free = heap->next_free;
for(i = new_heap_size; i-- > heap->heap_size; ) for(i = new_heap_size; i-- > heap->heap_size; )
......
SUBDIRS = mpeg2 render SUBDIRS = h264 mpeg2 render
This diff is collapsed.
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#include "AVC_ILDB_Child_UV.asm"
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#include "AVC_ILDB_Child_Y.asm"
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
//
// First de-block vertical edges from left to right.
// Second de-block horizontal edge from top to bottom.
//
// For 4:2:0, chroma is always de-blocked at 8x8.
// The NV12 format allows U and V to be filtered together.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Child-thread kernel flow (MBAFF, chroma):
//   1. Derive byte coordinates and the control-data offset from ORIX/ORIY.
//   2. Run the de-blocking body once with BotFieldFlag cleared, then set
//      BotFieldFlag and run the body a second time via RE_ENTRY_UV.
//   3. Forward a notification to the root thread and (non-GW_DCN builds) end the child thread.
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_UV
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif
#include "setupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE997:w
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_W RTempW
#define BUF_D RTempD
// Init local variables
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
// Clear two consecutive flag words starting at f0.0 (f0.0 and f0.1)
mov (2) f0.0<1>:w 0:w
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_UV)
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds the global byte offset.
#if !defined(DEV_CL)
// Non-Crestline path: byte offset = (MBsCntX * ORIY + ORIX) * 128, i.e. 64 << 1 for this MBAFF kernel
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
// Build a ramp from 0 to 15
mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub
add (8) RRampW(0,8)<1> RRampW(0,8) 8:w // RRampW = ramp 15-0
ILDB_LABEL(RE_ENTRY_UV): // Re-entry point for the bottom field
// Load current MB control data
#if defined(DEV_CL)
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init ECM_AddrReg
// Use free cycles here
// Check loaded control data
// f0.1: per-channel "word is zero" test over 16 edge-control words; f0.0: DISABLE_ILDB_FLAG is set
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB
(f0.0) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB
// Set DualFieldMode for all data read, write and deblocking
// (CTemp1_W was already computed above, before the skip branches)
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// DualFieldMode = 1 when the masked flags equal ABOVE_FIELD_CUR_FRAME, else 0
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w
and (1) DualFieldMode:w f0.0:w 0x0001:w
#include "load_Cur_UV_8x8T_Mbaff.asm" // Load transposed data 8x8
#include "load_Left_UV_2x8T_Mbaff.asm" // Load left MB (2x8) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm"
#include "Transpose_Left_UV_2x8.asm"
//---------- Perform vertical ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_v.asm"
//---------------------------------------------------------
#include "save_Left_UV_8x2T_Mbaff.asm" // Write left MB (2x8) UV data to memory if it exists
#include "load_Top_UV_8x2_Mbaff.asm" // Load top MB (8x2) UV data from memory if it exists
#include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_h.asm"
//-----------------------------------------------------------
#include "save_Cur_UV_8x8_Mbaff.asm" // Write 8x8
#include "save_Top_UV_8x2_Mbaff.asm" // Write top MB (8x2) if not the top row
//-----------------------------------------------------------
ILDB_LABEL(SKIP_ILDB_UV):
// f0.0 is set only if BotFieldFlag was still clear, i.e. the pass just finished was the first one
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi ILDB_LABEL(RE_ENTRY_UV) // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
// Self-move of the writeback response register; presumably stalls until the last write's response arrives (confirm)
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
ILDB_LABEL(POST_ILDB_UV):
// Send notification thru Gateway to root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Chroma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edge from top to bottom.
//
// ***** MBAFF Mode *****
// This version deblocks top MB first, followed by bottom MB.
//
// Need variable CurMB to indicate top MB or bottom MB (CurMB = 0 or 1).
// We can use BotFieldFlag in BitFields to represent it.
//
// Usage:
// 1) Access control data for top
// CntrlDataOffsetY + CurMB * Control data block size (64 DWs for CL, 16 DWs for BLC)
//
// 2) Load frame/field video data based on flags: FieldModeCurrentMbFlag, FieldModeLeftMbFlag, FieldModeAboveMbFlag.
//
// E.g.
// if (pCntlData->BitField & FieldModeCurrentMbFlag)
// cur_y = ORIX_CUR.y + CurMB * 1; // Add field vertical offset for bot field MB .
// else
// cur_y = ORIX_CUR.y + CurMB * MB_Rows_Y; // Add bottom MB vertical offset for bot MB
//
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Child-thread kernel flow (MBAFF, luma):
//   1. Derive byte coordinates and the control-data offset from ORIX/ORIY.
//   2. Run the de-blocking body once with BotFieldFlag cleared, then set
//      BotFieldFlag and run the body a second time via RE_ENTRY.
//   3. Forward a notification to the root thread and (non-GW_DCN builds) end the child thread.
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif
#include "setupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE998:w
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_D RTempD
// Init local variables
// These coordinates are in progressive fashion
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
// Clear two consecutive flag words starting at f0.0 (f0.0 and f0.1)
mov (2) f0.0<1>:w 0:w
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi POST_ILDB
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds the global byte offset.
#if !defined(DEV_CL)
// Non-Crestline path: byte offset = (MBsCntX * ORIY + ORIX) * 128, i.e. 64 << 1 for this MBAFF kernel
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
RE_ENTRY: // Re-entry point for the bottom field
// Load current MB control data
#if defined(DEV_CL)
#include "load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init edge control map AddrReg
// Check loaded control data
// f0.1: per-channel "word is zero" test over 16 edge-control words; f0.0: DISABLE_ILDB_FLAG is set
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Use free cycles here
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi SKIP_ILDB // Skip ILDB
(f0.0) jmpi SKIP_ILDB // Skip ILDB
// Set DualFieldMode for all data read, write and deblocking
// (CTemp1_W was already computed above, before the skip branches)
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// DualFieldMode = 1 when the masked flags equal ABOVE_FIELD_CUR_FRAME, else 0
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w
and (1) DualFieldMode:w f0.0:w 0x0001:w
// Load current MB // DDD1
#include "load_Cur_Y_16x16T_Mbaff.asm" // Load cur Y, 16x16, transpose
#include "load_Left_Y_4x16T_Mbaff.asm" // Load left MB (4x16) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm"
#include "Transpose_Left_Y_4x16.asm"
//---------- Perform vertical ILDB filtering on Y ----------
#include "AVC_ILDB_Filter_Mbaff_Y_v.asm"
//-------------------------------------------------------
#include "save_Left_Y_16x4T_Mbaff.asm" // Write left MB (4x16) Y data to memory if exists
#include "load_Top_Y_16x4_Mbaff.asm" // Load top MB (16x4) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on Y ----------
#include "AVC_ILDB_Filter_Mbaff_Y_h.asm"
//----------------------------------------------------------
#include "save_Cur_Y_16x16_Mbaff.asm" // Write cur MB (16x16)
#include "save_Top_Y_16x4_Mbaff.asm" // Write top MB (16x4) if not the top row
SKIP_ILDB:
//----------------------------------------------------------
// f0.0 is set only if BotFieldFlag was still clear, i.e. the pass just finished was the first one
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi RE_ENTRY // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
// Self-move of the writeback response register; presumably stalls until the last write's response arrives (confirm)
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
POST_ILDB:
//---------------------------------------------------------------------------
// Send notification thru Gateway to root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Luma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//----- Close a Message Gateway -----
#if defined(_DEBUG)
// NOTE(review): the destination is typed :b while the immediate is a 16-bit :w value -- confirm this is intended.
mov (1) EntrySignature:b 0x4444:w
#endif
// Message descriptor
// bit 31 EOD
// 27:24 FFID = 0011b (0x3) for msg gateway
// 23:20 msg length = 1 MRF
// 19:16 Response length = 0
// 1:0 SubFuncID = 01 for CloseGateway
// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001
// The packed bit pattern above is 0x03100001; CGWMSGDSC is defined elsewhere and presumably holds this value (confirm).
send (8) null:ud m7 r0.0:ud MSG_GW CGWMSGDSC
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#include "AVC_ILDB_Root_UV.asm"
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#include "AVC_ILDB_Root_Y.asm"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment