Commit 3a2bed26 authored by benoit

Unroll codepath

Patch by strites: strites gmail com
Original thread: [FFmpeg-devel] [PATCH] Patch cleanup for MPEG 1 & 2 optimizations
Date: 04/06/2008 11:32 PM


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@12917 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 722ab4a3
...@@ -1737,7 +1737,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s) ...@@ -1737,7 +1737,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
*/ */
static av_always_inline static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
int lowres_flag) int lowres_flag, int is_mpeg12)
{ {
int mb_x, mb_y; int mb_x, mb_y;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
...@@ -1764,7 +1764,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], ...@@ -1764,7 +1764,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
/* update DC predictors for P macroblocks */ /* update DC predictors for P macroblocks */
if (!s->mb_intra) { if (!s->mb_intra) {
if (s->h263_pred || s->h263_aic) { if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
if(s->mbintra_table[mb_xy]) if(s->mbintra_table[mb_xy])
ff_clean_intra_table_entries(s); ff_clean_intra_table_entries(s);
} else { } else {
...@@ -1773,7 +1773,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], ...@@ -1773,7 +1773,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
s->last_dc[2] = 128 << s->intra_dc_precision; s->last_dc[2] = 128 << s->intra_dc_precision;
} }
} }
else if (s->h263_pred || s->h263_aic) else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
s->mbintra_table[mb_xy]=1; s->mbintra_table[mb_xy]=1;
if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
...@@ -1888,7 +1888,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], ...@@ -1888,7 +1888,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale); add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
} }
} }
} else if(s->codec_id != CODEC_ID_WMV2){ } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
add_dct(s, block[0], 0, dest_y , dct_linesize); add_dct(s, block[0], 0, dest_y , dct_linesize);
add_dct(s, block[1], 1, dest_y + block_size, dct_linesize); add_dct(s, block[1], 1, dest_y + block_size, dct_linesize);
add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize); add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize);
...@@ -1979,8 +1979,14 @@ skip_idct: ...@@ -1979,8 +1979,14 @@ skip_idct:
} }
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1); #ifndef CONFIG_SMALL
else MPV_decode_mb_internal(s, block, 0); if(s->out_format == FMT_MPEG1) {
if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
else MPV_decode_mb_internal(s, block, 0, 1);
} else
#endif
if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
else MPV_decode_mb_internal(s, block, 0, 0);
} }
/** /**
......
...@@ -237,13 +237,12 @@ static inline int hpel_motion(MpegEncContext *s, ...@@ -237,13 +237,12 @@ static inline int hpel_motion(MpegEncContext *s,
return emu; return emu;
} }
/* apply one mpeg motion vector to the three components */
static av_always_inline static av_always_inline
void mpeg_motion(MpegEncContext *s, void mpeg_motion_internal(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select, int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, op_pixels_func (*pix_op)[4], uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
int motion_x, int motion_y, int h) int motion_x, int motion_y, int h, int is_mpeg12)
{ {
uint8_t *ptr_y, *ptr_cb, *ptr_cr; uint8_t *ptr_y, *ptr_cb, *ptr_cr;
int dxy, uvdxy, mx, my, src_x, src_y, int dxy, uvdxy, mx, my, src_x, src_y,
...@@ -265,7 +264,7 @@ if(s->quarter_sample) ...@@ -265,7 +264,7 @@ if(s->quarter_sample)
src_x = s->mb_x* 16 + (motion_x >> 1); src_x = s->mb_x* 16 + (motion_x >> 1);
src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1); src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
if (s->out_format == FMT_H263) { if (!is_mpeg12 && s->out_format == FMT_H263) {
if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){ if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
mx = (motion_x>>1)|(motion_x&1); mx = (motion_x>>1)|(motion_x&1);
my = motion_y >>1; my = motion_y >>1;
...@@ -277,7 +276,7 @@ if(s->quarter_sample) ...@@ -277,7 +276,7 @@ if(s->quarter_sample)
uvsrc_x = src_x>>1; uvsrc_x = src_x>>1;
uvsrc_y = src_y>>1; uvsrc_y = src_y>>1;
} }
}else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261 }else if(!is_mpeg12 && s->out_format == FMT_H261){//even chroma mv's are full pel in H261
mx = motion_x / 4; mx = motion_x / 4;
my = motion_y / 4; my = motion_y / 4;
uvdxy = 0; uvdxy = 0;
...@@ -312,7 +311,7 @@ if(s->quarter_sample) ...@@ -312,7 +311,7 @@ if(s->quarter_sample)
if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16 if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
|| (unsigned)src_y > v_edge_pos - (motion_y&1) - h){ || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
if(s->codec_id == CODEC_ID_MPEG2VIDEO || if(is_mpeg12 || s->codec_id == CODEC_ID_MPEG2VIDEO ||
s->codec_id == CODEC_ID_MPEG1VIDEO){ s->codec_id == CODEC_ID_MPEG1VIDEO){
av_log(s->avctx,AV_LOG_DEBUG, av_log(s->avctx,AV_LOG_DEBUG,
"MPEG motion vector out of boundary\n"); "MPEG motion vector out of boundary\n");
...@@ -360,11 +359,30 @@ if(s->quarter_sample) ...@@ -360,11 +359,30 @@ if(s->quarter_sample)
pix_op[s->chroma_x_shift][uvdxy] pix_op[s->chroma_x_shift][uvdxy]
(dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift); (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
} }
if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) && if(!is_mpeg12 && (ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
s->out_format == FMT_H261){ s->out_format == FMT_H261){
ff_h261_loop_filter(s); ff_h261_loop_filter(s);
} }
} }
/**
 * Apply one MPEG motion vector to the three components.
 *
 * Thin dispatcher in front of mpeg_motion_internal(): every argument is
 * forwarded unchanged and a literal is_mpeg12 flag is appended. The flag is
 * 1 only when CONFIG_SMALL is not defined and s->out_format is FMT_MPEG1;
 * in every other case it is 0.
 *
 * NOTE(review): the flag is deliberately passed as a literal in each call,
 * presumably so the always-inlined worker can be specialised per value --
 * do not fold the two calls into one taking a runtime variable.
 */
static av_always_inline
void mpeg_motion(MpegEncContext *s,
                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                 int field_based, int bottom_field, int field_select,
                 uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                 int motion_x, int motion_y, int h)
{
#ifndef CONFIG_SMALL
    /* Dedicated MPEG-1/2 instance; compiled out when CONFIG_SMALL is defined. */
    if (s->out_format == FMT_MPEG1) {
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr,
                             field_based, bottom_field, field_select,
                             ref_picture, pix_op,
                             motion_x, motion_y, h, 1);
        return;
    }
#endif
    /* Generic instance for every other output format. */
    mpeg_motion_internal(s, dest_y, dest_cb, dest_cr,
                         field_based, bottom_field, field_select,
                         ref_picture, pix_op,
                         motion_x, motion_y, h, 0);
}
//FIXME move to dsputil, avg variant, 16x16 version //FIXME move to dsputil, avg variant, 16x16 version
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){ static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
...@@ -617,12 +635,12 @@ static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){ ...@@ -617,12 +635,12 @@ static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
* @param pic_op qpel motion compensation function (average or put normally) * @param pic_op qpel motion compensation function (average or put normally)
* the motion vectors are taken from s->mv and the MV type from s->mv_type * the motion vectors are taken from s->mv and the MV type from s->mv_type
*/ */
static inline void MPV_motion(MpegEncContext *s, static inline void MPV_motion_internal(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr, int dir, uint8_t *dest_cr, int dir,
uint8_t **ref_picture, uint8_t **ref_picture,
op_pixels_func (*pix_op)[4], op_pixels_func (*pix_op)[4],
qpel_mc_func (*qpix_op)[16]) qpel_mc_func (*qpix_op)[16], int is_mpeg12)
{ {
int dxy, mx, my, src_x, src_y, motion_x, motion_y; int dxy, mx, my, src_x, src_y, motion_x, motion_y;
int mb_x, mb_y, i; int mb_x, mb_y, i;
...@@ -633,7 +651,7 @@ static inline void MPV_motion(MpegEncContext *s, ...@@ -633,7 +651,7 @@ static inline void MPV_motion(MpegEncContext *s,
prefetch_motion(s, ref_picture, dir); prefetch_motion(s, ref_picture, dir);
if(s->obmc && s->pict_type != FF_B_TYPE){ if(!is_mpeg12 && s->obmc && s->pict_type != FF_B_TYPE){
int16_t mv_cache[4][4][2]; int16_t mv_cache[4][4][2];
const int xy= s->mb_x + s->mb_y*s->mb_stride; const int xy= s->mb_x + s->mb_y*s->mb_stride;
const int mot_stride= s->b8_stride; const int mot_stride= s->b8_stride;
...@@ -704,12 +722,12 @@ static inline void MPV_motion(MpegEncContext *s, ...@@ -704,12 +722,12 @@ static inline void MPV_motion(MpegEncContext *s,
gmc_motion(s, dest_y, dest_cb, dest_cr, gmc_motion(s, dest_y, dest_cb, dest_cr,
ref_picture); ref_picture);
} }
}else if(s->quarter_sample){ }else if(!is_mpeg12 && s->quarter_sample){
qpel_motion(s, dest_y, dest_cb, dest_cr, qpel_motion(s, dest_y, dest_cb, dest_cr,
0, 0, 0, 0, 0, 0,
ref_picture, pix_op, qpix_op, ref_picture, pix_op, qpix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16); s->mv[dir][0][0], s->mv[dir][0][1], 16);
}else if(ENABLE_WMV2 && s->mspel){ }else if(!is_mpeg12 && ENABLE_WMV2 && s->mspel){
ff_mspel_motion(s, dest_y, dest_cb, dest_cr, ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
ref_picture, pix_op, ref_picture, pix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16); s->mv[dir][0][0], s->mv[dir][0][1], 16);
...@@ -722,6 +740,7 @@ static inline void MPV_motion(MpegEncContext *s, ...@@ -722,6 +740,7 @@ static inline void MPV_motion(MpegEncContext *s,
} }
break; break;
case MV_TYPE_8X8: case MV_TYPE_8X8:
if (!is_mpeg12) {
mx = 0; mx = 0;
my = 0; my = 0;
if(s->quarter_sample){ if(s->quarter_sample){
...@@ -775,10 +794,11 @@ static inline void MPV_motion(MpegEncContext *s, ...@@ -775,10 +794,11 @@ static inline void MPV_motion(MpegEncContext *s,
if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my); chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
}
break; break;
case MV_TYPE_FIELD: case MV_TYPE_FIELD:
if (s->picture_structure == PICT_FRAME) { if (s->picture_structure == PICT_FRAME) {
if(s->quarter_sample){ if(!is_mpeg12 && s->quarter_sample){
for(i=0; i<2; i++){ for(i=0; i<2; i++){
qpel_motion(s, dest_y, dest_cb, dest_cr, qpel_motion(s, dest_y, dest_cb, dest_cr,
1, i, s->field_select[dir][i], 1, i, s->field_select[dir][i],
...@@ -862,4 +882,20 @@ static inline void MPV_motion(MpegEncContext *s, ...@@ -862,4 +882,20 @@ static inline void MPV_motion(MpegEncContext *s,
} }
} }
/**
 * Motion compensation entry point for a single macroblock.
 *
 * Thin dispatcher in front of MPV_motion_internal(): every argument is
 * forwarded unchanged and a literal is_mpeg12 flag is appended. The flag is
 * 1 only when CONFIG_SMALL is not defined and s->out_format is FMT_MPEG1;
 * in every other case it is 0.
 *
 * NOTE(review): the flag is deliberately passed as a literal in each call,
 * presumably so the inlined worker can be specialised per value -- do not
 * fold the two calls into one taking a runtime variable.
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb,
                              uint8_t *dest_cr, int dir,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4],
                              qpel_mc_func (*qpix_op)[16])
{
#ifndef CONFIG_SMALL
    /* Dedicated MPEG-1/2 instance; compiled out when CONFIG_SMALL is defined. */
    if (s->out_format == FMT_MPEG1) {
        MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 1);
        return;
    }
#endif
    /* Generic instance for every other output format. */
    MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                        ref_picture, pix_op, qpix_op, 0);
}
#endif /* FFMPEG_MPEGVIDEO_COMMON_H */ #endif /* FFMPEG_MPEGVIDEO_COMMON_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment