Commit 86ca8255 authored by Rémi Denis-Courmont's avatar Rémi Denis-Courmont

deinterlace: move Phosphor MMX acceleration to a separate function

parent 4c983898
...@@ -78,11 +78,6 @@ static void DarkenField( picture_t *p_dst, ...@@ -78,11 +78,6 @@ static void DarkenField( picture_t *p_dst,
/* Bitwise ANDing with this clears the i_strength highest bits /* Bitwise ANDing with this clears the i_strength highest bits
of each byte */ of each byte */
#ifdef CAN_COMPILE_MMXEXT
const bool mmxext = vlc_CPU_MMXEXT();
uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
number of bits) */
#endif
const uint8_t remove_high_u8 = 0xFF >> i_strength; const uint8_t remove_high_u8 = 0xFF >> i_strength;
const uint64_t remove_high_u64 = remove_high_u8 * const uint64_t remove_high_u64 = remove_high_u8 *
INT64_C(0x0101010101010101); INT64_C(0x0101010101010101);
...@@ -92,7 +87,7 @@ static void DarkenField( picture_t *p_dst, ...@@ -92,7 +87,7 @@ static void DarkenField( picture_t *p_dst,
For luma, the operation is just a shift + bitwise AND, so we vectorize For luma, the operation is just a shift + bitwise AND, so we vectorize
even in the C version. even in the C version.
There is an MMX version, too, because it performs about twice faster. There is an MMX version too, because it performs about twice faster.
*/ */
int i_plane = Y_PLANE; int i_plane = Y_PLANE;
uint8_t *p_out, *p_out_end; uint8_t *p_out, *p_out_end;
...@@ -112,26 +107,90 @@ static void DarkenField( picture_t *p_dst, ...@@ -112,26 +107,90 @@ static void DarkenField( picture_t *p_dst,
uint64_t *po = (uint64_t *)p_out; uint64_t *po = (uint64_t *)p_out;
int x = 0; int x = 0;
#ifdef CAN_COMPILE_MMXEXT for( ; x < w8; x += 8, ++po )
if( mmxext ) (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
/* handle the width remainder */
uint8_t *po_temp = (uint8_t *)po;
for( ; x < w; ++x, ++po_temp )
(*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
}
/* Process chroma if the field chromas are independent.
The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
The chroma processing is a bit more complicated than luma,
and needs MMX for vectorization.
*/
if( process_chroma )
{
for( i_plane++ /* luma already handled*/;
i_plane < p_dst->i_planes;
i_plane++ )
{ {
movq_m2r( i_strength_u64, mm1 ); int w = p_dst->p[i_plane].i_visible_pitch;
movq_m2r( remove_high_u64, mm2 ); p_out = p_dst->p[i_plane].p_pixels;
for( ; x < w8; x += 8 ) p_out_end = p_out + p_dst->p[i_plane].i_pitch
* p_dst->p[i_plane].i_visible_lines;
/* skip first line for bottom field */
if( i_field == 1 )
p_out += p_dst->p[i_plane].i_pitch;
for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
{ {
movq_m2r( (*po), mm0 ); /* Handle the width remainder */
uint8_t *po = p_out;
for( int x = 0; x < w; ++x, ++po )
(*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
} /* for p_out... */
} /* for i_plane... */
} /* if process_chroma */
}
psrlq_r2r( mm1, mm0 ); #ifdef CAN_COMPILE_MMXEXT
pand_r2r( mm2, mm0 ); VLC_MMX
static void DarkenFieldMMX( picture_t *p_dst,
const int i_field, const int i_strength,
bool process_chroma )
{
assert( p_dst != NULL );
assert( i_field == 0 || i_field == 1 );
assert( i_strength >= 1 && i_strength <= 3 );
movq_r2m( mm0, (*po++) ); uint64_t i_strength_u64 = i_strength; /* needs to know number of bits */
} const uint8_t remove_high_u8 = 0xFF >> i_strength;
} const uint64_t remove_high_u64 = remove_high_u8 *
else INT64_C(0x0101010101010101);
#endif
int i_plane = Y_PLANE;
uint8_t *p_out, *p_out_end;
int w = p_dst->p[i_plane].i_visible_pitch;
p_out = p_dst->p[i_plane].p_pixels;
p_out_end = p_out + p_dst->p[i_plane].i_pitch
* p_dst->p[i_plane].i_visible_lines;
/* skip first line for bottom field */
if( i_field == 1 )
p_out += p_dst->p[i_plane].i_pitch;
int wm8 = w % 8; /* remainder */
int w8 = w - wm8; /* part of width that is divisible by 8 */
for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
{
uint64_t *po = (uint64_t *)p_out;
int x = 0;
movq_m2r( i_strength_u64, mm1 );
movq_m2r( remove_high_u64, mm2 );
for( ; x < w8; x += 8 )
{ {
for( ; x < w8; x += 8, ++po ) movq_m2r( (*po), mm0 );
(*po) = ( ((*po) >> i_strength) & remove_high_u64 );
psrlq_r2r( mm1, mm0 );
pand_r2r( mm2, mm0 );
movq_r2m( mm0, (*po++) );
} }
/* handle the width remainder */ /* handle the width remainder */
...@@ -148,16 +207,14 @@ static void DarkenField( picture_t *p_dst, ...@@ -148,16 +207,14 @@ static void DarkenField( picture_t *p_dst,
*/ */
if( process_chroma ) if( process_chroma )
{ {
for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ ) for( i_plane++ /* luma already handled */;
i_plane < p_dst->i_planes;
i_plane++ )
{ {
if( i_plane == Y_PLANE )
continue; /* luma already handled */
int w = p_dst->p[i_plane].i_visible_pitch; int w = p_dst->p[i_plane].i_visible_pitch;
#ifdef CAN_COMPILE_MMXEXT
int wm8 = w % 8; /* remainder */ int wm8 = w % 8; /* remainder */
int w8 = w - wm8; /* part of width that is divisible by 8 */ int w8 = w - wm8; /* part of width that is divisible by 8 */
#endif
p_out = p_dst->p[i_plane].p_pixels; p_out = p_dst->p[i_plane].p_pixels;
p_out_end = p_out + p_dst->p[i_plane].i_pitch p_out_end = p_out + p_dst->p[i_plane].i_pitch
* p_dst->p[i_plane].i_visible_lines; * p_dst->p[i_plane].i_visible_lines;
...@@ -170,42 +227,36 @@ static void DarkenField( picture_t *p_dst, ...@@ -170,42 +227,36 @@ static void DarkenField( picture_t *p_dst,
{ {
int x = 0; int x = 0;
#ifdef CAN_COMPILE_MMXEXT
/* See also easy-to-read C version below. */ /* See also easy-to-read C version below. */
if( mmxext ) static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
movq_m2r( b128, mm5 );
movq_m2r( i_strength_u64, mm6 );
movq_m2r( remove_high_u64, mm7 );
uint64_t *po8 = (uint64_t *)p_out;
for( ; x < w8; x += 8 )
{ {
static const mmx_t b128 = { .uq = 0x8080808080808080ULL }; movq_m2r( (*po8), mm0 );
movq_m2r( b128, mm5 );
movq_m2r( i_strength_u64, mm6 ); movq_r2r( mm5, mm2 ); /* 128 */
movq_m2r( remove_high_u64, mm7 ); movq_r2r( mm0, mm1 ); /* copy of data */
psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
uint64_t *po = (uint64_t *)p_out; psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
for( ; x < w8; x += 8 )
{ /* >> i_strength */
movq_m2r( (*po), mm0 ); psrlq_r2r( mm6, mm1 );
psrlq_r2r( mm6, mm2 );
movq_r2r( mm5, mm2 ); /* 128 */ pand_r2r( mm7, mm1 );
movq_r2r( mm0, mm1 ); /* copy of data */ pand_r2r( mm7, mm2 );
psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */ /* collect results from pos./neg. parts */
psubb_r2r( mm2, mm1 );
/* >> i_strength */ paddb_r2r( mm5, mm1 );
psrlq_r2r( mm6, mm1 );
psrlq_r2r( mm6, mm2 ); movq_r2m( mm1, (*po8++) );
pand_r2r( mm7, mm1 );
pand_r2r( mm7, mm2 );
/* collect results from pos./neg. parts */
psubb_r2r( mm2, mm1 );
paddb_r2r( mm5, mm1 );
movq_r2m( mm1, (*po++) );
}
} }
#endif
/* C version - handle the width remainder /* C version - handle the width remainder */
(or everything if no MMX) */
uint8_t *po = p_out; uint8_t *po = p_out;
for( ; x < w; ++x, ++po ) for( ; x < w; ++x, ++po )
(*po) = 128 + ( ((*po) - 128) / (1 << i_strength) ); (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
...@@ -213,11 +264,9 @@ static void DarkenField( picture_t *p_dst, ...@@ -213,11 +264,9 @@ static void DarkenField( picture_t *p_dst,
} /* for i_plane... */ } /* for i_plane... */
} /* if process_chroma */ } /* if process_chroma */
#ifdef CAN_COMPILE_MMXEXT emms();
if( mmxext )
emms();
#endif
} }
#endif
/***************************************************************************** /*****************************************************************************
* Public functions * Public functions
...@@ -303,9 +352,17 @@ int RenderPhosphor( filter_t *p_filter, ...@@ -303,9 +352,17 @@ int RenderPhosphor( filter_t *p_filter,
In most use cases the dimmer is used. In most use cases the dimmer is used.
*/ */
if( p_sys->phosphor.i_dimmer_strength > 0 ) if( p_sys->phosphor.i_dimmer_strength > 0 )
DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength, {
p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den && #ifdef CAN_COMPILE_MMXEXT
p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den ); if( vlc_CPU_MMXEXT() )
DarkenFieldMMX( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
else
#endif
DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
}
return VLC_SUCCESS; return VLC_SUCCESS;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment