Commit 32466e66 authored by Felix Abecassis, committed by Tristan Matthews

sharpen: help compiler auto-vectorization

Refs #9458
Signed-off-by: Tristan Matthews <tmatth@videolan.org>
parent e1be96f2
......@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
{
picture_t *p_outpic;
int i, j;
uint8_t *p_src = NULL;
uint8_t *p_out = NULL;
uint8_t *restrict p_src = NULL;
uint8_t *restrict p_out = NULL;
int i_src_pitch;
int i_out_pitch;
int pix;
const int v1 = -1;
const int v2 = 3; /* 2^3 = 8 */
const unsigned i_visible_lines = p_pic->p[Y_PLANE].i_visible_lines;
const unsigned i_visible_pitch = p_pic->p[Y_PLANE].i_visible_pitch;
const int sigma = var_GetFloat( p_filter, FILTER_PREFIX "sigma" ) * (1 << 20);
if( !p_pic ) return NULL;
......@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
/* perform convolution only on Y plane. Avoid border line. */
vlc_mutex_lock( &p_filter->p_sys->lock );
for( i = 0; i < p_pic->p[Y_PLANE].i_visible_lines; i++ )
{
if( (i == 0) || (i == p_pic->p[Y_PLANE].i_visible_lines - 1) )
{
for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] );
continue ;
}
for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
{
if( (j == 0) || (j == p_pic->p[Y_PLANE].i_visible_pitch - 1) )
memcpy(p_out, p_src, i_visible_pitch);
for( i = 1; i < i_visible_lines - 1; i++ )
{
p_out[i * i_out_pitch + j] = p_src[i * i_src_pitch + j];
continue ;
}
p_out[i * i_out_pitch] = p_src[i * i_src_pitch];
for( j = 1; j < i_visible_pitch - 1; j++ )
{
pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) +
(p_src[(i - 1) * i_src_pitch + j ] * v1) +
(p_src[(i - 1) * i_src_pitch + j + 1] * v1) +
......@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
(p_src[(i + 1) * i_src_pitch + j + 1] * v1);
pix = pix >= 0 ? clip(pix) : -clip(pix * -1);
p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] +
p_filter->p_sys->tab_precalc[pix + 256] );
p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j]
+ ((pix * sigma) >> 20));
}
p_out[i * i_out_pitch + i_visible_pitch - 1] =
p_src[i * i_src_pitch + i_visible_pitch - 1];
}
memcpy(&p_out[(i_visible_lines - 1) * i_out_pitch],
&p_src[(i_visible_lines - 1) * i_src_pitch], i_visible_pitch);
vlc_mutex_unlock( &p_filter->p_sys->lock );
plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment