Commit 32466e66 authored by Felix Abecassis, committed by Tristan Matthews

sharpen: help compiler auto-vectorization

Refs #9458
Signed-off-by: Tristan Matthews <tmatth@videolan.org>
parent e1be96f2
...@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic ) ...@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
{ {
picture_t *p_outpic; picture_t *p_outpic;
int i, j; int i, j;
uint8_t *p_src = NULL; uint8_t *restrict p_src = NULL;
uint8_t *p_out = NULL; uint8_t *restrict p_out = NULL;
int i_src_pitch; int i_src_pitch;
int i_out_pitch; int i_out_pitch;
int pix; int pix;
const int v1 = -1; const int v1 = -1;
const int v2 = 3; /* 2^3 = 8 */ const int v2 = 3; /* 2^3 = 8 */
const unsigned i_visible_lines = p_pic->p[Y_PLANE].i_visible_lines;
const unsigned i_visible_pitch = p_pic->p[Y_PLANE].i_visible_pitch;
const int sigma = var_GetFloat( p_filter, FILTER_PREFIX "sigma" ) * (1 << 20);
if( !p_pic ) return NULL; if( !p_pic ) return NULL;
...@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic ) ...@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
/* perform convolution only on Y plane. Avoid border line. */ /* perform convolution only on Y plane. Avoid border line. */
vlc_mutex_lock( &p_filter->p_sys->lock ); vlc_mutex_lock( &p_filter->p_sys->lock );
for( i = 0; i < p_pic->p[Y_PLANE].i_visible_lines; i++ )
{ memcpy(p_out, p_src, i_visible_pitch);
if( (i == 0) || (i == p_pic->p[Y_PLANE].i_visible_lines - 1) )
{ for( i = 1; i < i_visible_lines - 1; i++ )
for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] );
continue ;
}
for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
{
if( (j == 0) || (j == p_pic->p[Y_PLANE].i_visible_pitch - 1) )
{ {
p_out[i * i_out_pitch + j] = p_src[i * i_src_pitch + j]; p_out[i * i_out_pitch] = p_src[i * i_src_pitch];
continue ;
}
for( j = 1; j < i_visible_pitch - 1; j++ )
{
pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) + pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) +
(p_src[(i - 1) * i_src_pitch + j ] * v1) + (p_src[(i - 1) * i_src_pitch + j ] * v1) +
(p_src[(i - 1) * i_src_pitch + j + 1] * v1) + (p_src[(i - 1) * i_src_pitch + j + 1] * v1) +
...@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic ) ...@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
(p_src[(i + 1) * i_src_pitch + j + 1] * v1); (p_src[(i + 1) * i_src_pitch + j + 1] * v1);
pix = pix >= 0 ? clip(pix) : -clip(pix * -1); pix = pix >= 0 ? clip(pix) : -clip(pix * -1);
p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] + p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j]
p_filter->p_sys->tab_precalc[pix + 256] ); + ((pix * sigma) >> 20));
} }
p_out[i * i_out_pitch + i_visible_pitch - 1] =
p_src[i * i_src_pitch + i_visible_pitch - 1];
} }
memcpy(&p_out[(i_visible_lines - 1) * i_out_pitch],
&p_src[(i_visible_lines - 1) * i_src_pitch], i_visible_pitch);
vlc_mutex_unlock( &p_filter->p_sys->lock ); vlc_mutex_unlock( &p_filter->p_sys->lock );
plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] ); plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment