deinterlace.c:

 * MMX and SSE2 versions of the Merge() function.
parent 181e9215
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* deinterlace.c : deinterlacer plugin for vlc * deinterlace.c : deinterlacer plugin for vlc
***************************************************************************** *****************************************************************************
* Copyright (C) 2000, 2001, 2002, 2003 VideoLAN * Copyright (C) 2000, 2001, 2002, 2003 VideoLAN
* $Id: deinterlace.c,v 1.20 2004/01/25 20:05:28 hartman Exp $ * $Id: deinterlace.c,v 1.21 2004/02/01 16:53:11 sigmunau Exp $
* *
* Author: Sam Hocevar <sam@zoy.org> * Author: Sam Hocevar <sam@zoy.org>
* *
...@@ -57,6 +57,9 @@ static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int ); ...@@ -57,6 +57,9 @@ static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
static void MergeGeneric ( void *, const void *, const void *, size_t ); static void MergeGeneric ( void *, const void *, const void *, size_t );
static void MergeAltivec ( void *, const void *, const void *, size_t ); static void MergeAltivec ( void *, const void *, const void *, size_t );
static void MergeMMX ( void *, const void *, const void *, size_t );
static void MergeSSE2 ( void *, const void *, const void *, size_t );
static void EndMMX ( void );
static int SendEvents ( vlc_object_t *, char const *, static int SendEvents ( vlc_object_t *, char const *,
vlc_value_t, vlc_value_t, void * ); vlc_value_t, vlc_value_t, void * );
...@@ -111,6 +114,7 @@ struct vout_sys_t ...@@ -111,6 +114,7 @@ struct vout_sys_t
vlc_mutex_t filter_lock; vlc_mutex_t filter_lock;
void (*pf_merge) ( void *, const void *, const void *, size_t ); void (*pf_merge) ( void *, const void *, const void *, size_t );
void (*pf_end_merge) ( void );
}; };
/***************************************************************************** /*****************************************************************************
...@@ -145,10 +149,22 @@ static int Create( vlc_object_t *p_this ) ...@@ -145,10 +149,22 @@ static int Create( vlc_object_t *p_this )
if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC ) if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC )
{ {
p_vout->p_sys->pf_merge = MergeAltivec; p_vout->p_sys->pf_merge = MergeAltivec;
p_vout->p_sys->pf_end_merge = NULL;
}
if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMX )
{
p_vout->p_sys->pf_merge = MergeMMX;
p_vout->p_sys->pf_end_merge = EndMMX;
}
if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_SSE2 )
{
p_vout->p_sys->pf_merge = MergeSSE2;
p_vout->p_sys->pf_end_merge = EndMMX;
} }
else else
{ {
p_vout->p_sys->pf_merge = MergeGeneric; p_vout->p_sys->pf_merge = MergeGeneric;
p_vout->p_sys->pf_end_merge = NULL;
} }
/* Look what method was requested */ /* Look what method was requested */
...@@ -631,6 +647,7 @@ static void RenderBob( vout_thread_t *p_vout, ...@@ -631,6 +647,7 @@ static void RenderBob( vout_thread_t *p_vout,
} }
#define Merge p_vout->p_sys->pf_merge #define Merge p_vout->p_sys->pf_merge
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
/***************************************************************************** /*****************************************************************************
* RenderLinear: BOB with linear interpolation * RenderLinear: BOB with linear interpolation
...@@ -687,6 +704,7 @@ static void RenderLinear( vout_thread_t *p_vout, ...@@ -687,6 +704,7 @@ static void RenderLinear( vout_thread_t *p_vout,
p_pic->p[i_plane].i_pitch ); p_pic->p[i_plane].i_pitch );
} }
} }
EndMerge();
} }
static void RenderMean( vout_thread_t *p_vout, static void RenderMean( vout_thread_t *p_vout,
...@@ -715,6 +733,7 @@ static void RenderMean( vout_thread_t *p_vout, ...@@ -715,6 +733,7 @@ static void RenderMean( vout_thread_t *p_vout,
p_in += 2 * p_pic->p[i_plane].i_pitch; p_in += 2 * p_pic->p[i_plane].i_pitch;
} }
} }
EndMerge();
} }
static void RenderBlend( vout_thread_t *p_vout, static void RenderBlend( vout_thread_t *p_vout,
...@@ -787,6 +806,7 @@ static void RenderBlend( vout_thread_t *p_vout, ...@@ -787,6 +806,7 @@ static void RenderBlend( vout_thread_t *p_vout,
break; break;
} }
} }
EndMerge();
} }
#undef Merge #undef Merge
...@@ -819,6 +839,65 @@ static void MergeGeneric( void *_p_dest, const void *_p_s1, ...@@ -819,6 +839,65 @@ static void MergeGeneric( void *_p_dest, const void *_p_s1,
} }
} }
static void MergeMMX( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
uint8_t* p_dest = (uint8_t*)_p_dest;
const uint8_t *p_s1 = (const uint8_t *)_p_s1;
const uint8_t *p_s2 = (const uint8_t *)_p_s2;
uint8_t* p_end = p_dest + i_bytes - 8;
while( p_dest < p_end )
{
__asm__ __volatile__( "movq %2,%%mm1;"
"pavgb %1, %%mm1;"
"movq %%mm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
"m" (*p_s2) );
p_dest += 8;
p_s1 += 8;
p_s2 += 8;
}
p_end += 8;
while( p_dest < p_end )
{
*p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
}
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
uint8_t* p_dest = (uint8_t*)_p_dest;
const uint8_t *p_s1 = (const uint8_t *)_p_s1;
const uint8_t *p_s2 = (const uint8_t *)_p_s2;
uint8_t* p_end = p_dest + i_bytes - 16;
while( p_dest < p_end )
{
__asm__ __volatile__( "movdqu %2,%%xmm1;"
"pavgb %1, %%xmm1;"
"movdqu %%xmm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
"m" (*p_s2) );
p_dest += 16;
p_s1 += 16;
p_s2 += 16;
}
p_end += 16;
while( p_dest < p_end )
{
*p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
}
static void EndMMX( void )
{
__asm__ __volatile__( "emms" :: );
}
static void MergeAltivec( void *_p_dest, const void *_p_s1, static void MergeAltivec( void *_p_dest, const void *_p_s1,
const void *_p_s2, size_t i_bytes ) const void *_p_s2, size_t i_bytes )
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment