Commit 83f2312b authored by Laurent Aimar

Added support for SSE2 to 16 bit merge (deinterlace).

parent 8962e714
......@@ -636,9 +636,9 @@ int Open( vlc_object_t *p_this )
else
#endif
#if defined(CAN_COMPILE_SSE)
if( chroma->pixel_size == 1 && (vlc_CPU() & CPU_CAPABILITY_SSE2) )
if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
{
p_sys->pf_merge = MergeSSE2;
p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
p_sys->pf_end_merge = EndMMX;
}
else
......
......@@ -118,8 +118,8 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif
#if defined(CAN_COMPILE_SSE)
void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
uint8_t *p_dest = _p_dest;
const uint8_t *p_s1 = _p_s1;
......@@ -143,6 +143,34 @@ void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
for( ; i_bytes > 0; i_bytes-- )
*p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
/**
 * SSE2 routine to blend 16-bit samples from two picture lines.
 *
 * Each output sample is the average of the corresponding samples of the two
 * source lines. i_bytes is converted to a count of 16-bit words
 * (i_bytes / 2); a trailing odd byte, if any, is not processed.
 *
 * NOTE: the vector path uses pavgw, which rounds up ((a + b + 1) >> 1),
 * whereas the scalar head/tail loops truncate ((a + b) >> 1). Results can
 * therefore differ by one LSB depending on which path handles a sample.
 *
 * @param _p_dest Target
 * @param _p_s1 Source line A
 * @param _p_s2 Source line B
 * @param i_bytes Number of bytes to merge
 */
void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                     size_t i_bytes )
{
    uint16_t *p_dest = _p_dest;
    const uint16_t *p_s1 = _p_s1;
    const uint16_t *p_s2 = _p_s2;
    size_t i_words = i_bytes / 2;

    /* Scalar head: step until p_s1 is 16-byte aligned, since pavgw below
     * takes p_s1 directly as its memory operand. */
    for( ; i_words > 0 && ((uintptr_t)p_s1 & 15); i_words-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;

    /* Vector body: 8 words (16 bytes) per iteration. */
    for( ; i_words >= 8; i_words -= 8 )
    {
        /* The "m" operands only describe a single uint16_t to the compiler,
         * but the instructions touch 16 bytes and overwrite %xmm1. Declare
         * both as clobbered so the compiler neither caches the affected
         * memory nor keeps a live value in %xmm1 across the statement. */
        __asm__ __volatile__( "movdqu %2,%%xmm1;"
                              "pavgw %1, %%xmm1;"
                              "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                "m" (*p_s1),
                                                "m" (*p_s2)
                                              : "xmm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Scalar tail: fewer than 8 words remain. */
    for( ; i_words > 0; i_words-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
#endif
#ifdef CAN_COMPILE_C_ALTIVEC
......
......@@ -141,7 +141,16 @@ void Merge3DNow ( void *, const void *, const void *, size_t );
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void MergeSSE2 ( void *, const void *, const void *, size_t );
void Merge8BitSSE2( void *, const void *, const void *, size_t );
/**
 * SSE2 routine to blend pixels from two picture lines, for lines made of
 * 16-bit samples.
 *
 * @param _p_dest Target
 * @param _p_s1 Source line A
 * @param _p_s2 Source line B
 * @param i_bytes Number of bytes to merge (handled as i_bytes / 2 16-bit
 *                words; a trailing odd byte is not processed)
 */
void Merge16BitSSE2( void *, const void *, const void *, size_t );
#endif
#if defined __ARM_NEON__
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment