Commit 8fbb7daa authored by Rémi Denis-Courmont

deinterlace: clobber MM and XMM registers correctly

parent 86ca8255
...@@ -78,6 +78,7 @@ static inline int XDeint8x8DetectC( uint8_t *src, int i_src ) ...@@ -78,6 +78,7 @@ static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
return fc < 1 ? false : true; return fc < 1 ? false : true;
} }
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
VLC_MMX
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src ) static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{ {
...@@ -164,6 +165,7 @@ static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst, ...@@ -164,6 +165,7 @@ static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
} }
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
VLC_MMX
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst, static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src1, int i_src1, uint8_t *src1, int i_src1,
uint8_t *src2, int i_src2 ) uint8_t *src2, int i_src2 )
...@@ -237,6 +239,7 @@ static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst, ...@@ -237,6 +239,7 @@ static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
} }
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
VLC_MMX
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst, static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src ) uint8_t *src, int i_src )
{ {
...@@ -308,6 +311,7 @@ static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst, ...@@ -308,6 +311,7 @@ static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
} }
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
VLC_MMX
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst, static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src ) uint8_t *src, int i_src )
{ {
...@@ -495,6 +499,7 @@ static inline void XDeintBand8x8C( uint8_t *dst, int i_dst, ...@@ -495,6 +499,7 @@ static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
} }
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
VLC_MMX
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst, static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src, uint8_t *src, int i_src,
const int i_mbx, int i_modx ) const int i_mbx, int i_modx )
......
...@@ -27,11 +27,11 @@ ...@@ -27,11 +27,11 @@
# include "config.h" # include "config.h"
#endif #endif
#include <vlc_common.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <vlc_common.h>
#include <vlc_cpu.h>
#include "merge.h" #include "merge.h"
#ifdef CAN_COMPILE_MMXEXT #ifdef CAN_COMPILE_MMXEXT
...@@ -69,6 +69,7 @@ void Merge16BitGeneric( void *_p_dest, const void *_p_s1, ...@@ -69,6 +69,7 @@ void Merge16BitGeneric( void *_p_dest, const void *_p_s1,
} }
#if defined(CAN_COMPILE_MMXEXT) #if defined(CAN_COMPILE_MMXEXT)
VLC_MMX
void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2, void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes ) size_t i_bytes )
{ {
...@@ -82,7 +83,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -82,7 +83,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgb %1, %%mm1;" "pavgb %1, %%mm1;"
"movq %%mm1, %0" :"=m" (*p_dest): "movq %%mm1, %0" :"=m" (*p_dest):
"m" (*p_s1), "m" (*p_s1),
"m" (*p_s2) ); "m" (*p_s2) : "mm1" );
p_dest += 8; p_dest += 8;
p_s1 += 8; p_s1 += 8;
p_s2 += 8; p_s2 += 8;
...@@ -94,6 +95,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -94,6 +95,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif #endif
#if defined(CAN_COMPILE_3DNOW) #if defined(CAN_COMPILE_3DNOW)
VLC_MMX
void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2, void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes ) size_t i_bytes )
{ {
...@@ -107,7 +109,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -107,7 +109,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgusb %1, %%mm1;" "pavgusb %1, %%mm1;"
"movq %%mm1, %0" :"=m" (*p_dest): "movq %%mm1, %0" :"=m" (*p_dest):
"m" (*p_s1), "m" (*p_s1),
"m" (*p_s2) ); "m" (*p_s2) : "mm1" );
p_dest += 8; p_dest += 8;
p_s1 += 8; p_s1 += 8;
p_s2 += 8; p_s2 += 8;
...@@ -119,6 +121,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -119,6 +121,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif #endif
#if defined(CAN_COMPILE_SSE) #if defined(CAN_COMPILE_SSE)
VLC_SSE
void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes ) size_t i_bytes )
{ {
...@@ -135,7 +138,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -135,7 +138,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgb %1, %%xmm1;" "pavgb %1, %%xmm1;"
"movdqu %%xmm1, %0" :"=m" (*p_dest): "movdqu %%xmm1, %0" :"=m" (*p_dest):
"m" (*p_s1), "m" (*p_s1),
"m" (*p_s2) ); "m" (*p_s2) : "xmm1" );
p_dest += 16; p_dest += 16;
p_s1 += 16; p_s1 += 16;
p_s2 += 16; p_s2 += 16;
...@@ -145,6 +148,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -145,6 +148,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
*p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1; *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
} }
VLC_SSE
void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes ) size_t i_bytes )
{ {
...@@ -162,7 +166,7 @@ void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, ...@@ -162,7 +166,7 @@ void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgw %1, %%xmm1;" "pavgw %1, %%xmm1;"
"movdqu %%xmm1, %0" :"=m" (*p_dest): "movdqu %%xmm1, %0" :"=m" (*p_dest):
"m" (*p_s1), "m" (*p_s1),
"m" (*p_s2) ); "m" (*p_s2) : "xmm1" );
p_dest += 8; p_dest += 8;
p_s1 += 8; p_s1 += 8;
p_s2 += 8; p_s2 += 8;
......
...@@ -43,20 +43,23 @@ typedef union { ...@@ -43,20 +43,23 @@ typedef union {
#define mmx_i2r(op,imm,reg) \ #define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \ __asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "i" (imm) ) : "i" (imm) \
: #reg)
#define mmx_m2r(op,mem,reg) \ #define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \ __asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "m" (mem)) : "m" (mem) \
: #reg)
#define mmx_r2m(op,reg,mem) \ #define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \ __asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \ : "=m" (mem) \
: /* nothing */ ) : /* nothing */ \
: "memory")
#define mmx_r2r(op,regs,regd) \ #define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd) __asm__ __volatile__ (#op " %%" #regs ", %%" #regd ::: #regd)
#define emms() __asm__ __volatile__ ("emms") #define emms() __asm__ __volatile__ ("emms")
...@@ -200,11 +203,13 @@ typedef union { ...@@ -200,11 +203,13 @@ typedef union {
#define mmx_m2ri(op,mem,reg,imm) \ #define mmx_m2ri(op,mem,reg,imm) \
__asm__ __volatile__ (#op " %1, %0, %%" #reg \ __asm__ __volatile__ (#op " %1, %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "X" (mem), "X" (imm)) : "X" (mem), "X" (imm) \
: #reg)
#define mmx_r2ri(op,regs,regd,imm) \ #define mmx_r2ri(op,regs,regd,imm) \
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \ : /* nothing */ \
: "X" (imm) ) : "X" (imm) \
: #regd)
#define mmx_fetch(mem,hint) \ #define mmx_fetch(mem,hint) \
__asm__ __volatile__ ("prefetch" #hint " %0" \ __asm__ __volatile__ ("prefetch" #hint " %0" \
...@@ -238,7 +243,7 @@ typedef union { ...@@ -238,7 +243,7 @@ typedef union {
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pmovmskb(mmreg,reg) \ #define pmovmskb(mmreg,reg) \
__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg : : : #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment