Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-2-2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-2-2
Commits
5c7c27ca
Commit
5c7c27ca
authored
Dec 21, 2011
by
Naohiro KORIYAMA
Committed by
Jean-Baptiste Kempf
Dec 21, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
yadif : Add SSSE3 and SSE2 support. porting from FFmpeg.
Signed-off-by:
Jean-Baptiste Kempf
<
jb@videolan.org
>
parent
8634f761
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
373 additions
and
266 deletions
+373
-266
modules/video_filter/deinterlace/algo_yadif.c
modules/video_filter/deinterlace/algo_yadif.c
+22
-34
modules/video_filter/deinterlace/yadif.h
modules/video_filter/deinterlace/yadif.h
+76
-232
modules/video_filter/deinterlace/yadif_template.h
modules/video_filter/deinterlace/yadif_template.h
+275
-0
No files found.
modules/video_filter/deinterlace/algo_yadif.c
View file @
5c7c27ca
/*****************************************************************************
/*****************************************************************************
* algo_yadif.c : Wrapper for
MPlayer
's Yadif algorithm
* algo_yadif.c : Wrapper for
FFmpeg
's Yadif algorithm
*****************************************************************************
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
* $Id$
...
@@ -26,10 +26,6 @@
...
@@ -26,10 +26,6 @@
# include "config.h"
# include "config.h"
#endif
#endif
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#include <stdint.h>
#include <stdint.h>
#include <assert.h>
#include <assert.h>
...
@@ -47,23 +43,7 @@
...
@@ -47,23 +43,7 @@
* Yadif (Yet Another DeInterlacing Filter).
* Yadif (Yet Another DeInterlacing Filter).
*****************************************************************************/
*****************************************************************************/
/* Yadif's private data struct */
/* yadif.h comes from yadif.c of FFmpeg project.
struct
vf_priv_s
{
/*
* 0: Output 1 frame for each frame.
* 1: Output 1 frame for each field.
* 2: Like 0 but skips spatial interlacing check.
* 3: Like 1 but skips spatial interlacing check.
*
* In vlc, only & 0x02 has meaning, as we do the & 0x01 ourself.
*/
int
mode
;
};
/* I am unsure it is the right one */
typedef
intptr_t
x86_reg
;
/* yadif.h comes from vf_yadif.c of mplayer project.
Necessary preprocessor macros are defined in common.h. */
Necessary preprocessor macros are defined in common.h. */
#include "yadif.h"
#include "yadif.h"
...
@@ -125,15 +105,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
...
@@ -125,15 +105,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
if
(
p_prev
&&
p_cur
&&
p_next
)
if
(
p_prev
&&
p_cur
&&
p_next
)
{
{
/* */
/* */
void
(
*
filter
)(
struct
vf_priv_s
*
p
,
uint8_t
*
dst
,
void
(
*
filter
)(
uint8_t
*
dst
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
int
w
,
int
prefs
,
int
mrefs
,
int
parity
,
int
mode
);
int
w
,
int
refs
,
int
parity
);
filter
=
yadif_filter_line_c
;
#if defined(HAVE_YADIF_MMX)
if
(
vlc_CPU
()
&
CPU_CAPABILITY_MMX
)
filter
=
yadif_filter_line_mmx
;
#endif
#if defined(HAVE_YADIF_SSE2)
#if defined(HAVE_YADIF_SSE2)
if
(
vlc_CPU
()
&
CPU_CAPABILITY_SSE2
)
if
(
vlc_CPU
()
&
CPU_CAPABILITY_SSE2
)
filter
=
yadif_filter_line_mmx2
;
filter
=
yadif_filter_line_sse2
;
else
#endif
#if defined(HAVE_YADIF_SSSE3)
if
(
vlc_CPU
()
&
CPU_CAPABILITY_SSSE3
)
filter
=
yadif_filter_line_ssse3
;
#endif
#endif
filter
=
yadif_filter_line_c
;
for
(
int
n
=
0
;
n
<
p_dst
->
i_planes
;
n
++
)
for
(
int
n
=
0
;
n
<
p_dst
->
i_planes
;
n
++
)
{
{
...
@@ -151,19 +138,20 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
...
@@ -151,19 +138,20 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
}
}
else
else
{
{
struct
vf_priv_s
cfg
;
int
mode
;
/* Spatial checks only when enough data */
/* Spatial checks only when enough data */
cfg
.
mode
=
(
y
>=
2
&&
y
<
dstp
->
i_visible_lines
-
2
)
?
0
:
2
;
mode
=
(
y
>=
2
&&
y
<
dstp
->
i_visible_lines
-
2
)
?
0
:
2
;
assert
(
prevp
->
i_pitch
==
curp
->
i_pitch
&&
curp
->
i_pitch
==
nextp
->
i_pitch
);
assert
(
prevp
->
i_pitch
==
curp
->
i_pitch
&&
curp
->
i_pitch
==
nextp
->
i_pitch
);
filter
(
&
cfg
,
filter
(
&
dstp
->
p_pixels
[
y
*
dstp
->
i_pitch
],
&
dstp
->
p_pixels
[
y
*
dstp
->
i_pitch
],
&
prevp
->
p_pixels
[
y
*
prevp
->
i_pitch
],
&
prevp
->
p_pixels
[
y
*
prevp
->
i_pitch
],
&
curp
->
p_pixels
[
y
*
curp
->
i_pitch
],
&
curp
->
p_pixels
[
y
*
curp
->
i_pitch
],
&
nextp
->
p_pixels
[
y
*
nextp
->
i_pitch
],
&
nextp
->
p_pixels
[
y
*
nextp
->
i_pitch
],
dstp
->
i_visible_pitch
,
dstp
->
i_visible_pitch
,
curp
->
i_pitch
,
y
<
dstp
->
i_visible_lines
-
2
?
curp
->
i_pitch
:
-
curp
->
i_pitch
,
yadif_parity
);
y
-
1
?
-
curp
->
i_pitch
:
curp
->
i_pitch
,
yadif_parity
,
mode
);
}
}
/* We duplicate the first and last lines */
/* We duplicate the first and last lines */
...
...
modules/video_filter/deinterlace/yadif.h
View file @
5c7c27ca
/*
/*
* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
*
*
* This file is part of
MPlayer
.
* This file is part of
FFmpeg
.
*
*
*
MPlayer
is free software; you can redistribute it and/or modify
*
FFmpeg
is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* (at your option) any later version.
*
*
*
MPlayer
is distributed in the hope that it will be useful,
*
FFmpeg
is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License along
* You should have received a copy of the GNU General Public License along
* with
MPlayer
; if not, write to the Free Software Foundation, Inc.,
* with
FFmpeg
; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
*/
/* */
#ifdef HAVE_CONFIG_H
#if defined(CAN_COMPILE_SSE2) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
# include "config.h"
#endif
#define HAVE_YADIF_SSE2
#define LOAD4(mem,dst) \
"movd "mem", "#dst" \n\t"\
"punpcklbw %%mm7, "#dst" \n\t"
#define PABS(tmp,dst) \
"pxor "#tmp", "#tmp" \n\t"\
"psubw "#dst", "#tmp" \n\t"\
"pmaxsw "#tmp", "#dst" \n\t"
#define CHECK(pj,mj) \
"movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t"
/* cur[x-refs-1+j] */
\
"movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t"
/* cur[x+refs-1-j] */
\
"movq %%mm2, %%mm4 \n\t"\
"movq %%mm2, %%mm5 \n\t"\
"pxor %%mm3, %%mm4 \n\t"\
"pavgb %%mm3, %%mm5 \n\t"\
"pand %[pb1], %%mm4 \n\t"\
"psubusb %%mm4, %%mm5 \n\t"\
"psrlq $8, %%mm5 \n\t"\
"punpcklbw %%mm7, %%mm5 \n\t"
/* (cur[x-refs+j] + cur[x+refs-j])>>1 */
\
"movq %%mm2, %%mm4 \n\t"\
"psubusb %%mm3, %%mm2 \n\t"\
"psubusb %%mm4, %%mm3 \n\t"\
"pmaxub %%mm3, %%mm2 \n\t"\
"movq %%mm2, %%mm3 \n\t"\
"movq %%mm2, %%mm4 \n\t"
/* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */
\
"psrlq $8, %%mm3 \n\t"
/* ABS(cur[x-refs +j] - cur[x+refs -j]) */
\
"psrlq $16, %%mm4 \n\t"
/* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */
\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"paddw %%mm4, %%mm2 \n\t"
/* score */
#define CHECK1 \
"movq %%mm0, %%mm3 \n\t"\
"pcmpgtw %%mm2, %%mm3 \n\t"
/* if(score < spatial_score) */
\
"pminsw %%mm2, %%mm0 \n\t"
/* spatial_score= score; */
\
"movq %%mm3, %%mm6 \n\t"\
"pand %%mm3, %%mm5 \n\t"\
"pandn %%mm1, %%mm3 \n\t"\
"por %%mm5, %%mm3 \n\t"\
"movq %%mm3, %%mm1 \n\t"
/* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
#define CHECK2
/* pretend not to have checked dir=2 if dir=1 was bad.\
hurts both quality and speed, but matches the C version. */
\
"paddw %[pw1], %%mm6 \n\t"\
"psllw $14, %%mm6 \n\t"\
"paddsw %%mm6, %%mm2 \n\t"\
"movq %%mm0, %%mm3 \n\t"\
"pcmpgtw %%mm2, %%mm3 \n\t"\
"pminsw %%mm2, %%mm0 \n\t"\
"pand %%mm3, %%mm5 \n\t"\
"pandn %%mm1, %%mm3 \n\t"\
"por %%mm5, %%mm3 \n\t"\
"movq %%mm3, %%mm1 \n\t"
static
void
yadif_filter_line_mmx2
(
struct
vf_priv_s
*
p
,
uint8_t
*
dst
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
int
w
,
int
refs
,
int
parity
){
#if defined(__GNUC__)
static
const
uint64_t
pw_1
=
0x0001000100010001ULL
;
# define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
static
const
uint64_t
pb_1
=
0x0101010101010101ULL
;
# if VLC_GCC_VERSION(3,1)
const
int
mode
=
p
->
mode
;
# define DECLARE_ASM_CONST(n,t,v) static const t __attribute__((used)) __attribute__ ((aligned (n))) v
uint64_t
tmp0
,
tmp1
,
tmp2
,
tmp3
;
# else
int
x
;
# define DECLARE_ASM_CONST(n,t,v) static const t __attribute__ ((aligned (n))) v
# endif
#endif
#define FILTER\
typedef
intptr_t
x86_reg
;
for(x=0; x<w; x+=4){\
typedef
struct
{
uint64_t
a
,
b
;
}
xmm_reg
;
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
DECLARE_ASM_CONST
(
16
,
const
xmm_reg
,
pb_1
)
=
{
0x0101010101010101ULL
,
0x0101010101010101ULL
};
LOAD4("(%[cur],%[mrefs])", %%mm0)
/* c = cur[x-refs] */
\
DECLARE_ASM_CONST
(
16
,
const
xmm_reg
,
pw_1
)
=
{
0x0001000100010001ULL
,
0x0001000100010001ULL
};
LOAD4("(%[cur],%[prefs])", %%mm1)
/* e = cur[x+refs] */
\
LOAD4("(%["prev2"])", %%mm2)
/* prev2[x] */
\
LOAD4("(%["next2"])", %%mm3)
/* next2[x] */
\
#ifdef CAN_COMPILE_SSSE3
"movq %%mm3, %%mm4 \n\t"\
#if defined(__SSE__) || VLC_GCC_VERSION(4, 4)
"paddw %%mm2, %%mm3 \n\t"\
// ================ SSSE3 =================
"psraw $1, %%mm3 \n\t"
/* d = (prev2[x] + next2[x])>>1 */
\
#define HAVE_YADIF_SSSE3
"movq %%mm0, %[tmp0] \n\t"
/* c */
\
#define COMPILE_TEMPLATE_SSE 1
"movq %%mm3, %[tmp1] \n\t"
/* d */
\
#define COMPILE_TEMPLATE_SSSE3 1
"movq %%mm1, %[tmp2] \n\t"
/* e */
\
#define VLC_TARGET VLC_SSE
"psubw %%mm4, %%mm2 \n\t"\
#define RENAME(a) a ## _ssse3
PABS( %%mm4, %%mm2)
/* temporal_diff0 */
\
#include "yadif_template.h"
LOAD4("(%[prev],%[mrefs])", %%mm3)
/* prev[x-refs] */
\
#undef COMPILE_TEMPLATE_SSE
LOAD4("(%[prev],%[prefs])", %%mm4)
/* prev[x+refs] */
\
#undef COMPILE_TEMPLATE_SSSE3
"psubw %%mm0, %%mm3 \n\t"\
#undef VLC_TARGET
"psubw %%mm1, %%mm4 \n\t"\
#undef RENAME
PABS( %%mm5, %%mm3)\
#endif
PABS( %%mm5, %%mm4)\
#endif
"paddw %%mm4, %%mm3 \n\t"
/* temporal_diff1 */
\
"psrlw $1, %%mm2 \n\t"\
"psrlw $1, %%mm3 \n\t"\
"pmaxsw %%mm3, %%mm2 \n\t"\
LOAD4("(%[next],%[mrefs])", %%mm3)
/* next[x-refs] */
\
LOAD4("(%[next],%[prefs])", %%mm4)
/* next[x+refs] */
\
"psubw %%mm0, %%mm3 \n\t"\
"psubw %%mm1, %%mm4 \n\t"\
PABS( %%mm5, %%mm3)\
PABS( %%mm5, %%mm4)\
"paddw %%mm4, %%mm3 \n\t"
/* temporal_diff2 */
\
"psrlw $1, %%mm3 \n\t"\
"pmaxsw %%mm3, %%mm2 \n\t"\
"movq %%mm2, %[tmp3] \n\t"
/* diff */
\
\
"paddw %%mm0, %%mm1 \n\t"\
"paddw %%mm0, %%mm0 \n\t"\
"psubw %%mm1, %%mm0 \n\t"\
"psrlw $1, %%mm1 \n\t"
/* spatial_pred */
\
PABS( %%mm2, %%mm0)
/* ABS(c-e) */
\
\
"movq -1(%[cur],%[mrefs]), %%mm2 \n\t"
/* cur[x-refs-1] */
\
"movq -1(%[cur],%[prefs]), %%mm3 \n\t"
/* cur[x+refs-1] */
\
"movq %%mm2, %%mm4 \n\t"\
"psubusb %%mm3, %%mm2 \n\t"\
"psubusb %%mm4, %%mm3 \n\t"\
"pmaxub %%mm3, %%mm2 \n\t"\
"pshufw $9,%%mm2, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"
/* ABS(cur[x-refs-1] - cur[x+refs-1]) */
\
"punpcklbw %%mm7, %%mm3 \n\t"
/* ABS(cur[x-refs+1] - cur[x+refs+1]) */
\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm3, %%mm0 \n\t"\
"psubw %[pw1], %%mm0 \n\t"
/* spatial_score */
\
\
CHECK(-2,0)\
CHECK1\
CHECK(-3,1)\
CHECK2\
CHECK(0,-2)\
CHECK1\
CHECK(1,-3)\
CHECK2\
\
/* if(p->mode<2) ... */
\
"movq %[tmp3], %%mm6 \n\t"
/* diff */
\
"cmpl $2, %[mode] \n\t"\
"jge 1f \n\t"\
LOAD4("(%["prev2"],%[mrefs],2)", %%mm2)
/* prev2[x-2*refs] */
\
LOAD4("(%["next2"],%[mrefs],2)", %%mm4)
/* next2[x-2*refs] */
\
LOAD4("(%["prev2"],%[prefs],2)", %%mm3)
/* prev2[x+2*refs] */
\
LOAD4("(%["next2"],%[prefs],2)", %%mm5)
/* next2[x+2*refs] */
\
"paddw %%mm4, %%mm2 \n\t"\
"paddw %%mm5, %%mm3 \n\t"\
"psrlw $1, %%mm2 \n\t"
/* b */
\
"psrlw $1, %%mm3 \n\t"
/* f */
\
"movq %[tmp0], %%mm4 \n\t"
/* c */
\
"movq %[tmp1], %%mm5 \n\t"
/* d */
\
"movq %[tmp2], %%mm7 \n\t"
/* e */
\
"psubw %%mm4, %%mm2 \n\t"
/* b-c */
\
"psubw %%mm7, %%mm3 \n\t"
/* f-e */
\
"movq %%mm5, %%mm0 \n\t"\
"psubw %%mm4, %%mm5 \n\t"
/* d-c */
\
"psubw %%mm7, %%mm0 \n\t"
/* d-e */
\
"movq %%mm2, %%mm4 \n\t"\
"pminsw %%mm3, %%mm2 \n\t"\
"pmaxsw %%mm4, %%mm3 \n\t"\
"pmaxsw %%mm5, %%mm2 \n\t"\
"pminsw %%mm5, %%mm3 \n\t"\
"pmaxsw %%mm0, %%mm2 \n\t"
/* max */
\
"pminsw %%mm0, %%mm3 \n\t"
/* min */
\
"pxor %%mm4, %%mm4 \n\t"\
"pmaxsw %%mm3, %%mm6 \n\t"\
"psubw %%mm2, %%mm4 \n\t"
/* -max */
\
"pmaxsw %%mm4, %%mm6 \n\t"
/* diff= MAX3(diff, min, -max); */
\
"1: \n\t"\
\
"movq %[tmp1], %%mm2 \n\t"
/* d */
\
"movq %%mm2, %%mm3 \n\t"\
"psubw %%mm6, %%mm2 \n\t"
/* d-diff */
\
"paddw %%mm6, %%mm3 \n\t"
/* d+diff */
\
"pmaxsw %%mm2, %%mm1 \n\t"\
"pminsw %%mm3, %%mm1 \n\t"
/* d = clip(spatial_pred, d-diff, d+diff); */
\
"packuswb %%mm1, %%mm1 \n\t"\
\
:[tmp0]"=m"(tmp0),\
[tmp1]"=m"(tmp1),\
[tmp2]"=m"(tmp2),\
[tmp3]"=m"(tmp3)\
:[prev] "r"(prev),\
[cur] "r"(cur),\
[next] "r"(next),\
[prefs]"r"((x86_reg)refs),\
[mrefs]"r"((x86_reg)-refs),\
[pw1] "m"(pw_1),\
[pb1] "m"(pb_1),\
[mode] "g"(mode)\
);\
__asm__ volatile("movd %%mm1, %0" :"=m"(*dst));\
dst += 4;\
prev+= 4;\
cur += 4;\
next+= 4;\
}
if
(
parity
){
#ifdef CAN_COMPILE_SSE2
#define prev2 "prev"
#if defined(__SSE__) || VLC_GCC_VERSION(4, 4)
#define next2 "cur"
// ================= SSE2 =================
FILTER
#define HAVE_YADIF_SSE2
#undef prev2
#define COMPILE_TEMPLATE_SSE 1
#undef next2
#define VLC_TARGET VLC_SSE
}
else
{
#define RENAME(a) a ## _sse2
#define prev2 "cur"
#include "yadif_template.h"
#define next2 "next"
#undef COMPILE_TEMPLATE_SSE
FILTER
#undef VLC_TARGET
#undef prev2
#undef RENAME
#undef next2
#endif
}
#endif
}
#undef LOAD4
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER
#ifdef CAN_COMPILE_MMX
#if defined(__MMX__) || VLC_GCC_VERSION(4, 4)
// ================ MMX =================
#define HAVE_YADIF_MMX
#define VLC_TARGET VLC_MMX
#define RENAME(a) a ## _mmx
#include "yadif_template.h"
#undef VLC_TARGET
#undef RENAME
#endif
#endif
#endif
static
void
yadif_filter_line_c
(
struct
vf_priv_s
*
p
,
uint8_t
*
dst
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
int
w
,
int
refs
,
int
parity
)
{
static
void
yadif_filter_line_c
(
uint8_t
*
dst
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
int
w
,
int
prefs
,
int
mrefs
,
int
parity
,
int
mode
)
{
int
x
;
int
x
;
uint8_t
*
prev2
=
parity
?
prev
:
cur
;
uint8_t
*
prev2
=
parity
?
prev
:
cur
;
uint8_t
*
next2
=
parity
?
cur
:
next
;
uint8_t
*
next2
=
parity
?
cur
:
next
;
for
(
x
=
0
;
x
<
w
;
x
++
){
for
(
x
=
0
;
x
<
w
;
x
++
){
int
c
=
cur
[
-
refs
];
int
c
=
cur
[
m
refs
];
int
d
=
(
prev2
[
0
]
+
next2
[
0
])
>>
1
;
int
d
=
(
prev2
[
0
]
+
next2
[
0
])
>>
1
;
int
e
=
cur
[
+
refs
];
int
e
=
cur
[
p
refs
];
int
temporal_diff0
=
FFABS
(
prev2
[
0
]
-
next2
[
0
]);
int
temporal_diff0
=
FFABS
(
prev2
[
0
]
-
next2
[
0
]);
int
temporal_diff1
=
(
FFABS
(
prev
[
-
refs
]
-
c
)
+
FFABS
(
prev
[
+
refs
]
-
e
)
)
>>
1
;
int
temporal_diff1
=
(
FFABS
(
prev
[
mrefs
]
-
c
)
+
FFABS
(
prev
[
p
refs
]
-
e
)
)
>>
1
;
int
temporal_diff2
=
(
FFABS
(
next
[
-
refs
]
-
c
)
+
FFABS
(
next
[
+
refs
]
-
e
)
)
>>
1
;
int
temporal_diff2
=
(
FFABS
(
next
[
mrefs
]
-
c
)
+
FFABS
(
next
[
p
refs
]
-
e
)
)
>>
1
;
int
diff
=
FFMAX3
(
temporal_diff0
>>
1
,
temporal_diff1
,
temporal_diff2
);
int
diff
=
FFMAX3
(
temporal_diff0
>>
1
,
temporal_diff1
,
temporal_diff2
);
int
spatial_pred
=
(
c
+
e
)
>>
1
;
int
spatial_pred
=
(
c
+
e
)
>>
1
;
int
spatial_score
=
FFABS
(
cur
[
-
refs
-
1
]
-
cur
[
+
refs
-
1
])
+
FFABS
(
c
-
e
)
int
spatial_score
=
FFABS
(
cur
[
mrefs
-
1
]
-
cur
[
p
refs
-
1
])
+
FFABS
(
c
-
e
)
+
FFABS
(
cur
[
-
refs
+
1
]
-
cur
[
+
refs
+
1
])
-
1
;
+
FFABS
(
cur
[
mrefs
+
1
]
-
cur
[
p
refs
+
1
])
-
1
;
#define CHECK(j)\
#define CHECK(j)\
{ int score= FFABS(cur[
-refs-1+j] - cur[+
refs-1-j])\
{ int score= FFABS(cur[
mrefs-1+j] - cur[p
refs-1-j])\
+ FFABS(cur[
-refs +j] - cur[+
refs -j])\
+ FFABS(cur[
mrefs +j] - cur[p
refs -j])\
+ FFABS(cur[
-refs+1+j] - cur[+
refs+1-j]);\
+ FFABS(cur[
mrefs+1+j] - cur[p
refs+1-j]);\
if(score < spatial_score){\
if(score < spatial_score){\
spatial_score= score;\
spatial_score= score;\
spatial_pred= (cur[
-refs +j] + cur[+
refs -j])>>1;\
spatial_pred= (cur[
mrefs +j] + cur[p
refs -j])>>1;\
CHECK
(
-
1
)
CHECK
(
-
2
)
}}
}}
CHECK
(
-
1
)
CHECK
(
-
2
)
}}
}}
CHECK
(
1
)
CHECK
(
2
)
}}
}}
CHECK
(
1
)
CHECK
(
2
)
}}
}}
if
(
p
->
mode
<
2
){
if
(
mode
<
2
){
int
b
=
(
prev2
[
-
2
*
refs
]
+
next2
[
-
2
*
refs
])
>>
1
;
int
b
=
(
prev2
[
2
*
mrefs
]
+
next2
[
2
*
m
refs
])
>>
1
;
int
f
=
(
prev2
[
+
2
*
refs
]
+
next2
[
+
2
*
refs
])
>>
1
;
int
f
=
(
prev2
[
2
*
prefs
]
+
next2
[
2
*
p
refs
])
>>
1
;
#if 0
#if 0
int a= cur[
-3*
refs];
int a= cur[
3*m
refs];
int g= cur[
+3*
refs];
int g= cur[
3*p
refs];
int max= FFMAX3(d-e, d-c, FFMIN3(FFMAX(b-c,f-e),FFMAX(b-c,b-a),FFMAX(f-g,f-e)) );
int max= FFMAX3(d-e, d-c, FFMIN3(FFMAX(b-c,f-e),FFMAX(b-c,b-a),FFMAX(f-g,f-e)) );
int min= FFMIN3(d-e, d-c, FFMAX3(FFMIN(b-c,f-e),FFMIN(b-c,b-a),FFMIN(f-g,f-e)) );
int min= FFMIN3(d-e, d-c, FFMAX3(FFMIN(b-c,f-e),FFMIN(b-c,b-a),FFMIN(f-g,f-e)) );
#else
#else
...
...
modules/video_filter/deinterlace/yadif_template.h
0 → 100644
View file @
5c7c27ca
/*
* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/*
 * Per-flavor building blocks for the inline-asm line filter below.
 *
 * When COMPILE_TEMPLATE_SSE is defined the template uses the 128-bit "xmm"
 * registers and advances STEP = 8 bytes per loop iteration; otherwise it uses
 * the 64-bit "mm" registers with STEP = 4.
 *
 *   REGMM       register family name, also used to build the clobber list
 *   MM          REGMM prefixed with "%%" for use inside the asm template
 *   MOV         narrow load/store used for pixel loads and the final store
 *   MOVQ        full-register move (movdqa in the SSE flavor)
 *   MOVQU       full-register load used for the cur[...-1] style accesses
 *               (movdqu in the SSE flavor)
 *   LOAD        loads with MOV, then interleaves the bytes with register 7,
 *               which the filter zeroes with pxor before the first LOAD
 *   PSRL1/PSRL2 shift a register right by one / two bytes
 *   PSHUF       see the note next to each definition
 */
#ifdef COMPILE_TEMPLATE_SSE
#define REGMM "xmm"
#define MM "%%"REGMM
#define MOV "movq"
#define MOVQ "movdqa"
#define MOVQU "movdqu"
#define STEP 8
#define LOAD(mem,dst) \
MOV" "mem", "dst" \n\t"\
"punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
/* NOTE(review): the parameter names are the reverse of the data flow. In AT&T
 * operand order the register passed as "src" is the one written: it receives
 * a copy of "dst" and is then shifted right by two bytes. */
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
"psrldq $2, "src" \n\t"
#else
#define REGMM "mm"
#define MM "%%"REGMM
#define MOV "movd"
#define MOVQ "movq"
#define MOVQU "movq"
#define STEP 4
#define LOAD(mem,dst) \
MOV" "mem", "dst" \n\t"\
"punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
/* Same reversed naming as the SSE variant: the register passed as "src" is
 * written, using pshufw with selector 9 on "dst". */
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
#endif
/*
 * PABS(tmp,dst): replace each 16-bit lane of dst with its absolute value.
 *
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is not used.
 * Otherwise tmp is overwritten: it is zeroed, dst is subtracted from it
 * (tmp = -dst), and dst becomes the signed maximum of dst and tmp.
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
"pabsw "dst", "dst" \n\t"
#else
#define PABS(tmp,dst) \
"pxor "tmp", "tmp" \n\t"\
"psubw "dst", "tmp" \n\t"\
"pmaxsw "tmp", "dst" \n\t"
#endif
/*
 * CHECK(pj,mj): evaluate one diagonal interpolation candidate.
 *
 * pj and mj are byte displacements applied to the line at cur+mrefs and the
 * line at cur+prefs respectively.
 *
 * Register contract, as used by this macro:
 *   in : register 7 must be zero (it is the unpack partner), %[pb_1] operand
 *   out: register 5 = candidate prediction, the truncating byte average of
 *        the two loaded rows, shifted by one byte and widened to words.
 *        pavgb rounds up, so the low bit of (a ^ b) is subtracted to undo it.
 *        register 2 = score, the sum of three neighbouring absolute
 *        differences (byte offsets 0, 1 and 2), widened to words
 *   clobbers registers 3 and 4
 */
#define CHECK(pj,mj) \
MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t"
/* cur[x-refs-1+j] */
\
MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t"
/* cur[x+refs-1-j] */
\
MOVQ" "MM"2, "MM"4 \n\t"\
MOVQ" "MM"2, "MM"5 \n\t"\
"pxor "MM"3, "MM"4 \n\t"\
"pavgb "MM"3, "MM"5 \n\t"\
"pand %[pb_1], "MM"4 \n\t"\
"psubusb "MM"4, "MM"5 \n\t"\
PSRL1(MM"5") \
"punpcklbw "MM"7, "MM"5 \n\t"
/* (cur[x-refs+j] + cur[x+refs-j])>>1 */
\
MOVQ" "MM"2, "MM"4 \n\t"\
"psubusb "MM"3, "MM"2 \n\t"\
"psubusb "MM"4, "MM"3 \n\t"\
"pmaxub "MM"3, "MM"2 \n\t"\
MOVQ" "MM"2, "MM"3 \n\t"\
MOVQ" "MM"2, "MM"4 \n\t"
/* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */
\
PSRL1(MM"3")
/* ABS(cur[x-refs +j] - cur[x+refs -j]) */
\
PSRL2(MM"4")
/* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */
\
"punpcklbw "MM"7, "MM"2 \n\t"\
"punpcklbw "MM"7, "MM"3 \n\t"\
"punpcklbw "MM"7, "MM"4 \n\t"\
"paddw "MM"3, "MM"2 \n\t"\
"paddw "MM"4, "MM"2 \n\t"
/* score */
/*
 * CHECK1: accept the candidate produced by the preceding CHECK, per lane.
 *
 *   register 0 (spatial_score) becomes min(spatial_score, score)
 *   register 3 holds the per-lane mask "spatial_score > score"
 *   register 6 receives a copy of that mask, which CHECK2 reads afterwards
 *   register 1 (spatial_pred) takes register 5 where the mask is set and
 *   keeps its previous value elsewhere (pand / pandn / por select)
 *
 * Registers 3 and 5 are clobbered.
 */
#define CHECK1 \
MOVQ" "MM"0, "MM"3 \n\t"\
"pcmpgtw "MM"2, "MM"3 \n\t"
/* if(score < spatial_score) */
\
"pminsw "MM"2, "MM"0 \n\t"
/* spatial_score= score; */
\
MOVQ" "MM"3, "MM"6 \n\t"\
"pand "MM"3, "MM"5 \n\t"\
"pandn "MM"1, "MM"3 \n\t"\
"por "MM"5, "MM"3 \n\t"\
MOVQ" "MM"3, "MM"1 \n\t"
/* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
/*
 * CHECK2: like CHECK1, but the candidate only counts in lanes where the
 * preceding CHECK1 accepted its candidate.
 *
 * Register 6 holds the CHECK1 mask (all ones where accepted, zero elsewhere).
 * Adding %[pw_1] and shifting left by 14 turns that into a per-lane penalty,
 * which is added with signed saturation to the score in register 2 before
 * the compare/min/select sequence runs. Presumably pw_1 is a vector of
 * 16-bit ones, giving a penalty of 0 for accepted lanes and 0x4000 for
 * rejected ones -- it is defined by the including file, confirm there.
 *
 * Unlike CHECK1 the resulting mask is not saved back to register 6.
 * Registers 2, 3, 5 and 6 are clobbered.
 */
#define CHECK2
/* pretend not to have checked dir=2 if dir=1 was bad.\
hurts both quality and speed, but matches the C version. */
\
"paddw %[pw_1], "MM"6 \n\t"\
"psllw $14, "MM"6 \n\t"\
"paddsw "MM"6, "MM"2 \n\t"\
MOVQ" "MM"0, "MM"3 \n\t"\
"pcmpgtw "MM"2, "MM"3 \n\t"\
"pminsw "MM"2, "MM"0 \n\t"\
"pand "MM"3, "MM"5 \n\t"\
"pandn "MM"1, "MM"3 \n\t"\
"por "MM"5, "MM"3 \n\t"\
MOVQ" "MM"3, "MM"1 \n\t"
/*
 * Inline-assembly yadif filter for one output line.
 *
 * The including file chooses the function name through RENAME() and the
 * target attribute through VLC_TARGET. It must also provide pw_1, pb_1,
 * x86_reg and DECLARE_ALIGNED, none of which are defined in this template.
 *
 * dst     output line; STEP bytes are stored per loop iteration
 * prev    same line position in the previous picture
 * cur     same line position in the current picture
 * next    same line position in the next picture
 * w       number of bytes to produce; the loop runs while x < w
 * prefs   offset added to the line pointers to reach the "x+refs" neighbour
 * mrefs   offset added to the line pointers to reach the "x-refs" neighbour
 * parity  non-zero: temporal pair is (prev, cur); zero: (cur, next)
 * mode    when >= 2 the asm jumps over the block that widens diff with
 *         MAX3(diff, min, -max)
 *
 * NOTE(review): x advances by STEP, so when w is not a multiple of STEP the
 * last iteration reads and writes past w. Presumably the caller's line pitch
 * leaves room for this -- confirm.
 * NOTE(review): the result is stored by a second asm statement that reads
 * register 1 left behind by the first one, and its output operand is *dst (a
 * single uint8_t) although MOV stores STEP bytes. The code relies on the
 * compiler not touching that register or reordering around the store.
 * NOTE(review): no emms is issued here for the MMX flavor; presumably the
 * caller handles it -- confirm.
 */
VLC_TARGET
static
void
RENAME
(
yadif_filter_line
)(
uint8_t
*
dst
,
uint8_t
*
prev
,
uint8_t
*
cur
,
uint8_t
*
next
,
int
w
,
int
prefs
,
int
mrefs
,
int
parity
,
int
mode
)
{
/* Spill slots for c, d, e and diff (see the tmp0..tmp3 stores in FILTER).
 * Declared 16-byte aligned; MOVQ is movdqa in the SSE flavor. */
DECLARE_ALIGNED
(
16
,
uint8_t
,
tmp0
)[
16
];
DECLARE_ALIGNED
(
16
,
uint8_t
,
tmp1
)[
16
];
DECLARE_ALIGNED
(
16
,
uint8_t
,
tmp2
)[
16
];
DECLARE_ALIGNED
(
16
,
uint8_t
,
tmp3
)[
16
];
int
x
;
/* FILTER expands to the whole per-line loop. The string macros prev2 and
 * next2 must be defined to operand names ("prev", "cur" or "next") at the
 * point of expansion, because they are pasted into the asm template. */
#define FILTER\
for(x=0; x<w; x+=STEP){\
__asm__ volatile(\
"pxor "MM"7, "MM"7 \n\t"\
LOAD("(%[cur],%[mrefs])", MM"0")
/* c = cur[x-refs] */
\
LOAD("(%[cur],%[prefs])", MM"1")
/* e = cur[x+refs] */
\
LOAD("(%["prev2"])", MM"2")
/* prev2[x] */
\
LOAD("(%["next2"])", MM"3")
/* next2[x] */
\
MOVQ" "MM"3, "MM"4 \n\t"\
"paddw "MM"2, "MM"3 \n\t"\
"psraw $1, "MM"3 \n\t"
/* d = (prev2[x] + next2[x])>>1 */
\
MOVQ" "MM"0, %[tmp0] \n\t"
/* c */
\
MOVQ" "MM"3, %[tmp1] \n\t"
/* d */
\
MOVQ" "MM"1, %[tmp2] \n\t"
/* e */
\
"psubw "MM"4, "MM"2 \n\t"\
PABS( MM"4", MM"2")
/* temporal_diff0 */
\
LOAD("(%[prev],%[mrefs])", MM"3")
/* prev[x-refs] */
\
LOAD("(%[prev],%[prefs])", MM"4")
/* prev[x+refs] */
\
"psubw "MM"0, "MM"3 \n\t"\
"psubw "MM"1, "MM"4 \n\t"\
PABS( MM"5", MM"3")\
PABS( MM"5", MM"4")\
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff1 */
\
"psrlw $1, "MM"2 \n\t"\
"psrlw $1, "MM"3 \n\t"\
"pmaxsw "MM"3, "MM"2 \n\t"\
LOAD("(%[next],%[mrefs])", MM"3")
/* next[x-refs] */
\
LOAD("(%[next],%[prefs])", MM"4")
/* next[x+refs] */
\
"psubw "MM"0, "MM"3 \n\t"\
"psubw "MM"1, "MM"4 \n\t"\
PABS( MM"5", MM"3")\
PABS( MM"5", MM"4")\
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff2 */
\
"psrlw $1, "MM"3 \n\t"\
"pmaxsw "MM"3, "MM"2 \n\t"\
MOVQ" "MM"2, %[tmp3] \n\t"
/* diff */
\
\
"paddw "MM"0, "MM"1 \n\t"\
"paddw "MM"0, "MM"0 \n\t"\
"psubw "MM"1, "MM"0 \n\t"\
"psrlw $1, "MM"1 \n\t"
/* spatial_pred */
\
PABS( MM"2", MM"0")
/* ABS(c-e) */
\
\
MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t"
/* cur[x-refs-1] */
\
MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t"
/* cur[x+refs-1] */
\
MOVQ" "MM"2, "MM"4 \n\t"\
"psubusb "MM"3, "MM"2 \n\t"\
"psubusb "MM"4, "MM"3 \n\t"\
"pmaxub "MM"3, "MM"2 \n\t"\
PSHUF(MM"3", MM"2") \
"punpcklbw "MM"7, "MM"2 \n\t"
/* ABS(cur[x-refs-1] - cur[x+refs-1]) */
\
"punpcklbw "MM"7, "MM"3 \n\t"
/* ABS(cur[x-refs+1] - cur[x+refs+1]) */
\
"paddw "MM"2, "MM"0 \n\t"\
"paddw "MM"3, "MM"0 \n\t"\
"psubw %[pw_1], "MM"0 \n\t"
/* spatial_score */
\
\
CHECK(-2,0)\
CHECK1\
CHECK(-3,1)\
CHECK2\
CHECK(0,-2)\
CHECK1\
CHECK(1,-3)\
CHECK2\
\
/* if(p->mode<2) ... */
\
MOVQ" %[tmp3], "MM"6 \n\t"
/* diff */
\
"cmpl $2, %[mode] \n\t"\
"jge 1f \n\t"\
LOAD("(%["prev2"],%[mrefs],2)", MM"2")
/* prev2[x-2*refs] */
\
LOAD("(%["next2"],%[mrefs],2)", MM"4")
/* next2[x-2*refs] */
\
LOAD("(%["prev2"],%[prefs],2)", MM"3")
/* prev2[x+2*refs] */
\
LOAD("(%["next2"],%[prefs],2)", MM"5")
/* next2[x+2*refs] */
\
"paddw "MM"4, "MM"2 \n\t"\
"paddw "MM"5, "MM"3 \n\t"\
"psrlw $1, "MM"2 \n\t"
/* b */
\
"psrlw $1, "MM"3 \n\t"
/* f */
\
MOVQ" %[tmp0], "MM"4 \n\t"
/* c */
\
MOVQ" %[tmp1], "MM"5 \n\t"
/* d */
\
MOVQ" %[tmp2], "MM"7 \n\t"
/* e */
\
"psubw "MM"4, "MM"2 \n\t"
/* b-c */
\
"psubw "MM"7, "MM"3 \n\t"
/* f-e */
\
MOVQ" "MM"5, "MM"0 \n\t"\
"psubw "MM"4, "MM"5 \n\t"
/* d-c */
\
"psubw "MM"7, "MM"0 \n\t"
/* d-e */
\
MOVQ" "MM"2, "MM"4 \n\t"\
"pminsw "MM"3, "MM"2 \n\t"\
"pmaxsw "MM"4, "MM"3 \n\t"\
"pmaxsw "MM"5, "MM"2 \n\t"\
"pminsw "MM"5, "MM"3 \n\t"\
"pmaxsw "MM"0, "MM"2 \n\t"
/* max */
\
"pminsw "MM"0, "MM"3 \n\t"
/* min */
\
"pxor "MM"4, "MM"4 \n\t"\
"pmaxsw "MM"3, "MM"6 \n\t"\
"psubw "MM"2, "MM"4 \n\t"
/* -max */
\
"pmaxsw "MM"4, "MM"6 \n\t"
/* diff= MAX3(diff, min, -max); */
\
"1: \n\t"\
\
MOVQ" %[tmp1], "MM"2 \n\t"
/* d */
\
MOVQ" "MM"2, "MM"3 \n\t"\
"psubw "MM"6, "MM"2 \n\t"
/* d-diff */
\
"paddw "MM"6, "MM"3 \n\t"
/* d+diff */
\
"pmaxsw "MM"2, "MM"1 \n\t"\
"pminsw "MM"3, "MM"1 \n\t"
/* d = clip(spatial_pred, d-diff, d+diff); */
\
"packuswb "MM"1, "MM"1 \n\t"\
\
:[tmp0]"=m"(tmp0),\
[tmp1]"=m"(tmp1),\
[tmp2]"=m"(tmp2),\
[tmp3]"=m"(tmp3)\
:[prev] "r"(prev),\
[cur] "r"(cur),\
[next] "r"(next),\
[prefs]"r"((x86_reg)prefs),\
[mrefs]"r"((x86_reg)mrefs),\
[pw_1] "m"(pw_1),\
[pb_1] "m"(pb_1),\
[mode] "g"(mode)\
:REGMM"0",REGMM"1",REGMM"2",REGMM"3",REGMM"4",REGMM"5",REGMM"6",REGMM"7"\
);\
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
dst += STEP;\
prev+= STEP;\
cur += STEP;\
next+= STEP;\
}
/* Expand the loop once per parity so that the prev2/next2 operand names are
 * fixed at preprocessing time rather than selected at run time. */
if
(
parity
)
{
#define prev2 "prev"
#define next2 "cur"
FILTER
#undef prev2
#undef next2
}
else
{
#define prev2 "cur"
#define next2 "next"
FILTER
#undef prev2
#undef next2
}
}
/* Drop every macro this template defined, so the header can be included
 * again with different COMPILE_TEMPLATE_* / RENAME / VLC_TARGET settings. */
#undef STEP
#undef REGMM
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef PSHUF
#undef PSRL1
#undef PSRL2
#undef LOAD
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment