Commit b899d5ad authored by Laurent Aimar

Imported gradfun filter from MPlayer.

gradfun.h is a stripped copy of mplayer vf_gradfun.c (a function has been renamed).
parent e5058da0
......@@ -72,6 +72,7 @@ SOURCES_atmo = atmo/atmo.cpp \
atmo/AtmoMultiConnection.cpp atmo/AtmoMultiConnection.h \
atmo/MoMoConnection.cpp atmo/MoMoConnection.h \
atmo/AtmoPacketQueue.cpp atmo/AtmoPacketQueue.h
SOURCES_gradfun = gradfun.c gradfun.h
noinst_HEADERS = filter_picture.h
libvlc_LTLIBRARIES += \
......@@ -113,4 +114,5 @@ libvlc_LTLIBRARIES += \
libsharpen_plugin.la \
libwall_plugin.la \
libwave_plugin.la \
libgradfun_plugin.la \
libyuvp_plugin.la
/*****************************************************************************
* gradfun.c: wrapper for the gradfun filter from mplayer
*****************************************************************************
* Copyright (C) 2010 Laurent Aimar
* $Id$
*
* Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_cpu.h>
#include <vlc_filter.h>
/*****************************************************************************
* Module descriptor
*****************************************************************************/
/* Module entry points, defined further down in this file. */
static int  Open (vlc_object_t *);
static void Close(vlc_object_t *);

/* Prefix shared by every configuration variable of this module. */
#define CFG_PREFIX "gradfun-"

/* Allowed range for the "radius" option; Filter() clamps to it again. */
#define RADIUS_MIN (4)
#define RADIUS_MAX (32)
#define RADIUS_TEXT N_("Radius")
#define RADIUS_LONGTEXT N_("Radius in pixels")

/* Allowed range for the "strength" option; Filter() clamps to it again. */
#define STRENGTH_MIN (0.51)
#define STRENGTH_MAX (255)
#define STRENGTH_TEXT N_("Strength")
#define STRENGTH_LONGTEXT N_("Strength used to modify the value of a pixel")

vlc_module_begin()
    set_description(N_("Gradfun video filter"))
    set_shortname(N_("Gradfun"))
    /* Marked with N_() like the other user-visible strings so that it is
     * picked up for translation. */
    set_help(N_("Debanding algorithm"))
    set_capability("video filter2", 0)
    set_category(CAT_VIDEO)
    set_subcategory(SUBCAT_VIDEO_VFILTER)

    add_integer_with_range(CFG_PREFIX "radius", 16, RADIUS_MIN, RADIUS_MAX,
                           NULL, RADIUS_TEXT, RADIUS_LONGTEXT, false)
    add_float_with_range(CFG_PREFIX "strength", 1.2, STRENGTH_MIN, STRENGTH_MAX,
                         NULL, STRENGTH_TEXT, STRENGTH_LONGTEXT, false)

    set_callbacks(Open, Close)
vlc_module_end()
/*****************************************************************************
* Local prototypes
*****************************************************************************/
/* Glue that lets the MPlayer-derived gradfun.h build inside VLC: the names
 * it expects (FFMAX, HAVE_*, av_clip_uint8) are mapped onto VLC equivalents
 * before the header is included. */
#define FFMAX(a,b) __MAX(a,b)
/* HAVE_MMX2 / HAVE_SSE2 / HAVE_SSSE3 are always defined, as 1 or 0, from the
 * corresponding CAN_COMPILE_* symbols so they can be tested with #if. */
#ifdef CAN_COMPILE_MMXEXT
# define HAVE_MMX2 1
#else
# define HAVE_MMX2 0
#endif
#ifdef CAN_COMPILE_SSE2
# define HAVE_SSE2 1
#else
# define HAVE_SSE2 0
#endif
#ifdef CAN_COMPILE_SSSE3
# define HAVE_SSSE3 1
#else
# define HAVE_SSSE3 0
#endif
/* HAVE_6REGS gates the SSE2 blur routine (see the #if around
 * blur_line_sse2); here it is only enabled on x86_64. */
// FIXME too restrictive
#ifdef __x86_64__
# define HAVE_6REGS 1
#else
# define HAVE_6REGS 0
#endif
/* gradfun.h calls av_clip_uint8(); redirect it to clip_uint8_vlc. */
#define av_clip_uint8 clip_uint8_vlc
#include "gradfun.h"
/* Per-picture processing function and variable-change callback. */
static picture_t *Filter(filter_t *, picture_t *);
static int Callback(vlc_object_t *, char const *, vlc_value_t, vlc_value_t, void *);
/* Private state of one gradfun filter instance. */
struct filter_sys_t {
vlc_mutex_t lock; /* held while strength/radius are read (Filter) or written (Callback) */
float strength; /* last value of the "gradfun-strength" variable */
int radius; /* last value of the "gradfun-radius" variable */
int h_shift; /* right shift applied to the width of planes other than plane 0 */
int v_shift; /* right shift applied to the height of planes other than plane 0 */
void *base_buf; /* raw allocation behind cfg.buf; this is the pointer given to free() */
struct vf_priv_s cfg; /* state handed to filter_plane() from gradfun.h */
};
/**
 * Module "open" callback: sets up a gradfun filter instance.
 *
 * Only the planar YUV chromas listed in the switch below are accepted; for
 * each of them the horizontal/vertical subsampling shifts of the chroma
 * planes are recorded.
 *
 * @param object the filter_t being opened
 * @return VLC_SUCCESS, VLC_EGENERIC for an unsupported chroma, or
 *         VLC_ENOMEM if the private state cannot be allocated
 */
static int Open(vlc_object_t *object)
{
    filter_t *filter = (filter_t *)object;
    int h_shift;
    int v_shift;

    switch (filter->fmt_in.video.i_chroma) {
    case VLC_CODEC_I410:
    case VLC_CODEC_YV9:
        h_shift = 2; v_shift = 2;
        break;
    case VLC_CODEC_I411:
        h_shift = 2; v_shift = 0;
        break;
    case VLC_CODEC_I420:
    case VLC_CODEC_J420:
    case VLC_CODEC_YV12:
        h_shift = 1; v_shift = 1;
        break;
    case VLC_CODEC_I422:
    case VLC_CODEC_J422:
        h_shift = 1; v_shift = 0;
        break;
    case VLC_CODEC_I444:
    case VLC_CODEC_J444:
    case VLC_CODEC_YUVA:
        h_shift = 0; v_shift = 0;
        break;
    case VLC_CODEC_I440:
    case VLC_CODEC_J440:
        h_shift = 0; v_shift = 1;
        break;
    default:
        return VLC_EGENERIC;
    }

    filter_sys_t *sys = malloc(sizeof(*sys));
    if (!sys)
        return VLC_ENOMEM;

    vlc_mutex_init(&sys->lock);
    sys->h_shift  = h_shift;
    sys->v_shift  = v_shift;
    sys->strength = var_CreateGetFloatCommand(filter, CFG_PREFIX "strength");
    sys->radius   = var_CreateGetIntegerCommand(filter, CFG_PREFIX "radius");
    sys->base_buf = NULL;

    /* radius 0 never matches a clamped radius, so Filter() allocates the
     * working buffer on the first picture. */
    struct vf_priv_s *cfg = &sys->cfg;
    cfg->thresh = 0.0;
    cfg->radius = 0;
    cfg->buf    = NULL;

    /* Start from the portable C routines, then upgrade to whatever SIMD
     * versions were compiled in and are reported by the CPU. */
    cfg->filter_line = filter_line_c;
    cfg->blur_line   = blur_line_c;
#if HAVE_SSE2 && HAVE_6REGS
    if (vlc_CPU() & CPU_CAPABILITY_SSE2)
        cfg->blur_line = blur_line_sse2;
#endif
#if HAVE_MMX2
    if (vlc_CPU() & CPU_CAPABILITY_MMXEXT)
        cfg->filter_line = filter_line_mmx2;
#endif
#if HAVE_SSSE3
    if (vlc_CPU() & CPU_CAPABILITY_SSSE3)
        cfg->filter_line = filter_line_ssse3;
#endif

    filter->p_sys           = sys;
    filter->pf_video_filter = Filter;

    /* Register the callbacks last: Callback() dereferences filter->p_sys,
     * so it must not be able to run before p_sys is fully set up. */
    var_AddCallback(filter, CFG_PREFIX "strength", Callback, NULL);
    var_AddCallback(filter, CFG_PREFIX "radius",   Callback, NULL);

    return VLC_SUCCESS;
}
/**
 * Module "close" callback: releases everything Open() and Filter() set up.
 *
 * @param object the filter_t being closed
 */
static void Close(vlc_object_t *object)
{
    filter_t     *filter = (filter_t *)object;
    filter_sys_t *sys    = filter->p_sys;

    /* Unregister the callbacks installed by Open() before tearing the state
     * down; otherwise a later variable change would run Callback() against
     * a destroyed mutex and freed memory. */
    var_DelCallback(filter, CFG_PREFIX "radius",   Callback, NULL);
    var_DelCallback(filter, CFG_PREFIX "strength", Callback, NULL);

    free(sys->base_buf);
    vlc_mutex_destroy(&sys->lock);
    free(sys);
}
/**
 * Debands one picture.
 *
 * The current strength/radius are sampled under the lock and clamped to their
 * configured ranges (the radius is also rounded up to an even value). Each
 * plane is then run through filter_plane(); planes that are too small for
 * the radius, or every plane if the working buffer is missing, are copied
 * unchanged.
 *
 * @param filter the filter instance
 * @param src    input picture; always released by this function
 * @return a new output picture, or NULL if none could be obtained
 */
static picture_t *Filter(filter_t *filter, picture_t *src)
{
    filter_sys_t *sys = filter->p_sys;

    picture_t *dst = filter_NewPicture(filter);
    if (!dst) {
        picture_Release(src);
        return NULL;
    }

    vlc_mutex_lock(&sys->lock);
    float strength = __MIN(__MAX(sys->strength, STRENGTH_MIN), STRENGTH_MAX);
    int   radius   = __MIN(__MAX((sys->radius + 1) & ~1, RADIUS_MIN), RADIUS_MAX);
    vlc_mutex_unlock(&sys->lock);

    const video_format_t *fmt = &filter->fmt_in.video;
    struct vf_priv_s     *cfg = &sys->cfg;

    cfg->thresh = (1 << 15) / strength;
    if (cfg->radius != radius) {
        /* The working buffer size depends on the radius. Release the
         * previous allocation first: vlc_memalign() overwrites base_buf,
         * so the old block would otherwise be leaked on every change. */
        free(sys->base_buf);
        sys->base_buf = NULL;

        cfg->radius = radius;
        cfg->buf    = vlc_memalign(&sys->base_buf, 16,
                                   (((fmt->i_width + 15) & ~15) * (cfg->radius + 1) / 2 + 32) * sizeof(*cfg->buf));
    }

    for (int i = 0; i < dst->i_planes; i++) {
        const plane_t *srcp = &src->p[i];
        plane_t       *dstp = &dst->p[i];

        int w = fmt->i_width;
        int h = fmt->i_height;
        int r = cfg->radius;
        if (i > 0) {
            /* Planes after the first use the subsampled dimensions and a
             * radius scaled by the average of both subsampling factors,
             * re-clamped to an even value in the allowed range. */
            w >>= sys->h_shift;
            h >>= sys->v_shift;
            r = ((r >> sys->h_shift) + (r >> sys->v_shift)) / 2;
            r = __MIN(__MAX((r + 1) & ~1, RADIUS_MIN), RADIUS_MAX);
        }

        if (__MIN(w, h) > 2 * r && cfg->buf) {
            filter_plane(cfg, dstp->p_pixels, srcp->p_pixels,
                         w, h, dstp->i_pitch, srcp->i_pitch, r);
        } else {
            /* Plane too small for this radius, or no working buffer. */
            plane_CopyPixels(dstp, srcp);
        }
    }

    picture_CopyProperties(dst, src);
    picture_Release(src);
    return dst;
}
/**
 * Variable callback: stores a new "strength" or "radius" value.
 *
 * The variable named CFG_PREFIX "strength" updates the strength; any other
 * name is treated as the radius. The value is stored under the lock and
 * picked up by Filter() on the next picture.
 *
 * @return always VLC_SUCCESS
 */
static int Callback(vlc_object_t *object, char const *cmd,
                    vlc_value_t oldval, vlc_value_t newval, void *data)
{
    VLC_UNUSED(oldval);
    VLC_UNUSED(data);

    filter_sys_t *sys = ((filter_t *)object)->p_sys;
    const int is_strength = strcmp(cmd, CFG_PREFIX "strength") == 0;

    vlc_mutex_lock(&sys->lock);
    if (is_strength) {
        sys->strength = newval.f_float;
    } else {
        sys->radius = newval.i_int;
    }
    vlc_mutex_unlock(&sys->lock);

    return VLC_SUCCESS;
}
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of MPlayer.
*
* MPlayer is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* MPlayer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with MPlayer; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/*
* Debanding algorithm (from gradfun2db by prunedtree):
* Boxblur.
* Foreach pixel, if it's within threshold of the blurred value, make it closer.
* So now we have a smoothed and higher bitdepth version of all the shallow
* gradients, while leaving detailed areas untouched.
* Dither it back to 8bit.
*/
/* Filter context shared by filter_plane() and the per-line routines. */
struct vf_priv_s {
int thresh; /* threshold factor passed to filter_line(); the caller sets it to (1<<15)/strength */
int radius; /* radius the working buffer was last sized for */
uint16_t *buf; /* working buffer; filter_plane() carves the dc row and the blurred rows out of it */
/* Writes one output line from one source line and the dc row. */
void (*filter_line)(uint8_t *dst, uint8_t *src, uint16_t *dc,
int width, int thresh, const uint16_t *dithers);
/* Accumulates 2x2 source sums for one pair of source lines into buf/dc. */
void (*blur_line)(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
uint8_t *src, int sstride, int width);
};
/* Eight 16-bit words of 127; loaded as a vector constant by the MMX2/SSSE3
 * filter_line routines. */
static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {127,127,127,127,127,127,127,127};
/* Eight 16-bit words of 0x00ff; used by the SSE2 blur to mask out the low
 * byte of each word. */
static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {255,255,255,255,255,255,255,255};
/* 8x8 dither matrix. filter_plane() picks the row with y&7, and the C
 * filter_line picks the column with x&7. Values are below 128, i.e. less
 * than one output step in the <<7 fixed-point scale used by filter_line. */
static const uint16_t __attribute__((aligned(16))) dither[8][8] = {
{ 0, 96, 24,120, 6,102, 30,126 },
{ 64, 32, 88, 56, 70, 38, 94, 62 },
{ 16,112, 8,104, 22,118, 14,110 },
{ 80, 48, 72, 40, 86, 54, 78, 46 },
{ 4,100, 28,124, 2, 98, 26,122 },
{ 68, 36, 92, 60, 66, 34, 90, 58 },
{ 20,116, 12,108, 18,114, 10,106 },
{ 84, 52, 76, 44, 82, 50, 74, 42 },
};
/*
 * Portable C version of the per-line filter.
 *
 * For every pixel: scale it by 128 (<<7), compare it with the blurred value
 * in dc, pull it towards that value with a weight that falls to zero as the
 * difference grows (controlled by thresh), add the dither value for the
 * column and clip the result back to 8 bits.
 *
 * dst, src: output / input line, width pixels each
 * dc:       blurred values, one entry per pair of pixels (pixel x uses
 *           dc[x/2], like the SIMD versions of this routine)
 * thresh:   threshold factor
 * dithers:  one row of the dither matrix, indexed with x&7
 */
static void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc,
                          int width, int thresh, const uint16_t *dithers)
{
    int x;
    /* dc must advance after each odd pixel, so the step is evaluated before
     * x is incremented. Incrementing x first made pixel x read dc[(x+1)/2],
     * which disagrees with the SIMD versions and, for even widths, reads one
     * entry past the last value the caller computed. */
    for (x=0; x<width; dc+=x&1, x++) {
        int pix = src[x]<<7;
        int delta = dc[0] - pix;
        int m = abs(delta) * thresh >> 16;
        m = FFMAX(0, 127-m);        /* weight: 127 when equal, 0 once too far apart */
        m = m*m*delta >> 14;        /* correction towards the blurred value */
        pix += m + dithers[x&7];
        dst[x] = av_clip_uint8(pix>>7);
    }
}
/*
 * Portable C version of the vertical blur step.
 *
 * For each of the width output columns, sums the 2x2 block of source pixels
 * (two adjacent pixels on the line at src and on the line at src+sstride),
 * adds the running total from buf1, stores that new total in buf and writes
 * the difference between the new total and buf's previous content to dc.
 */
static void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                        uint8_t *src, int sstride, int width)
{
    for (int col = 0; col < width; col++) {
        const uint8_t *top    = src + 2 * col;
        const uint8_t *bottom = top + sstride;

        int total    = buf1[col] + top[0] + top[1] + bottom[0] + bottom[1];
        int previous = buf[col];

        buf[col] = total;
        dc[col]  = total - previous;
    }
}
#if HAVE_MMX2
/*
 * MMX2 version of filter_line: same parameters as filter_line_c, processing
 * 4 pixels per loop iteration.
 *
 * NOTE(review): the asm loop body runs at least once (the test is at the
 * bottom), so this presumably relies on the caller passing width >= 4.
 * NOTE(review): the dither vector is loaded once with movq, i.e. only the
 * first four entries of dithers are used for every group of 4 pixels.
 */
static void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
int width, int thresh, const uint16_t *dithers)
{
intptr_t x;
// Leftover pixels (width not a multiple of 4) are handled by the C version;
// width is then rounded down for the asm loop.
if (width&3) {
x = width&~3;
filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers);
width = x;
}
// The asm receives end-of-line pointers and a negative offset x that counts
// up to zero in steps of 4.
x = -width;
__asm__ volatile(
"movd %4, %%mm5 \n"
"pxor %%mm7, %%mm7 \n"
"pshufw $0, %%mm5, %%mm5 \n"
"movq %6, %%mm6 \n"
"movq %5, %%mm4 \n"
"1: \n"
"movd (%2,%0), %%mm0 \n"
"movd (%3,%0), %%mm1 \n"
"punpcklbw %%mm7, %%mm0 \n"
"punpcklwd %%mm1, %%mm1 \n"
"psllw $7, %%mm0 \n"
"pxor %%mm2, %%mm2 \n"
"psubw %%mm0, %%mm1 \n" // delta = dc - pix
"psubw %%mm1, %%mm2 \n"
"pmaxsw %%mm1, %%mm2 \n"
"pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16
"psubw %%mm6, %%mm2 \n"
"pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m)
"pmullw %%mm2, %%mm2 \n"
"paddw %%mm4, %%mm0 \n" // pix += dither
"pmulhw %%mm2, %%mm1 \n"
"psllw $2, %%mm1 \n" // m = m*m*delta >> 14
"paddw %%mm1, %%mm0 \n" // pix += m
"psraw $7, %%mm0 \n"
"packuswb %%mm0, %%mm0 \n"
"movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7)
"add $4, %0 \n"
"jl 1b \n"
"emms \n"
:"+r"(x)
:"r"(dst+width), "r"(src+width), "r"(dc+width/2),
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
:"memory"
);
}
#endif
#if HAVE_SSSE3
/*
 * SSSE3 version of filter_line: same parameters as filter_line_c, processing
 * 8 pixels per loop iteration.
 *
 * dithers and pw_7f are read with movdqa; the tables in this file are
 * declared aligned(16).
 * NOTE(review): the asm loop body runs at least once (the test is at the
 * bottom), so this presumably relies on the caller passing width >= 8.
 */
static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
int width, int thresh, const uint16_t *dithers)
{
intptr_t x;
// Leftover pixels (width not a multiple of 8) are handled by the C version;
// width is then rounded down for the asm loop.
if (width&7) {
// could be 10% faster if I somehow eliminated this
x = width&~7;
filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers);
width = x;
}
// The asm receives end-of-line pointers and a negative offset x that counts
// up to zero in steps of 8.
x = -width;
__asm__ volatile(
"movd %4, %%xmm5 \n"
"pxor %%xmm7, %%xmm7 \n"
"pshuflw $0,%%xmm5, %%xmm5 \n"
"movdqa %6, %%xmm6 \n"
"punpcklqdq %%xmm5, %%xmm5 \n"
"movdqa %5, %%xmm4 \n"
"1: \n"
"movq (%2,%0), %%xmm0 \n"
"movq (%3,%0), %%xmm1 \n"
"punpcklbw %%xmm7, %%xmm0 \n"
"punpcklwd %%xmm1, %%xmm1 \n"
"psllw $7, %%xmm0 \n"
"psubw %%xmm0, %%xmm1 \n" // delta = dc - pix
"pabsw %%xmm1, %%xmm2 \n"
"pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16
"psubw %%xmm6, %%xmm2 \n"
"pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m)
"pmullw %%xmm2, %%xmm2 \n"
"psllw $1, %%xmm2 \n"
"paddw %%xmm4, %%xmm0 \n" // pix += dither
"pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14
"paddw %%xmm1, %%xmm0 \n" // pix += m
"psraw $7, %%xmm0 \n"
"packuswb %%xmm0, %%xmm0 \n"
"movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7)
"add $8, %0 \n"
"jl 1b \n"
:"+&r"(x)
:"r"(dst+width), "r"(src+width), "r"(dc+width/2),
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
:"memory"
);
}
#endif // HAVE_SSSE3
#if HAVE_SSE2 && HAVE_6REGS
/*
 * Body of the SSE2 blur, parameterised on the instruction used to load the
 * two source lines ("movdqa" or "movdqu"). It expands inside
 * blur_line_sse2() and uses that function's parameters directly.
 *
 * x is a negative byte offset (-2*width, since the outputs are 16-bit) that
 * counts up to zero in steps of 16, so each iteration produces 8 outputs
 * from 16 source bytes per line. Each 16-bit word of a source line is split
 * into its high byte (psrlw 8) and low byte (pand with pw_ff); adding both
 * halves of both lines gives the same 2x2 sums as blur_line_c. buf1 is then
 * added, the old buf value is swapped for the new total, and the difference
 * is stored to dc.
 *
 * NOTE(review): buf and dc are accessed with movdqa and buf1 is a direct
 * memory operand of paddw, so they are presumably all expected to be
 * 16-byte aligned - confirm against the caller's buffer layout.
 * NOTE(review): the loop body runs at least once, so width is presumably
 * expected to be positive.
 */
#define BLURV(load)\
intptr_t x = -2*width;\
__asm__ volatile(\
"movdqa %6, %%xmm7 \n"\
"1: \n"\
load" (%4,%0), %%xmm0 \n"\
load" (%5,%0), %%xmm1 \n"\
"movdqa %%xmm0, %%xmm2 \n"\
"movdqa %%xmm1, %%xmm3 \n"\
"psrlw $8, %%xmm0 \n"\
"psrlw $8, %%xmm1 \n"\
"pand %%xmm7, %%xmm2 \n"\
"pand %%xmm7, %%xmm3 \n"\
"paddw %%xmm1, %%xmm0 \n"\
"paddw %%xmm3, %%xmm2 \n"\
"paddw %%xmm2, %%xmm0 \n"\
"paddw (%2,%0), %%xmm0 \n"\
"movdqa (%1,%0), %%xmm1 \n"\
"movdqa %%xmm0, (%1,%0) \n"\
"psubw %%xmm1, %%xmm0 \n"\
"movdqa %%xmm0, (%3,%0) \n"\
"add $16, %0 \n"\
"jl 1b \n"\
:"+&r"(x)\
:"r"(buf+width),\
"r"(buf1+width),\
"r"(dc+width),\
"r"(src+width*2),\
"r"(src+width*2+sstride),\
"m"(*pw_ff)\
:"memory"\
);
/*
 * SSE2 version of blur_line: same parameters as blur_line_c. Uses unaligned
 * source loads unless both the source pointer and the source stride are
 * multiples of 16.
 */
static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
uint8_t *src, int sstride, int width)
{
if (((intptr_t)src|sstride)&15) {
BLURV("movdqu");
} else {
BLURV("movdqa");
}
}
#endif // HAVE_6REGS && HAVE_SSE2
/*
 * Debands one plane.
 *
 * ctx:      filter context (working buffer, threshold, line routines)
 * dst, src: output / input plane, with dstride / sstride bytes per line
 * width, height: plane dimensions in pixels
 * r:        blur radius for this plane
 *
 * A box-blurred, half-resolution row ("dc") is kept up to date as the plane
 * is scanned, and every output line is produced by ctx->filter_line from the
 * source line and the current dc row.
 *
 * NOTE(review): the priming loop reads source lines 0 .. 2*r-1 and the
 * horizontal pass assumes width/2 >= r; the caller in gradfun.c only calls
 * this when min(width, height) > 2*r.
 */
static void filter_plane(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src,
int width, int height, int dstride, int sstride, int r)
{
// Length, in uint16_t, of one row of 2x2 sums: half the width rounded up to
// a multiple of 16.
int bstride = ((width+15)&~15)/2;
int y;
// Fixed-point scale applied (with >>16) to each box sum.
uint32_t dc_factor = (1<<21)/(r*r);
// Layout of ctx->buf: the dc row starts at +16; the ring of r blurred rows
// starts at +bstride+32.
uint16_t *dc = ctx->buf+16;
uint16_t *buf = ctx->buf+bstride+32;
int thresh = ctx->thresh;
// Zero the dc row plus 16 more entries; this also covers the row at
// buf-bstride, which the first priming call below passes as buf1.
memset(dc, 0, (bstride+16)*sizeof(*buf));
// Prime the ring: r calls, each consuming two source lines (2*y and 2*y+1).
for (y=0; y<r; y++)
ctx->blur_line(dc, buf+y*bstride, buf+(y-1)*bstride, src+2*y*sstride, sstride, width/2);
// Main loop: y starts at r and each pass emits two output lines.
for (;;) {
// While source lines remain, blur the pair starting at line y+r into the
// next ring slot and rebuild the dc row from it.
if (y < height-r) {
int mod = ((y+r)/2)%r;
uint16_t *buf0 = buf+mod*bstride;
uint16_t *buf1 = buf+(mod?mod-1:r-1)*bstride;
int x, v;
ctx->blur_line(dc, buf0, buf1, src+(y+r)*sstride, sstride, width/2);
// Horizontal running sum over a window of r entries; the scaled result is
// written back into dc, shifted left by r.
for (x=v=0; x<r; x++)
v += dc[x];
for (; x<width/2; x++) {
v += dc[x] - dc[x-r];
dc[x-r] = v * dc_factor >> 16;
}
// Right edge: repeat the last sum.
for (; x<(width+r+1)/2; x++)
dc[x-r] = v * dc_factor >> 16;
// Left edge: replicate dc[0] into the r/2 entries before it, since
// filter_line is handed dc-r/2.
for (x=-r/2; x<0; x++)
dc[x] = dc[0];
}
// First pass only: the top r lines are all filtered with this first dc row.
if (y == r) {
for (y=0; y<r; y++)
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
}
// Two output lines per dc row; lines at y >= height-r reuse the last one.
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
if (++y >= height) break;
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]);
if (++y >= height) break;
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment