Commit 53fb6484 authored by Laurent Aimar's avatar Laurent Aimar

Rewrote grain from scratch using filtered gaussian noise.

It is faster (and has use a bit of sse2), and better looking IMHO.
parent 06f934e9
/***************************************************************************** /*****************************************************************************
* noise.c : "add grain to image" video filter * grain.c: add film grain
***************************************************************************** *****************************************************************************
* Copyright (C) 2000-2007 the VideoLAN team * Copyright (C) 2010 Laurent Aimar
* $Id$ * $Id$
* *
* Authors: Antoine Cellerier <dionoea -at- videolan -dot- org> * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -28,177 +28,407 @@ ...@@ -28,177 +28,407 @@
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" # include "config.h"
#endif #endif
#include <assert.h>
#include <math.h>
#include <vlc_common.h> #include <vlc_common.h>
#include <vlc_plugin.h> #include <vlc_plugin.h>
#include <vlc_rand.h>
#include <vlc_filter.h> #include <vlc_filter.h>
#include "filter_picture.h" #include <vlc_cpu.h>
#include <vlc_rand.h>
/***************************************************************************** /*****************************************************************************
* Local prototypes * Module descriptor
*****************************************************************************/ *****************************************************************************/
static int Create ( vlc_object_t * ); static int Open (vlc_object_t *);
static void Destroy ( vlc_object_t * ); static void Close(vlc_object_t *);
static picture_t *Filter( filter_t *, picture_t * ); #define BANK_SIZE (64)
#define FILTER_PREFIX "grain-" #define CFG_PREFIX "grain-"
#define VARIANCE_MIN (0.0)
#define VARIANCE_MAX (10.0)
#define VARIANCE_TEXT N_("Variance")
#define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
#define PERIOD_MIN 1
#define PERIOD_MAX BANK_SIZE
#define PERIOD_MIN_TEXT N_("Minimal period")
#define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
#define PERIOD_MAX_TEXT N_("Maximal period")
#define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
vlc_module_begin()
set_description(N_("Grain video filter"))
set_shortname( N_("Grain"))
set_help(N_("Adds filtered gaussian noise"))
set_capability( "video filter2", 0 )
set_category(CAT_VIDEO)
set_subcategory(SUBCAT_VIDEO_VFILTER)
add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
NULL, VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
NULL, PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
NULL, PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
set_callbacks(Open, Close)
vlc_module_end()
#define NOISE_HELP N_("add grain to image")
/***************************************************************************** /*****************************************************************************
* Module descriptor * Local prototypes
*****************************************************************************/ *****************************************************************************/
vlc_module_begin ()
set_description( N_("Grain video filter") )
set_shortname( N_( "Grain" ))
set_help(NOISE_HELP)
set_capability( "video filter2", 0 )
set_category( CAT_VIDEO )
set_subcategory( SUBCAT_VIDEO_VFILTER )
set_callbacks( Create, Destroy ) #define BLEND_SIZE (8)
vlc_module_end () struct filter_sys_t {
bool is_uv_filtered;
uint32_t seed;
int scale;
int16_t bank[BANK_SIZE * BANK_SIZE];
int16_t bank_y[BANK_SIZE * BANK_SIZE];
int16_t bank_uv[BANK_SIZE * BANK_SIZE];
void (*blend)(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
const int16_t *noise);
void (*emms)(void);
struct {
vlc_mutex_t lock;
double variance;
} cfg;
};
struct filter_sys_t /* Simple and *really fast* RNG (xorshift[13,17,5])*/
#define URAND_SEED (2463534242)
static uint32_t urand(uint32_t *seed)
{ {
int *p_noise; uint32_t s = *seed;
}; s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
return *seed = s;
}
/* Uniform random value between 0 and 1 */
static double drand(uint32_t *seed)
{
return urand(seed) / (double)UINT32_MAX;
}
/* Gaussian random value with a mean of 0 and a variance of 1 */
static void grand(double *r1, double *r2, uint32_t *seed)
{
double s;
double u1, u2;
do {
u1 = 2 * drand(seed) - 1;
u2 = 2 * drand(seed) - 1;
s = u1 * u1 + u2 * u2;
} while (s >= 1.0);
s = sqrt(-2 * log(s) / s);
*r1 = u1 * s;
*r2 = u2 * s;
}
static int Create( vlc_object_t *p_this ) static void BlockBlend(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
const int16_t *noise,
int w, int h)
{ {
filter_t *p_filter = (filter_t *)p_this; for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
dst[y * dst_pitch + x] =
clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
}
}
}
switch( p_filter->fmt_in.video.i_chroma ) static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
{ const uint8_t *src, size_t src_pitch,
CASE_PLANAR_YUV const int16_t *noise)
break; {
BlockBlend(dst, dst_pitch, src, src_pitch, noise,
BLEND_SIZE, BLEND_SIZE);
}
default: #ifdef CAN_COMPILE_SSE2
msg_Err( p_filter, "Unsupported input chroma (%4.4s)", #define _STRING(x) #x
(char*)&(p_filter->fmt_in.video.i_chroma) ); #define STRING(x) _STRING(x)
return VLC_EGENERIC; static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
const int16_t *noise)
{
#if BLEND_SIZE == 8
/* TODO It is possible to do the math on 8 bits using
* paddusb X and then psubusb -X.
*/
asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
for (int i = 0; i < 8/2; i++) {
asm volatile (
"movq (%[src1]), %%xmm1\n"
"movq (%[src2]), %%xmm3\n"
"movdqu (%[noise]), %%xmm2\n"
"movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"
"punpcklbw %%xmm0, %%xmm1\n"
"punpcklbw %%xmm0, %%xmm3\n"
"paddsw %%xmm2, %%xmm1\n"
"paddsw %%xmm4, %%xmm3\n"
"packuswb %%xmm1, %%xmm1\n"
"packuswb %%xmm3, %%xmm3\n"
"movq %%xmm1, (%[dst1])\n"
"movq %%xmm3, (%[dst2])\n"
: : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
[dst2]"r"(&dst[(2*i+1) * dst_pitch]),
[src1]"r"(&src[(2*i+0) * src_pitch]),
[src2]"r"(&src[(2*i+1) * src_pitch]),
[noise]"r"(&noise[2*i * BANK_SIZE])
: "memory");
} }
#else
# error "BLEND_SIZE unsupported"
#endif
}
static void Emms(void)
{
asm volatile ("emms");
}
#endif
/* Allocate structure */ /**
p_filter->p_sys = malloc( sizeof( filter_sys_t ) ); * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
if( p_filter->p_sys == NULL ) */
return VLC_ENOMEM; static void Scale(int16_t *dst, int16_t *src, int scale)
{
const int N = BANK_SIZE;
const int shift = 7 + 8;
for (int y = 0; y < N; y++) {
for (int x = 0; x < N; x++) {
const int v = src[y * N + x];
int vq;
if (v >= 0)
vq = ( v * scale + (1 << (shift-1)) - 1) >> shift;
else
vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift);
dst[y * N + x] = vq;
}
}
}
p_filter->pf_video_filter = Filter; static void PlaneFilter(filter_t *filter,
plane_t *dst, const plane_t *src,
int16_t *bank, uint32_t *seed)
{
filter_sys_t *sys = filter->p_sys;
p_filter->p_sys->p_noise = NULL; for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
const int16_t *noise = &bank[by * BANK_SIZE + bx];
return VLC_SUCCESS; int w = dst->i_visible_pitch - x;
} int h = dst->i_visible_lines - y;
static void Destroy( vlc_object_t *p_this ) const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
{ uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x];
filter_t *p_filter = (filter_t *)p_this;
free( p_filter->p_sys->p_noise ); if (w >= BLEND_SIZE && h >= BLEND_SIZE)
free( p_filter->p_sys ); sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
else
BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise, w, h);
}
}
if (sys->emms)
sys->emms();
} }
static picture_t *Filter( filter_t *p_filter, picture_t *p_pic ) static picture_t *Filter(filter_t *filter, picture_t *src)
{ {
picture_t *p_outpic; filter_sys_t *sys = filter->p_sys;
filter_sys_t *p_sys = p_filter->p_sys;
int i_index;
if( !p_pic ) return NULL;
p_outpic = filter_NewPicture( p_filter ); picture_t *dst = filter_NewPicture(filter);
if( !p_outpic ) if (!dst) {
{ picture_Release(src);
picture_Release( p_pic );
return NULL; return NULL;
} }
{ vlc_mutex_lock(&sys->cfg.lock);
uint8_t *p_in = p_pic->p[Y_PLANE].p_pixels; const double variance = __MIN(__MAX(sys->cfg.variance, VARIANCE_MIN), VARIANCE_MAX);
uint8_t *p_out = p_outpic->p[Y_PLANE].p_pixels; vlc_mutex_unlock(&sys->cfg.lock);
const int i_num_lines = p_pic->p[Y_PLANE].i_visible_lines; const int scale = 256 * sqrt(variance);
const int i_num_cols = p_pic->p[Y_PLANE].i_visible_pitch; if (scale != sys->scale) {
const int i_pitch = p_pic->p[Y_PLANE].i_pitch; sys->scale = scale;
Scale(sys->bank_y, sys->bank, sys->scale);
Scale(sys->bank_uv, sys->bank, sys->scale / 2);
}
int i_line, i_col; for (int i = 0; i < dst->i_planes; i++) {
const plane_t *srcp = &src->p[i];
plane_t *dstp = &dst->p[i];
int *p_noise = p_sys->p_noise; if (i == 0 || sys->is_uv_filtered) {
if( !p_noise ) int16_t *bank = i == 0 ? sys->bank_y :
{ sys->bank_uv;
p_noise = p_sys->p_noise = PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
(int*)malloc(i_pitch*i_num_lines*sizeof(int)); }
else {
plane_CopyPixels(dstp, srcp);
}
} }
for( i_line = 0; i_line < i_num_lines; i_line++ ) picture_CopyProperties(dst, src);
{ picture_Release(src);
for( i_col = 0; i_col < i_num_cols; i_col++ ) return dst;
{ }
p_noise[i_line*i_pitch+i_col] = ((vlc_mrand48()&0x1f)-0x0f);
/**
* Generate a filteried gaussian noise within [-127, 127] range.
*/
static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
{
const int N = BANK_SIZE;
double *workspace = calloc(3 * N * N, sizeof(*workspace));
if (!workspace)
return VLC_ENOMEM;
double *gn = &workspace[0 * N * N];
double *cij = &workspace[1 * N * N];
double *tmp = &workspace[2 * N * N];
/* Create a gaussian noise matrix */
assert((N % 2) == 0);
uint32_t seed = URAND_SEED;
for (int y = 0; y < N; y++) {
for (int x = 0; x < N/2; x++) {
grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
} }
} }
for( i_line = 2/*0*/ ; i_line < i_num_lines-2/**/; i_line++ ) /* Clear non selected frequency.
{ * Only the central band is kept */
for( i_col = 2/*0*/; i_col < i_num_cols/2; i_col++ ) int zero = 0;
{ for (int y = 0; y < N; y++) {
p_out[i_line*i_pitch+i_col] = clip_uint8_vlc( for (int x = 0; x < N; x++) {
p_in[i_line*i_pitch+i_col] if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
#if 0 gn[y * N + x] = 0.0;
+ p_noise[i_line*i_pitch+i_col] ); zero++;
#else }
/* 2 rows up */ }
+ (( ( p_noise[(i_line-2)*i_pitch+i_col-2]<<1 ) }
+ ( p_noise[(i_line-2)*i_pitch+i_col-1]<<2 ) const double correction = sqrt((double)N * N / (N * N - zero));
+ ( p_noise[(i_line-2)*i_pitch+i_col]<<2 )
+ ( p_noise[(i_line-2)*i_pitch+i_col+1]<<2 ) /* Filter the gaussian noise using an IDCT
+ ( p_noise[(i_line-2)*i_pitch+i_col+2]<<1 ) * The algo is simple/stupid and does C * GN * Ct */
/* 1 row up */ for (int i = 0; i < N; i++) {
+ ( p_noise[(i_line-1)*i_pitch+i_col-2]<<2 ) for (int j = 0; j < N; j++) {
+ ( p_noise[(i_line-1)*i_pitch+i_col-1]<<3 ) cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
+ ( p_noise[(i_line-1)*i_pitch+i_col]*12 ) sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
+ ( p_noise[(i_line-1)*i_pitch+i_col+1]<<3 ) }
+ ( p_noise[(i_line-1)*i_pitch+i_col+2]<<2 ) }
/* */
+ ( p_noise[i_line*i_pitch+i_col-2]<<2 )
+ ( p_noise[i_line*i_pitch+i_col-1]*12 )
+ ( p_noise[i_line*i_pitch+i_col]<<4 )
+ ( p_noise[i_line*i_pitch+i_col+1]*12 )
+ ( p_noise[i_line*i_pitch+i_col+2]<<2 )
/* 1 row down */
+ ( p_noise[(i_line+1)*i_pitch+i_col-2]<<2 )
+ ( p_noise[(i_line+1)*i_pitch+i_col-1]<<3 )
+ ( p_noise[(i_line+1)*i_pitch+i_col]*12 )
+ ( p_noise[(i_line+1)*i_pitch+i_col+1]<<3 )
+ ( p_noise[(i_line+1)*i_pitch+i_col+2]<<2 )
/* 2 rows down */
+ ( p_noise[(i_line+2)*i_pitch+i_col-2]<<1 )
+ ( p_noise[(i_line+2)*i_pitch+i_col-1]<<2 )
+ ( p_noise[(i_line+2)*i_pitch+i_col]<<2 )
+ ( p_noise[(i_line+2)*i_pitch+i_col+1]<<2 )
+ ( p_noise[(i_line+2)*i_pitch+i_col+2]<<1 )
)>>7/*/152*/));
#endif
//mtime_t tmul_0 = mdate();
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
double v = 0.0;
for (int k = 0; k < N; k++)
v += gn[i * N + k] * cij[k * N + j];
tmp[i * N + j] = v;
}
} }
for( ; i_col < i_num_cols; i_col++ ) for (int i = 0; i < N; i++) {
p_out[i_line*i_pitch+i_col] = p_in[i_line*i_pitch+i_col]; for (int j = 0; j < N; j++) {
double v = 0.0;
for (int k = 0; k < N; k++)
v += cij[k * N + i] * tmp[k * N + j];
/* Do not bias when rounding */
int vq;
if (v >= 0)
vq = (int)( v * correction * 127 + 0.5);
else
vq = -(int)(-v * correction * 127 + 0.5);
bank[i * N + j] = __MIN(__MAX(vq, INT16_MIN), INT16_MAX);
} }
} }
//mtime_t mul_duration = mdate() - tmul_0;
//fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
for( i_index = 1; i_index < p_pic->i_planes; i_index++ ) free(workspace);
{ return VLC_SUCCESS;
uint8_t *p_in = p_pic->p[i_index].p_pixels; }
uint8_t *p_out = p_outpic->p[i_index].p_pixels;
const int i_lines = p_pic->p[i_index].i_lines; static int Callback(vlc_object_t *object, char const *cmd,
const int i_pitch = p_pic->p[i_index].i_pitch; vlc_value_t oldval, vlc_value_t newval, void *data)
{
filter_t *filter = (filter_t *)object;
filter_sys_t *sys = filter->p_sys;
VLC_UNUSED(oldval); VLC_UNUSED(data);
vlc_mutex_lock(&sys->cfg.lock);
//if (!strcmp(cmd, CFG_PREFIX "variance"))
sys->cfg.variance = newval.f_float;
vlc_mutex_unlock(&sys->cfg.lock);
return VLC_SUCCESS;
}
vlc_memcpy( p_out, p_in, i_lines * i_pitch ); static int Open(vlc_object_t *object)
{
filter_t *filter = (filter_t *)object;
const vlc_chroma_description_t *chroma =
vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
if (!chroma || chroma->plane_count < 3) {
msg_Err(filter, "Unsupported chroma (%4.4s)",
(char*)&(filter->fmt_in.video.i_chroma));
return VLC_EGENERIC;
}
filter_sys_t *sys = malloc(sizeof(*sys));
if (!sys)
return VLC_ENOMEM;
sys->is_uv_filtered = true;
sys->scale = -1;
sys->seed = URAND_SEED;
int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
cutoff_low = __MIN(__MAX(cutoff_low, 1), BANK_SIZE - 1);
cutoff_high = __MIN(__MAX(cutoff_high, 1), BANK_SIZE - 1);
if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
free(sys);
return VLC_EGENERIC;
} }
return CopyInfoAndRelease( p_outpic, p_pic ); sys->blend = BlockBlendC;
sys->emms = NULL;
#if defined(CAN_COMPILE_SSE2) && 1
if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
sys->blend = BlockBlendSse2;
sys->emms = Emms;
}
#endif
vlc_mutex_init(&sys->cfg.lock);
sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
filter->p_sys = sys;
filter->pf_video_filter = Filter;
return VLC_SUCCESS;
} }
static void Close(vlc_object_t *object)
{
filter_t *filter = (filter_t *)object;
filter_sys_t *sys = filter->p_sys;
vlc_mutex_destroy(&sys->cfg.lock);
free(sys);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment