Commit c7d289ca authored by Juha Jeronen's avatar Juha Jeronen Committed by Rémi Denis-Courmont

Refactored deinterlacer module

Signed-off-by: default avatarRémi Denis-Courmont <remi@remlab.net>
parent 814a19c3
......@@ -10,7 +10,16 @@ SOURCES_crop = crop.c
SOURCES_motionblur = motionblur.c
SOURCES_logo = logo.c
SOURCES_audiobargraph_v = audiobargraph_v.c
SOURCES_deinterlace = deinterlace.c yadif.h mmx.h
SOURCES_deinterlace = deinterlace/deinterlace.c \
deinterlace/mmx.h deinterlace/common.h \
deinterlace/merge.c deinterlace/merge.h \
deinterlace/helpers.c deinterlace/helpers.h \
deinterlace/algo_basic.c deinterlace/algo_basic.h \
deinterlace/algo_x.c deinterlace/algo_x.h \
deinterlace/algo_yadif.c deinterlace/algo_yadif.h \
deinterlace/yadif.h \
deinterlace/algo_phosphor.c deinterlace/algo_phosphor.h \
deinterlace/algo_ivtc.c deinterlace/algo_ivtc.h
SOURCES_blend = blend.c
SOURCES_scale = scale.c
SOURCES_marq = marq.c
......
/*****************************************************************************
* algo_basic.c : Basic algorithms for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#include <stdint.h>
#include <vlc_common.h>
#include <vlc_picture.h>
#include <vlc_filter.h>
#include "merge.h"
#include "deinterlace.h" /* definition of p_sys, needed for Merge() */
#include "algo_basic.h"
/*****************************************************************************
* RenderDiscard: only keep TOP or BOTTOM field, discard the other.
*****************************************************************************/
void RenderDiscard( filter_t *p_filter,
                    picture_t *p_outpic, picture_t *p_pic, int i_field )
{
    /* Keep only the requested field: walk the input two lines at a time,
       starting at the first line of that field (i_field: 0 = top, 1 = bottom). */
    for( int i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        const int i_in_pitch  = p_pic->p[i_plane].i_pitch;
        const int i_out_pitch = p_outpic->p[i_plane].i_pitch;

        uint8_t *p_src = p_pic->p[i_plane].p_pixels + i_field * i_in_pitch;
        uint8_t *p_dst = p_outpic->p[i_plane].p_pixels;
        uint8_t *const p_dst_end = p_dst
            + i_out_pitch * p_outpic->p[i_plane].i_visible_lines;

        switch( p_filter->fmt_in.video.i_chroma )
        {
        case VLC_CODEC_I420:
        case VLC_CODEC_J420:
        case VLC_CODEC_YV12:
            /* 4:2:0: every plane is handled the same way — copy one line,
               skip the other field's line. */
            while( p_dst < p_dst_end )
            {
                vlc_memcpy( p_dst, p_src, i_in_pitch );
                p_dst += i_out_pitch;
                p_src += 2 * i_in_pitch;
            }
            break;

        case VLC_CODEC_I422:
        case VLC_CODEC_J422:
            if( i_plane == Y_PLANE )
            {
                /* Luma: write each kept line twice (line-double) so the
                   output plane keeps its full height. */
                while( p_dst < p_dst_end )
                {
                    vlc_memcpy( p_dst, p_src, i_in_pitch );
                    p_dst += i_out_pitch;
                    vlc_memcpy( p_dst, p_src, i_in_pitch );
                    p_dst += i_out_pitch;
                    p_src += 2 * i_in_pitch;
                }
            }
            else
            {
                /* Chroma: copy each kept line once. */
                while( p_dst < p_dst_end )
                {
                    vlc_memcpy( p_dst, p_src, i_in_pitch );
                    p_dst += i_out_pitch;
                    p_src += 2 * i_in_pitch;
                }
            }
            break;

        default:
            /* Unsupported chroma: leave this plane untouched. */
            break;
        }
    }
}
/*****************************************************************************
* RenderBob: renders a BOB picture - simple copy
*****************************************************************************/
void RenderBob( filter_t *p_filter,
                picture_t *p_outpic, picture_t *p_pic, int i_field )
{
    int i_plane;

    /* Render the requested field (0 = top, 1 = bottom) at full height by
       writing each kept field line twice ("bob"). Copy image and skip the
       other field's lines. */
    for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        uint8_t *p_in, *p_out_end, *p_out;
        p_in = p_pic->p[i_plane].p_pixels;
        p_out = p_outpic->p[i_plane].p_pixels;
        p_out_end = p_out + p_outpic->p[i_plane].i_pitch
                             * p_outpic->p[i_plane].i_visible_lines;
        switch( p_filter->fmt_in.video.i_chroma )
        {
            case VLC_CODEC_I420:
            case VLC_CODEC_J420:
            case VLC_CODEC_YV12:
                /* For BOTTOM field we need to add the first line:
                   output line 0 has no bottom-field source above it. */
                if( i_field == 1 )
                {
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }
                /* Reserve the last two output lines; they are written
                   separately after the main loop. */
                p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
                for( ; p_out < p_out_end ; )
                {
                    /* Write the same input line into two consecutive
                       output lines (line doubling). */
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_out += p_outpic->p[i_plane].i_pitch;
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_in += 2 * p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }
                vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                /* For TOP field we need to add the last line */
                if( i_field == 0 )
                {
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                }
                break;
            case VLC_CODEC_I422:
            case VLC_CODEC_J422:
                /* For BOTTOM field we need to add the first line */
                if( i_field == 1 )
                {
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }
                p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
                if( i_plane == Y_PLANE )
                {
                    /* Luma: line-double as in the 4:2:0 case. */
                    for( ; p_out < p_out_end ; )
                    {
                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                        p_out += p_outpic->p[i_plane].i_pitch;
                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                        p_in += 2 * p_pic->p[i_plane].i_pitch;
                        p_out += p_outpic->p[i_plane].i_pitch;
                    }
                }
                else
                {
                    /* Chroma: one output line per kept input line
                       (4:2:2 chroma has full vertical resolution). */
                    for( ; p_out < p_out_end ; )
                    {
                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                        p_out += p_outpic->p[i_plane].i_pitch;
                        p_in += 2 * p_pic->p[i_plane].i_pitch;
                    }
                }
                vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                /* For TOP field we need to add the last line */
                if( i_field == 0 )
                {
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                }
                break;
        }
    }
}
/*****************************************************************************
* RenderLinear: BOB with linear interpolation
*****************************************************************************/
void RenderLinear( filter_t *p_filter,
                   picture_t *p_outpic, picture_t *p_pic, int i_field )
{
    /* Bob variant: keep the lines of the requested field (0 = top,
       1 = bottom) and fill the other field's lines by averaging the
       two neighbouring kept lines. */
    for( int i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        const int i_in_pitch  = p_pic->p[i_plane].i_pitch;
        const int i_out_pitch = p_outpic->p[i_plane].i_pitch;

        uint8_t *p_src = p_pic->p[i_plane].p_pixels;
        uint8_t *p_dst = p_outpic->p[i_plane].p_pixels;
        uint8_t *p_dst_end = p_dst
            + i_out_pitch * p_outpic->p[i_plane].i_visible_lines;

        /* For the BOTTOM field, output line 0 has no neighbour above:
           plain copy of the first line. */
        if( i_field == 1 )
        {
            vlc_memcpy( p_dst, p_src, i_in_pitch );
            p_src += i_in_pitch;
            p_dst += i_out_pitch;
        }

        /* Reserve the last two output lines; written after the loop. */
        p_dst_end -= 2 * i_out_pitch;

        while( p_dst < p_dst_end )
        {
            /* Kept field line, then the interpolated in-between line. */
            vlc_memcpy( p_dst, p_src, i_in_pitch );
            p_dst += i_out_pitch;
            Merge( p_dst, p_src, p_src + 2 * i_in_pitch, i_in_pitch );
            p_src += 2 * i_in_pitch;
            p_dst += i_out_pitch;
        }

        vlc_memcpy( p_dst, p_src, i_in_pitch );

        /* For the TOP field, duplicate the last kept line at the bottom. */
        if( i_field == 0 )
        {
            p_src += i_in_pitch;
            p_dst += i_out_pitch;
            vlc_memcpy( p_dst, p_src, i_in_pitch );
        }
    }
    EndMerge();
}
/*****************************************************************************
* RenderMean: Half-resolution blender
*****************************************************************************/
void RenderMean( filter_t *p_filter,
                 picture_t *p_outpic, picture_t *p_pic )
{
    /* Half-resolution blend: each output line is the average of one
       top-field line and the bottom-field line below it. */
    for( int i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        const int i_in_pitch  = p_pic->p[i_plane].i_pitch;
        const int i_out_pitch = p_outpic->p[i_plane].i_pitch;

        uint8_t *p_src = p_pic->p[i_plane].p_pixels;
        uint8_t *p_dst = p_outpic->p[i_plane].p_pixels;
        uint8_t *const p_dst_end = p_dst
            + i_out_pitch * p_outpic->p[i_plane].i_visible_lines;

        /* All lines: mean value of the input line pair. */
        while( p_dst < p_dst_end )
        {
            Merge( p_dst, p_src, p_src + i_in_pitch, i_in_pitch );
            p_dst += i_out_pitch;
            p_src += 2 * i_in_pitch;
        }
    }
    EndMerge();
}
/*****************************************************************************
* RenderBlend: Full-resolution blender
*****************************************************************************/
void RenderBlend( filter_t *p_filter,
                  picture_t *p_outpic, picture_t *p_pic )
{
    /* Full-resolution blend: output line 0 is a plain copy; every
       following output line N is the mean of input lines N-1 and N. */
    for( int i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        const int i_in_pitch  = p_pic->p[i_plane].i_pitch;
        const int i_out_pitch = p_outpic->p[i_plane].i_pitch;

        uint8_t *p_src = p_pic->p[i_plane].p_pixels;
        uint8_t *p_dst = p_outpic->p[i_plane].p_pixels;
        uint8_t *const p_dst_end = p_dst
            + i_out_pitch * p_outpic->p[i_plane].i_visible_lines;

        switch( p_filter->fmt_in.video.i_chroma )
        {
        case VLC_CODEC_I420:
        case VLC_CODEC_J420:
        case VLC_CODEC_YV12:
            /* First line: simple copy. */
            vlc_memcpy( p_dst, p_src, i_in_pitch );
            p_dst += i_out_pitch;
            /* Remaining lines: mean of consecutive input lines. */
            while( p_dst < p_dst_end )
            {
                Merge( p_dst, p_src, p_src + i_in_pitch, i_in_pitch );
                p_dst += i_out_pitch;
                p_src += i_in_pitch;
            }
            break;

        case VLC_CODEC_I422:
        case VLC_CODEC_J422:
            /* First line: simple copy. */
            vlc_memcpy( p_dst, p_src, i_in_pitch );
            p_dst += i_out_pitch;
            /* Remaining lines: mean value. */
            if( i_plane == Y_PLANE )
            {
                while( p_dst < p_dst_end )
                {
                    Merge( p_dst, p_src, p_src + i_in_pitch, i_in_pitch );
                    p_dst += i_out_pitch;
                    p_src += i_in_pitch;
                }
            }
            else
            {
                /* NOTE(review): the chroma path advances the input by two
                   lines per output line, unlike the luma path above —
                   presumably tied to the output plane geometry; confirm
                   against the caller before changing. */
                while( p_dst < p_dst_end )
                {
                    Merge( p_dst, p_src, p_src + i_in_pitch, i_in_pitch );
                    p_dst += i_out_pitch;
                    p_src += 2 * i_in_pitch;
                }
            }
            break;
        }
    }
    EndMerge();
}
/*****************************************************************************
* algo_basic.h : Basic algorithms for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_BASIC_H
#define VLC_DEINTERLACE_ALGO_BASIC_H 1
/**
* \file
* Basic deinterlace algorithms: Discard, Bob, Linear, Mean and Blend.
*/
/* Forward declarations */
struct filter_t;
struct picture_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
* RenderDiscard: only keep top or bottom field, discard the other.
*
* For a 2x (framerate-doubling) near-equivalent, see RenderBob().
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_outpic Output frame. Must be allocated by caller.
* @param p_pic Input frame. Must exist.
* @param i_field Keep which field? 0 = top field, 1 = bottom field.
* @see RenderBob()
* @see Deinterlace()
*/
void RenderDiscard( filter_t *p_filter,
picture_t *p_outpic, picture_t *p_pic, int i_field );
/**
* RenderBob: basic framerate doubler.
*
* Creates an illusion of full vertical resolution while running.
*
* For a 1x (non-doubling) near-equivalent, see RenderDiscard().
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_outpic Output frame. Must be allocated by caller.
* @param p_pic Input frame. Must exist.
* @param i_field Render which field? 0 = top field, 1 = bottom field.
* @see RenderLinear()
* @see Deinterlace()
*/
void RenderBob( filter_t *p_filter,
picture_t *p_outpic, picture_t *p_pic, int i_field );
/**
* RenderLinear: Bob with linear interpolation.
*
* There is no 1x (non-doubling) equivalent for this filter.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_outpic Output frame. Must be allocated by caller.
* @param p_pic Input frame. Must exist.
* @param i_field Render which field? 0 = top field, 1 = bottom field.
* @see RenderBob()
* @see Deinterlace()
*/
void RenderLinear( filter_t *p_filter,
picture_t *p_outpic, picture_t *p_pic, int i_field );
/**
* RenderMean: half-resolution blender.
*
* Renders the mean of the top and bottom fields.
*
* Obviously, there is no 2x equivalent for this filter.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_outpic Output frame. Must be allocated by caller.
* @param p_pic Input frame. Must exist.
* @see Deinterlace()
*/
void RenderMean( filter_t *p_filter,
picture_t *p_outpic, picture_t *p_pic );
/**
* RenderBlend: full-resolution blender.
*
* The first line is copied; for the rest of the lines, line N
* is the mean of lines N and N-1 in the input.
*
* Obviously, there is no 2x equivalent for this filter.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_outpic Output frame. Must be allocated by caller.
* @param p_pic Input frame. Must exist.
* @see Deinterlace()
*/
void RenderBlend( filter_t *p_filter,
picture_t *p_outpic, picture_t *p_pic );
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*****************************************************************************
* algo_ivtc.h : IVTC (inverse telecine) algorithm for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2010-2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_IVTC_H
#define VLC_DEINTERLACE_ALGO_IVTC_H 1
/* Forward declarations */
struct filter_t;
struct picture_t;
/*****************************************************************************
* Data structures
*****************************************************************************/
#define IVTC_NUM_FIELD_PAIRS 7
#define IVTC_DETECTION_HISTORY_SIZE 3
#define IVTC_LATEST (IVTC_DETECTION_HISTORY_SIZE-1)
/**
 * Algorithm-specific state for IVTC.
 *
 * NOTE(review): the *_history arrays appear to be indexed so that
 * IVTC_LATEST (= IVTC_DETECTION_HISTORY_SIZE-1) is the newest entry —
 * confirm in RenderIVTC().
 *
 * @see RenderIVTC()
 */
typedef struct
{
    int i_mode; /**< Detecting, hard TC, or soft TC. @see ivtc_mode */
    int i_old_mode; /**< @see IVTCSoftTelecineDetect() */
    int i_cadence_pos; /**< Cadence counter, 0..4. Runs when locked on. */
    int i_tfd; /**< TFF or BFF telecine. Detected from the video. */

    /** Raw low-level detector output.
     *
     *  @see IVTCLowLevelDetect()
     */
    int pi_scores[IVTC_NUM_FIELD_PAIRS]; /**< Interlace scores. */
    int pi_motion[IVTC_DETECTION_HISTORY_SIZE]; /**< 8x8 blocks with motion. */
    int pi_top_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard top field repeat. */
    int pi_bot_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard bot field repeat. */

    /** Interlace scores of outgoing frames, used for judging IVTC output
     *  (detecting cadence breaks).
     *
     *  @see IVTCOutputOrDropFrame()
     */
    int pi_final_scores[IVTC_DETECTION_HISTORY_SIZE];

    /** Cadence position detection history (in ivtc_cadence_pos format).
     *  Contains the detected cadence position and a corresponding
     *  reliability flag for each algorithm.
     *
     *  s = scores, interlace scores based algorithm, original to this filter.
     *  v = vektor, hard field repeat based algorithm, inspired by
     *      the TVTime/Xine IVTC filter by Billy Biggs (Vektor).
     *
     *  Each algorithm may also keep internal, opaque data.
     *
     *  @see ivtc_cadence_pos
     *  @see IVTCCadenceDetectAlgoScores()
     *  @see IVTCCadenceDetectAlgoVektor()
     */
    int pi_s_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
    bool pb_s_reliable[IVTC_DETECTION_HISTORY_SIZE];
    int pi_v_raw[IVTC_DETECTION_HISTORY_SIZE]; /**< "vektor" algo internal */
    int pi_v_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
    bool pb_v_reliable[IVTC_DETECTION_HISTORY_SIZE];

    /** Final result, chosen by IVTCCadenceDetectFinalize() from the results
     *  given by the different detection algorithms.
     *
     *  @see IVTCCadenceDetectFinalize()
     */
    int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE];

    /**
     *  Set by cadence analyzer. Whether the sequence of last
     *  IVTC_DETECTION_HISTORY_SIZE detected positions, stored in
     *  pi_cadence_pos_history, looks like a valid telecine.
     *
     *  @see IVTCCadenceAnalyze()
     */
    bool b_sequence_valid;

    /**
     *  Set by cadence analyzer. True if detected position = "dea".
     *  The three entries of this are used for detecting three progressive
     *  stencil positions in a row, i.e. five progressive frames in a row;
     *  this triggers exit from hard IVTC.
     *
     *  @see IVTCCadenceAnalyze()
     */
    bool pb_all_progressives[IVTC_DETECTION_HISTORY_SIZE];
} ivtc_sys_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
* Deinterlace filter. Performs inverse telecine.
*
* Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
*
* This filter attempts to reconstruct the original film frames from an
* NTSC telecined signal. It is intended for 24fps progressive material
* that was telecined to NTSC 60i. For example, most NTSC anime DVDs
* are like this.
*
* There is no input frame parameter, because the input frames
* are taken from the history buffer.
*
* This algorithm does CUSTOM_PTS timestamp mangling.
*
* See the file comment for a detailed description of the algorithm.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param[out] p_dst Output frame. Must be allocated by caller.
* @return VLC error code (int).
* @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
* @retval VLC_EGENERIC Frame dropped as part of normal IVTC operation.
* @see Deinterlace()
* @see ComposeFrame()
* @see CalculateInterlaceScore()
* @see EstimateNumBlocksWithMotion()
*/
int RenderIVTC( filter_t *p_filter, picture_t *p_dst );
/**
* Clears the inverse telecine subsystem state.
*
* Used during initialization and uninitialization
* (called from Open() and Flush()).
*
* @param p_filter The filter instance.
* @see RenderIVTC()
* @see Open()
* @see Flush()
*/
void IVTCClearState( filter_t *p_filter );
/*****************************************************************************
* Extra documentation
*****************************************************************************/
/**
* \file
* IVTC (inverse telecine) algorithm for the VLC deinterlacer.
* Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
*
* Summary:
*
* This is a "live IVTC" filter, which attempts to do in realtime what
* Transcode's ivtc->decimate->32detect chain does offline. Additionally,
* it removes soft telecine. It is an original design, based on some ideas
* from Transcode, some from TVTime/Xine, and some original.
*
* If the input material is pure NTSC telecined film, inverse telecine
* will (ideally) exactly recover the original progressive film frames.
* The output will run at 4/5 of the original framerate with no loss of
* information. Interlacing artifacts are removed, and motion becomes
* as smooth as it was on the original film. For soft-telecined material,
* on the other hand, the progressive frames already exist, so only the
* timings are changed such that the output becomes smooth 24fps (or would,
* if the output device had an infinite framerate).
*
* Put in simple terms, this filter is targeted for NTSC movies and
* especially anime. Virtually all 1990s and early 2000s anime is
* hard-telecined. Because the source material is like that,
* IVTC is needed for also virtually all official R1 (US) anime DVDs.
*
* Note that some anime from the turn of the century (e.g. Silent Mobius
* and Sol Bianca) is a hybrid of telecined film and true interlaced
* computer-generated effects and camera pans. In this case, applying IVTC
* will effectively attempt to reconstruct the frames based on the film
* component, but even if this is successful, the framerate reduction will
* cause the computer-generated effects to stutter. This is mathematically
* unavoidable. Instead of IVTC, a framerate doubling deinterlacer is
* recommended for such material. Try "Phosphor", "Bob", or "Linear".
*
* Fortunately, 30fps true progressive anime is on the rise (e.g. ARIA,
* Black Lagoon, Galaxy Angel, Ghost in the Shell: Solid State Society,
* Mai Otome, Last Exile, and Rocket Girls). This type requires no
* deinterlacer at all.
*
* Another recent trend is using 24fps computer-generated effects and
* telecining them along with the cels (e.g. Kiddy Grade, Str.A.In. and
* The Third: The Girl with the Blue Eye). For this group, IVTC is the
* correct way to deinterlace, and works properly.
*
* Soft telecined anime, while rare, also exists. Stellvia of the Universe
* and Angel Links are examples of this. Stellvia constantly alternates
* between soft and hard telecine - pure CGI sequences are soft-telecined,
* while sequences incorporating cel animation are hard-telecined.
* This makes it very hard for the cadence detector to lock on,
* and indeed Stellvia gives some trouble for the filter.
*
* To finish the list of different material types, Azumanga Daioh deserves
* a special mention. The OP and ED sequences are both 30fps progressive,
* while the episodes themselves are hard-telecined. This filter should
* mostly work correctly with such material, too. (The beginning of the OP
* shows some artifacts, but otherwise both the OP and ED are indeed
* rendered progressive. The technical reason is that the filter has been
* designed to aggressively reconstruct film frames, which helps in many
* cases with hard-telecined material. In very rare cases, this approach may
* go wrong, regardless of whether the input is telecined or progressive.)
*
* Finally, note also that IVTC is the only correct way to deinterlace NTSC
* telecined material. Simply applying an interpolating deinterlacing filter
* (with no framerate doubling) is harmful for two reasons. First, even if
* the filter does not damage already progressive frames, it will lose half
* of the available vertical resolution of those frames that are judged
* interlaced. Some algorithms combining data from multiple frames may be
* able to counter this to an extent, effectively performing something akin
* to the frame reconstruction part of IVTC. A more serious problem is that
* any motion will stutter, because (even in the ideal case) one out of
* every four film frames will be shown twice, while the other three will
* be shown only once. Duplicate removal and framerate reduction - which are
* part of IVTC - are also needed to properly play back telecined material
* on progressive displays at a non-doubled framerate.
*
* So, try this filter on your NTSC anime DVDs. It just might help.
*
*
* Technical details:
*
*
* First, NTSC hard telecine in a nutshell:
*
* Film is commonly captured at 24 fps. The framerate must be raised from
* 24 fps to 59.94 fields per second. This starts by pretending that the
* original framerate is 23.976 fps. When authoring, the audio can be
* slowed down by 0.1% to match. Now 59.94 = 5/4 * (2*23.976), which gives
* a nice ratio made out of small integers.
*
* Thus, each group of four film frames must become five frames in the NTSC
* video stream. One cannot simply repeat one frame of every four, because
* this would result in jerky motion. To slightly soften the jerkiness,
* the extra frame is split into two extra fields, inserted at different
* times. The content of the extra fields is (in classical telecine)
* duplicated as-is from existing fields.
*
* The field duplication technique is called "3:2 pulldown". The pattern
* is called the cadence. The output from 3:2 pulldown looks like this
* (if the telecine is TFF, top field first):
*
* a b c d e Telecined frame (actual frames stored on DVD)
* T1 T1 T2 T3 T4 *T*op field content
* B1 B2 B3 B3 B4 *B*ottom field content
*
* Numbers 1-4 denote the original film frames. E.g. T1 = top field of
* original film frame 1. The field Tb, and one of either Bc or Bd, are
* the extra fields inserted in the telecine. With exact duplication, it
* of course doesn't matter whether Bc or Bd is the extra field, but
* with "full field blended" material (see below) this will affect how to
* correctly extract film frame 3.
*
* See the following web pages for illustrations and discussion:
* http://neuron2.net/LVG/telecining1.html
* http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
*
* Note that film frame 2 has been stored "half and half" into two telecined
* frames (b and c). Note also that telecine produces a sequence of
* 3 progressive frames (d, e and a) followed by 2 interlaced frames
* (b and c).
*
* The output may also look like this (BFF telecine, bottom field first):
*
* a' b' c' d' e'
* T1 T2 T3 T3 T4
* B1 B1 B2 B3 B4
*
* Now field Bb', and one of either Tc' or Td', are the extra fields.
* Again, film frame 2 is stored "half and half" (into b' and c').
*
* Whether the pattern is like abcde or a'b'c'd'e', depends on the telecine
* field dominance (TFF or BFF). This must match the video field dominance,
* but is conceptually different. Importantly, there is no temporal
* difference between those fields that came from the same film frame.
* Also, see the section on soft telecine below.
*
* In a hard telecine, the TFD and VFD must match for field renderers
* (e.g. traditional DVD player + CRT TV) to work correctly; this should be
* fairly obvious by considering the above telecine patterns and how a
* field renderer displays the material (one field at a time, dominant
* field first).
*
* The VFD may, *correctly*, flip mid-stream, if soft field repeats
* (repeat_pict) have been used. They are commonly used in soft telecine
* (see below), but also occasional lone field repeats exist in some streams,
* e.g., Sol Bianca.
*
* See e.g.
* http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
* for discussion. The document discusses mostly PAL, but includes some notes
* on NTSC, too.
*
* The reason for the words "classical telecine" above, when field
* duplication was first mentioned, is that there exists a
* "full field blended" version, where the added fields are not exact
* duplicates, but are blends of the original film frames. This is rare
* in NTSC, but some material like this reportedly exists. See
* http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
* In these cases, the additional fields are a (probably 50%) blend of the
* frames between which they have been inserted. Which one of the two
* possibilities is the extra field then becomes important.
* This filter does NOT support "full field blended" material.
*
* To summarize, the 3:2 pulldown sequence produces a group of ten fields
* out of every four film frames. Only eight of these fields are unique.
* To remove the telecine, the duplicate fields must be removed, and the
* original progressive frames restored. Additionally, the presentation
* timestamps (PTS) must be adjusted, and one frame out of five (containing
* no new information) dropped. The duration of each frame in the output
* becomes 5/4 of that in the input, i.e. 25% longer.
*
* Theoretically, this whole mess could be avoided by soft telecining, if the
* original material is pure 24fps progressive. By using the stream flags
* correctly, the original progressive frames can be stored on the DVD.
* In such cases, the DVD player will apply "soft" 3:2 pulldown. See the
* following section.
*
* Also, the mess with cadence detection for hard telecine (see below) could
* be avoided by using the progressive frame flag and a five-frame future
* buffer, but no one ever sets the flag correctly for hard-telecined
* streams. All frames are marked as interlaced, regardless of their cadence
* position. This is evil, but sort-of-understandable, given that video
* editors often come with "progressive" and "interlaced" editing modes,
* but no separate "telecined" mode that could correctly handle this
* information.
*
* In practice, most material with its origins in Asia (including virtually
* all official US (R1) anime DVDs) is hard-telecined. Combined with the
* turn-of-the-century practice of rendering true interlaced effects
* on top of the hard-telecined stream, we have what can only be described
* as a monstrosity. Fortunately, recent material is much more consistent,
* even though still almost always hard-telecined.
*
* Finally, note that telecined video is often edited directly in interlaced
* form, disregarding safe cut positions as pertains to the telecine sequence
* (there are only two: between "d" and "e", or between "e" and the
* next "a"). Thus, the telecine sequence will in practice jump erratically
* at cuts [**]. An aggressive detection strategy is needed to cope with
* this.
*
* [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
*
*
* Note about chroma formats: 4:2:0 is very common at least on anime DVDs.
* In the interlaced frames in a hard telecine, the chroma alternates
* every chroma line, even if the chroma format is 4:2:0! This means that
* if the interlaced picture is viewed as-is, the luma alternates every line,
* while the chroma alternates only every two lines of the picture.
*
* That is, an interlaced frame in a 4:2:0 telecine looks like this
* (numbers indicate which film frame the data comes from):
*
* luma stored 4:2:0 chroma displayed chroma
* 1111 1111 1111
* 2222 1111
* 1111 2222 2222
* 2222 2222
* ... ... ...
*
* The deinterlace filter sees the stored 4:2:0 chroma. The "displayed chroma"
* is only generated later in the filter chain (probably when YUV is converted
* to the display format, if the display does not accept YUV 4:2:0 directly).
*
*
* Next, how NTSC soft telecine works:
*
* a b c d Frame index (actual frames stored on DVD)
* T1 T2 T3 T4 *T*op field content
* B1 B2 B3 B4 *B*ottom field content
*
* Here the progressive frames are stored as-is. The catch is in the stream
* flags. For hard telecine, which was explained above, we have
* VFD = constant and nb_fields = 2, just like in a true progressive or
* true interlaced stream. Soft telecine, on the other hand, looks like this:
*
* a b c d
* 3 2 3 2 nb_fields
* T B B T *Video* field dominance (for TFF telecine)
* B T T B *Video* field dominance (for BFF telecine)
*
* Now the video field dominance flipflops every two frames!
*
* Note that nb_fields = 3 means the frame duration will be 1.5x that of a
* normal frame. Often, soft-telecined frames are correctly flagged as
* progressive.
*
* Here the telecining is expected to be done by the player, utilizing the
* soft field repeat (repeat_pict) feature. This is indeed what a field
* renderer (traditional interlaced equipment, or a framerate doubler)
* should do with such a stream.
*
* In the IVTC filter, our job is to even out the frame durations, but
* disregard video field dominance and just pass the progressive pictures
* through as-is.
*
* Fortunately, for soft telecine to work at all, the stream flags must be
* set correctly. Thus this type can be detected reliably by reading
* nb_fields from three consecutive frames:
*
* Let P = previous, C = current, N = next. If the frame to be rendered is C,
* there are only three relevant nb_fields flag patterns for the three-frame
* stencil concerning soft telecine:
*
* P C N What is happening:
* 2 3 2 Entering soft telecine at frame C, or running inside it already.
* 3 2 3 Running inside soft telecine.
* 3 2 2 Exiting soft telecine at frame C. C is the last frame that should
* be handled as soft-telecined. (If we do timing adjustments to the
* "3"s only, we can already exit soft telecine mode when we see
* this pattern.)
*
* Note that the same stream may alternate between soft and hard telecine,
* but these cannot occur at the same time. The start and end of the
* soft-telecined parts can be read off the stream flags, and the rest of
* the stream can be handed to the hard IVTC part of the filter for analysis.
*
* Finally, note also that a stream may also request a lone field repeat
* (a sudden "3" surrounded by "2"s). Fortunately, these can be handled as
* a two-frame soft telecine, as they match the first and third
* flag patterns above.
*
* Combinations with several "3"s in a row are not valid for soft or hard
* telecine, so if they occur, the frames can be passed through as-is.
*
*
* Cadence detection for hard telecine:
*
* Consider viewing the TFF and BFF hard telecine sequences through a
* three-frame stencil. Again, let P = previous, C = current, N = next.
* A brief analysis leads to the following cadence tables.
*
* PCN = stencil position (Previous Current Next),
* Dups. = duplicate fields,
* Best field pairs... = combinations of fields which correctly reproduce
* the original progressive frames,
* * = see timestamp considerations below for why
* this particular arrangement.
*
* For TFF:
*
* PCN Dups. Best field pairs for progressive (correct, theoretical)
* abc TP = TC TPBP = frame 1, TCBP = frame 1, TNBC = frame 2
* bcd BC = BN TCBP = frame 2, TNBC = frame 3, TNBN = frame 3
* cde BP = BC TCBP = frame 3, TCBC = frame 3, TNBN = frame 4
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
* eab TC = TN TPBP = frame 4, TCBC = frame 1, TNBC = frame 1
*
* (table cont'd)
* PCN Progressive output*
* abc frame 2 = TNBC (compose TN+BC)
* bcd frame 3 = TNBN (copy N)
* cde frame 4 = TNBN (copy N)
* dea (drop)
* eab frame 1 = TCBC (copy C), or TNBC (compose TN+BC)
*
* On the rows "dea" and "eab", frame 1 refers to a frame from the next
* group of 4. "Compose TN+BC" means to construct a frame using the
* top field of N, and the bottom field of C. See ComposeFrame().
*
* For BFF, swap all B and T, and rearrange the symbol pairs to again
* read "TxBx". We have:
*
* PCN Dups. Best field pairs for progressive (correct, theoretical)
* abc BP = BC TPBP = frame 1, TPBC = frame 1, TCBN = frame 2
* bcd TC = TN TPBC = frame 2, TCBN = frame 3, TNBN = frame 3
* cde TP = TC TPBC = frame 3, TCBC = frame 3, TNBN = frame 4
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
* eab BC = BN TPBP = frame 4, TCBC = frame 1, TCBN = frame 1
*
* (table cont'd)
* PCN Progressive output*
* abc frame 2 = TCBN (compose TC+BN)
* bcd frame 3 = TNBN (copy N)
* cde frame 4 = TNBN (copy N)
* dea (drop)
* eab frame 1 = TCBC (copy C), or TCBN (compose TC+BN)
*
* From these cadence tables we can extract two strategies for
* cadence detection. We use both.
*
* Strategy 1: duplicated fields ("vektor").
*
* Consider that each stencil position has a unique duplicate field
* condition. In one unique position, "dea", there is no match; in all
* other positions, exactly one. By conservatively filtering the
* possibilities based on detected hard field repeats (identical fields
* in successive input frames), it is possible to gradually lock on
* to the cadence. This kind of strategy is used by the classic IVTC filter
* in TVTime/Xine by Billy Biggs (Vektor), hence the name.
*
* "Conservative" here means that we do not rule anything out, but start at
* each stencil position by suggesting the position "dea", and then only add
* to the list of possibilities based on field repeats that are detected at
* the present stencil position. This estimate is then filtered by ANDing
* against a shifted (time-advanced) version of the estimate from the
* previous stencil position. Once the detected position becomes unique,
* the filter locks on. If the new detection is inconsistent with the
* previous one, the detector resets itself and starts from scratch.
*
* The strategy is very reliable, as it only requires running (fuzzy)
* duplicate field detection against the input. It is very good at staying
* locked on once it acquires the cadence, and it does so correctly very
* often. These are indeed characteristics that can be observed in the
* behaviour of the TVTime/Xine filter.
*
* Note especially that 8fps/12fps animation, common in anime, will cause
* spurious hard-repeated fields. The conservative nature of the method
* makes it very good at dealing with this - any spurious repeats will only
* slow down the lock-on, not completely confuse it. It should also be good
* at detecting the presence of a telecine, as neither true interlaced nor
* true progressive material should contain any hard field repeats.
* (This, however, has not been tested yet.)
*
* The disadvantages are that at times the method may lock on slowly,
* because the detection must be filtered against the history until
* a unique solution is found. Resets, if they happen, will also
* slow down the lock-on.
*
* The hard duplicate detection required by this strategy can be made
* data-adaptive in several ways. TVTime uses a running average of motion
* scores for its history buffer. We utilize a different, original approach.
* It is rare, if not nonexistent, that only one field changes between
* two valid frames. Thus, if one field changes "much more" than the other
* in fieldwise motion detection, the less changed one is probably a
* duplicate. Importantly, this works with telecined input, too - the field
* that changes "much" may be part of another film frame, while the "less"
* changed one is actually a duplicate from the previous film frame.
* If both fields change "about as much", then no hard field repeat
* is detected.
*
*
* Strategy 2: progressive/interlaced field combinations ("scores").
*
* We can also form a second strategy, which is not as reliable in practice,
* but which locks on faster when it does. This is original to this filter.
*
* Consider all possible field pairs from two successive frames: TCBC, TCBN,
* TNBC, TNBN. After one frame, these become TPBP, TPBC, TCBP, TCBC.
* These eight pairs (seven unique, disregarding the duplicate TCBC)
* are the exhaustive list of possible field pairs from two successive
* frames in the three-frame PCN stencil.
*
* The above tables list triplets of field pair combinations for each cadence
* position, which should produce progressive frames. All the given triplets
* are unique in each table alone, although the one at "dea" is
* indistinguishable from the case of pure progressive material. It is also
* the only one which is not unique across both tables.
*
* Thus, all sequences of two neighboring triplets are unique across both
* tables. (For "neighboring", each table is considered to wrap around from
* "eab" back to "abc", i.e. from the last row back to the first row.)
* Furthermore, each sequence of three neighboring triplets is redundantly
* unique (i.e. is unique, and reduces the chance of false positives).
* (In practice, though, we already know which table to consider, from the fact
* that TFD and VFD must match. Checking only the relevant table makes the
* strategy slightly more robust.)
*
* The important idea is: *all other* field pair combinations should produce
* frames that look interlaced. This includes those combinations present in
* the "wrong" (i.e. not current position) rows of the table (insofar as
* those combinations are not also present in the "correct" row; by the
* uniqueness property, *every* "wrong" row will always contain at least one
* combination that differs from those in the "correct" row).
*
* We generate the artificial frames TCBC, TCBN, TNBC and TNBN (virtually;
* no data is actually moved). Two of these are just the frames C and N,
* which already exist; the two others correspond to composing the given
* field pairs. We then compute the interlace score for each of these frames.
* The interlace scores of what are now TPBP, TPBC and TCBP, also needed,
* were computed by this same mechanism during the previous input frame.
 * These can be slid forward in the history and reused.
*
* We then check, using the computed interlace scores, and taking into
* account the video field dominance information, which field combination
* triplet given in the appropriate table produces the smallest sum of
* interlace scores. Unless we are at PCN = "dea" (which could also be pure
* progressive!), this immediately gives us the most likely current cadence
* position. Combined with a two-step history, the sequence of three most
* likely positions found this way always allows us to make a more or less
* reliable detection. (That is, when a reliable detection is possible; if the
* video has no motion at all, every detection will report the position "dea".
* In anime, still shots are common. Thus we must augment this with a
* full-frame motion detection that switches the detector off if no motion
* was detected.)
*
* The detection seems to need four full-frame interlace analyses per frame.
* Actually, three are enough, because the previous N is the new C, so we can
* slide the already computed result. Also during initialization, we only
* need to compute TNBN on the first frame; this has become TPBP when the
* third frame is reached. Similarly, we compute TNBN, TNBC and TCBN during
 * the second frame (just before the filter starts), and these get slid
* into TCBC, TCBP and TPBC when the third frame is reached. At that point,
* initialization is complete.
*
* Because we only compare interlace scores against each other, no threshold
* is needed in the cadence detector. Thus it, trivially, adapts to the
* material automatically.
*
* The weakness of this approach is that any comb metric detects incorrectly
* every now and then. Especially slow vertical camera pans often get treated
* wrong, because the messed-up field combination looks less interlaced
* according to the comb metric (especially in anime) than the correct one
* (which contains, correctly, one-pixel thick cartoon outlines, parts of
 * which are often perfectly horizontal).
*
* The advantage is that this strategy catches horizontal camera pans
* immediately and reliably, while the other strategy may still be trying
* to lock on.
*
*
* Frame reconstruction:
*
* We utilize a hybrid approach. If a valid cadence is locked on, we use the
* operation table to decide what to do. This handles those cases correctly,
* which would be difficult for the interlace detector alone (e.g. vertical
* camera pans). Note that the operations that must be performed for IVTC
* include timestamp mangling and frame dropping, which can only be done
* reliably on a valid cadence.
*
* When the cadence fails (we detect this from a sudden upward jump in the
* interlace scores of the constructed frames), we reset the "vektor"
* detector strategy and fall back to an emergency frame composer, where we
* use ideas from Transcode's IVTC.
*
* In this emergency mode, we simply output the least interlaced frame out of
* the combinations TNBN, TNBC and TCBN (where only one of the last two is
* tested, based on the stream TFF/BFF information). In this mode, we do not
* touch the timestamps, and just pass all five frames from each group right
* through. This introduces some stutter, but in practice it is often not
* noticeable. This is because the kind of material that is likely to trip up
* the cadence detector usually includes irregular 8fps/12fps motion. With
* true 24fps motion, the cadence quickly locks on, and stays locked on.
*
* Once the cadence locks on again, we resume normal operation based on
* the operation table.
*
*
* Timestamp mangling:
*
* To make five into four we need to extend frame durations by 25%.
* Consider the following diagram (times given in 90kHz ticks, rounded to
* integers; this is just for illustration, and for comparison with the
* "scratch paper" comments in pulldown.c of TVTime/Xine):
*
* NTSC input (29.97 fps)
* a b c d e a (from next group) ...
* 0 3003 6006 9009 12012 15015
* 0 3754 7508 11261 15015
* 1 2 3 4 1 (from next group) ...
* Film output (23.976 fps)
*
* Three of the film frames have length 3754, and one has 3753
* (it is 1/90000 sec shorter). This rounding was chosen so that the lengths
* of the group of four sum to the original 15015.
*
* From the diagram we get these deltas for presentation timestamp adjustment
* (in 90 kHz ticks, for illustration):
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
* 0 +751 +1502 +2252 (skip) 0 ...
*
* In fractions of (p_next->date - p_cur->date), regardless of actual
* time unit, the deltas are:
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
* 0 +0.25 +0.50 +0.75 (skip) 0 ...
*
* This is what we actually use. In our implementation, the values are stored
* multiplied by 4, as integers.
*
* The "current" frame should be displayed at [original time + delta].
* E.g., when "current" = b (i.e. PCN = abc), start displaying film frame 2
* at time [original time of b + 751 ticks]. So, when we catch the cadence,
* we will start mangling the timestamps according to the cadence position
* of the "current" frame, using the deltas given above. This will cause
* a one-time jerk, most noticeable if the cadence happens to catch at
* position "d". (Alternatively, upon lock-on, we could wait until we are
* at "a" before switching on IVTC, but this makes the maximal delay
* [max. detection + max. wait] = 3 + 4 = 7 input frames, which comes to
* 7/30 ~ 0.23 seconds instead of the 3/30 = 0.10 seconds from purely
* the detection. The one-time jerk is simpler to implement and gives the
* faster lock-on.)
*
* It is clear that "e" is a safe choice for the dropped frame. This can be
* seen from the timings and the cadence tables. First, consider the timings.
* If we have only one future frame, "e" is the only one whose PTS, comparing
* to the film frames, allows dropping it safely. To see this, consider which
* film frame needs to be rendered as each new input frame arrives. Secondly,
* consider the cadence tables. It is ok to drop "e", because the same
* film frame "1" is available also at the next PCN position "eab".
* (As a side note, it is interesting that Vektor's filter drops "b".
* See the TVTime sources.)
*
* When the filter falls out of film mode, the timestamps of the incoming
* frames are left untouched. Thus, the output from this filter has a
* variable framerate: 4/5 of the input framerate when IVTC is active
* (whether hard or soft), and the same framerate as input when it is not
* (or when in emergency mode).
*
*
* For other open-source IVTC codes, which may be a useful source for ideas,
* see the following:
*
* The classic filter by Billy Biggs (Vektor). Written in 2001-2003 for
* TVTime, and adapted into Xine later. In xine-lib 1.1.19, it is at
* src/post/deinterlace/pulldown.*. Also needed are tvtime.*, and speedy.*.
*
* Transcode's ivtc->decimate->32detect chain by Thanassis Tsiodras.
* Written in 2002, added in Transcode 0.6.12. This probably has something
* to do with the same chain in MPlayer, considering that MPlayer acquired
* an IVTC filter around the same time. In Transcode 1.1.5, the IVTC part is
* at filter/filter_ivtc.c. Transcode 1.1.5 sources can be downloaded from
* http://developer.berlios.de/project/showfiles.php?group_id=10094
*/
#endif
/*****************************************************************************
* algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#include <stdint.h>
#include <assert.h>
#include <vlc_common.h>
#include <vlc_cpu.h>
#include <vlc_picture.h>
#include <vlc_filter.h>
#include "deinterlace.h" /* filter_sys_t */
#include "helpers.h" /* ComposeFrame() */
#include "algo_phosphor.h"
/*****************************************************************************
* Internal functions
*****************************************************************************/
/**
* Internal helper function: dims (darkens) the given field
* of the given picture.
*
* This is used for simulating CRT light output decay in RenderPhosphor().
*
* The strength "1" is recommended. It's a matter of taste,
* so it's parametrized.
*
* Note on chroma formats:
* - If input is 4:2:2, all planes are processed.
* - If input is 4:2:0, only the luma plane is processed, because both fields
* have the same chroma. This will distort colours, especially for high
* filter strengths, especially for pixels whose U and/or V values are
* far away from the origin (which is at 128 in uint8 format).
*
* @param p_dst Input/output picture. Will be modified in-place.
* @param i_field Darken which field? 0 = top, 1 = bottom.
* @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
* @see RenderPhosphor()
* @see ComposeFrame()
*/
static void DarkenField( picture_t *p_dst, const int i_field,
                         const int i_strength )
{
    assert( p_dst != NULL );
    assert( i_field == 0 || i_field == 1 );
    assert( i_strength >= 1 && i_strength <= 3 );

    unsigned u_cpu = vlc_CPU();

    /* Bitwise ANDing with this clears the i_strength highest bits
       of each byte */
#ifdef CAN_COMPILE_MMXEXT
    uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
                                             number of bits) */
#endif
    const uint8_t  remove_high_u8 = 0xFF >> i_strength;
    const uint64_t remove_high_u64 = remove_high_u8 *
                                            INT64_C(0x0101010101010101);

    /* Process luma.

       For luma, the operation is just a shift + bitwise AND, so we vectorize
       even in the C version: eight pixels are packed into one uint64_t.
       Right-shifting the whole 64-bit word smears bits across byte
       boundaries, but the AND with remove_high_u64 then clears exactly the
       i_strength high bits of each byte, which removes the contamination
       (and is also what makes the per-byte shift work at all).

       There is an MMX version, too, because it performs about twice faster.
    */
    int i_plane = Y_PLANE;
    uint8_t *p_out, *p_out_end;
    int w = p_dst->p[i_plane].i_visible_pitch;
    p_out = p_dst->p[i_plane].p_pixels;
    p_out_end = p_out + p_dst->p[i_plane].i_pitch
                      * p_dst->p[i_plane].i_visible_lines;

    /* skip first line for bottom field */
    if( i_field == 1 )
        p_out += p_dst->p[i_plane].i_pitch;

    int wm8 = w % 8; /* remainder */
    int w8  = w - wm8; /* part of width that is divisible by 8 */

    /* Step two pitches per iteration so that only the requested field's
       lines get darkened. */
    for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
    {
        uint64_t *po = (uint64_t *)p_out;
#ifdef CAN_COMPILE_MMXEXT
        if( u_cpu & CPU_CAPABILITY_MMXEXT )
        {
            movq_m2r( i_strength_u64, mm1 );
            movq_m2r( remove_high_u64, mm2 );
            for( int x = 0 ; x < w8; x += 8 )
            {
                movq_m2r( (*po), mm0 );

                psrlq_r2r( mm1, mm0 );
                pand_r2r( mm2, mm0 );

                /* note: po is advanced here, inside the macro argument */
                movq_r2m( mm0, (*po++) );
            }
        }
        else
        {
#endif
            /* C fallback: same shift + mask, eight pixels per iteration */
            for( int x = 0 ; x < w8; x += 8, ++po )
                (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
#ifdef CAN_COMPILE_MMXEXT
        }
#endif
        /* handle the width remainder byte by byte */
        if( wm8 )
        {
            uint8_t *po_temp = (uint8_t *)po;
            for( int x = 0 ; x < wm8; ++x, ++po_temp )
                (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
        }
    }

    /* Process chroma if the field chromas are independent
       (4:2:2 only; for 4:2:0 both fields share chroma, see function doc).

       The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
       The chroma processing is a bit more complicated than luma,
       and needs MMX for vectorization: each sample must be pulled toward
       the chroma origin 128 rather than toward zero.
    */
    if( p_dst->format.i_chroma == VLC_CODEC_I422 ||
        p_dst->format.i_chroma == VLC_CODEC_J422 )
    {
        for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
        {
            if( i_plane == Y_PLANE )
                continue; /* luma already handled */

            int w = p_dst->p[i_plane].i_visible_pitch;
#ifdef CAN_COMPILE_MMXEXT
            int wm8 = w % 8; /* remainder */
            int w8  = w - wm8; /* part of width that is divisible by 8 */
#endif
            p_out = p_dst->p[i_plane].p_pixels;
            p_out_end = p_out + p_dst->p[i_plane].i_pitch
                              * p_dst->p[i_plane].i_visible_lines;

            /* skip first line for bottom field */
            if( i_field == 1 )
                p_out += p_dst->p[i_plane].i_pitch;

            for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
            {
#ifdef CAN_COMPILE_MMXEXT
                /* See also easy-to-read C version below. */
                if( u_cpu & CPU_CAPABILITY_MMXEXT )
                {
                    static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
                    movq_m2r( b128, mm5 );
                    movq_m2r( i_strength_u64, mm6 );
                    movq_m2r( remove_high_u64, mm7 );

                    uint64_t *po = (uint64_t *)p_out;
                    for( int x = 0 ; x < w8; x += 8 )
                    {
                        movq_m2r( (*po), mm0 );

                        /* Split each byte into its positive and negative
                           distance from 128, using unsigned saturation. */
                        movq_r2r( mm5, mm2 ); /* 128 */
                        movq_r2r( mm0, mm1 ); /* copy of data */
                        psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
                        psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */

                        /* >> i_strength */
                        psrlq_r2r( mm6, mm1 );
                        psrlq_r2r( mm6, mm2 );
                        pand_r2r( mm7, mm1 );
                        pand_r2r( mm7, mm2 );

                        /* collect results from pos./neg. parts */
                        psubb_r2r( mm2, mm1 );
                        paddb_r2r( mm5, mm1 );

                        movq_r2m( mm1, (*po++) );
                    }

                    /* handle the width remainder */
                    if( wm8 )
                    {
                        /* The output is closer to 128 than the input;
                           the result always fits in uint8. */
                        uint8_t *po8 = (uint8_t *)po;
                        for( int x = 0 ; x < wm8; ++x, ++po8 )
                            (*po8) = 128 + ( ((*po8) - 128) /
                                                  (1 << i_strength) );
                    }
                }
                else
                {
#endif
                    /* 4:2:2 chroma handler, C version */
                    uint8_t *po = p_out;
                    for( int x = 0 ; x < w; ++x, ++po )
                        (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
#ifdef CAN_COMPILE_MMXEXT
                }
#endif
            } /* for p_out... */
        } /* for i_plane... */
    } /* if b_i422 */

#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
        emms();
#endif
}
/*****************************************************************************
* Public functions
*****************************************************************************/
/* See header for function doc. */
int RenderPhosphor( filter_t *p_filter,
                    picture_t *p_dst,
                    int i_order, int i_field )
{
    assert( p_filter != NULL );
    assert( p_dst != NULL );
    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
    assert( i_field == 0 || i_field == 1 );

    filter_sys_t *p_sys = p_filter->p_sys;

    /* Fetch the two most recent input frames from the history buffer. */
    picture_t *p_curr = p_sys->pp_history[HISTORY_SIZE-1];
    picture_t *p_prev = p_sys->pp_history[HISTORY_SIZE-2];

    /* If the history mechanism has failed, we can't render anything. */
    if( p_curr == NULL )
        return VLC_EGENERIC;

    /* At the first frame after startup there is no previous picture yet;
       use the current input for both. */
    if( p_prev == NULL )
        p_prev = p_curr;

    /* Choose the source picture for each output field. By default both
       fields come from the current frame; for the first output field of
       each input frame, the opposite field still shows the previous frame. */
    picture_t *p_top_src    = p_curr;
    picture_t *p_bottom_src = p_curr;
    if( i_order == 0 )
    {
        if( i_field == 0 ) /* rendering top field */
            p_bottom_src = p_prev;
        else               /* i_field == 1, rendering bottom field */
            p_top_src = p_prev;
    }

    /* Map the configured 4:2:0 chroma handling mode to a frame composition
       strategy. The initializer only silences a compiler warning; every
       valid mode is covered below. */
    compose_chroma_t cc = CC_ALTLINE;
    const phosphor_chroma_t chroma_mode = p_sys->phosphor.i_chroma_for_420;
    if( chroma_mode == PC_BLEND )
        cc = CC_MERGE;
    else if( chroma_mode == PC_LATEST )
        cc = ( i_field == 0 ) ? CC_SOURCE_TOP : CC_SOURCE_BOTTOM;
    else if( chroma_mode == PC_ALTLINE )
        cc = CC_ALTLINE;
    else if( chroma_mode == PC_UPCONVERT )
        cc = CC_UPCONVERT;
    else
        assert(0); /* unreachable unless a new mode is added without updating this */

    ComposeFrame( p_filter, p_dst, p_top_src, p_bottom_src, cc );

    /* Simulate phosphor light output decay by darkening the "old" field.
       The dimmer can be switched off in the configuration, but that is
       more of a technical curiosity or an educational toy for advanced
       users than a useful deinterlacer mode (although it does make
       telecined material look slightly better than no filtering at all).
       In most use cases the dimmer is on. */
    if( p_sys->phosphor.i_dimmer_strength > 0 )
        DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );

    return VLC_SUCCESS;
}
/*****************************************************************************
* algo_phosphor.h : Phosphor algorithm for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_PHOSPHOR_H
#define VLC_DEINTERLACE_ALGO_PHOSPHOR_H 1

/* Forward declarations */
struct filter_t;
struct picture_t;

/*****************************************************************************
 * Data structures etc.
 *****************************************************************************/

/* These numbers, and phosphor_chroma_list[], should be in the same order
   as phosphor_chroma_list_text[]. The value 0 is reserved, because
   var_GetInteger() returns 0 in case of error. */
/** Valid Phosphor 4:2:0 chroma handling modes. */
typedef enum { PC_LATEST = 1, PC_ALTLINE = 2,
               PC_BLEND = 3, PC_UPCONVERT = 4 } phosphor_chroma_t;
/** Phosphor 4:2:0 chroma handling modes (config item). */
static const int phosphor_chroma_list[] = { PC_LATEST, PC_ALTLINE,
                                            PC_BLEND, PC_UPCONVERT };
/** User labels for Phosphor 4:2:0 chroma handling modes (config item). */
static const char *const phosphor_chroma_list_text[] = { N_("Latest"),
                                                         N_("AltLine"),
                                                         N_("Blend"),
                                                         N_("Upconvert") };

/* Same here. Same order as in phosphor_dimmer_list_text[],
   and the value 0 is reserved for config error. */
/** Phosphor dimmer strengths (config item). */
static const int phosphor_dimmer_list[] = { 1, 2, 3, 4 };
/** User labels for Phosphor dimmer strengths (config item). */
static const char *const phosphor_dimmer_list_text[] = { N_("Off"),
                                                         N_("Low"),
                                                         N_("Medium"),
                                                         N_("High") };

/** Algorithm-specific state for Phosphor. */
typedef struct
{
    /** Chroma handling mode applied to 4:2:0 input. */
    phosphor_chroma_t i_chroma_for_420;
    /** Dimmer strength: 0 disables dimming in RenderPhosphor();
        1..3 are valid darkening strengths for DarkenField().
        NOTE(review): presumably derived from the config value
        (phosphor_dimmer_list) minus one - confirm against the
        option parser in deinterlace.c. */
    int i_dimmer_strength;
} phosphor_sys_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
* "Phosphor" deinterlace algorithm: framerate-doubling CRT TV simulator.
*
* There is no "1x" mode in this filter; only framerate doubling is supported.
*
* There is no input frame parameter, because the input frames
* are taken from the history buffer.
*
* Soft field repeat (repeat_pict) is supported. Note that the generated
* "repeated" output picture is unique because of the simulated light decay.
* Its "old" field comes from the same input frame as the "new" one, unlike
* the first output picture of the same frame.
*
* As many output frames should be requested for each input frame as is
* indicated by p_src->i_nb_fields. This is done by calling this function
* several times, first with i_order = 0, and then with all other parameters
* the same, but a new p_dst, increasing i_order (1 for second field,
* and then if i_nb_fields = 3, also i_order = 2 to get the repeated first
* field), and alternating i_field (starting, at i_order = 0, with the field
* according to p_src->b_top_field_first). See Deinterlace() for an example.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_dst Output frame. Must be allocated by caller.
* @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
* @param i_field Render which field? 0 = top field, 1 = bottom field.
* @return VLC error code (int).
* @retval VLC_SUCCESS The requested field was rendered into p_dst.
* @retval VLC_EGENERIC No pictures in history buffer, cannot render.
* @see RenderBob()
* @see RenderLinear()
* @see Deinterlace()
*/
int RenderPhosphor( filter_t *p_filter,
picture_t *p_dst,
int i_order, int i_field );
/*****************************************************************************
* Extra documentation
*****************************************************************************/
/**
* \file
* "Phosphor" deinterlace algorithm. This simulates the rendering mechanism
* of an interlaced CRT TV, actually producing *interlaced* output.
*
* The main use case for this filter is anime for which IVTC is not applicable.
* This is the case, if 24fps telecined material has been mixed with 60fps
* interlaced effects, such as in Sol Bianca or Silent Mobius. It can also
* be used for true interlaced video, such as most camcorder recordings.
*
* The filter has several modes for handling 4:2:0 chroma for those output
* frames that fall across input frame temporal boundaries (i.e. fields come
* from different frames). Upconvert (to 4:2:2) provides the most accurate
* CRT simulation, but requires more CPU and memory bandwidth than the other
* modes. The other modes keep the chroma at 4:2:0.
*
* About these modes: telecined input (such as NTSC anime DVDs) works better
* with AltLine, while true interlaced input works better with Latest.
 * Blend is a compromise, which may or may not look acceptable.
* The mode can be set in the VLC advanced configuration,
* All settings > Video > Filters > Deinterlace
*
* Technically speaking, this is an interlaced field renderer targeted for
* progressive displays. It works by framerate doubling, and simulating one
* step of light output decay of the "old" field during the "new" field,
* until the next new field comes in to replace the "old" one.
*
* While playback is running, the simulated light decay gives the picture an
* appearance of visible "scanlines", much like on a real TV. Only when the
* video is paused, it is clearly visible that one of the fields is actually
* brighter than the other.
*
* The main differences to the Bob algorithm are:
* - in addition to the current field, the previous one (fading out)
* is also rendered
* - some horizontal lines don't seem to flicker as much
* - scanline visual effect (adjustable; the dimmer strength can be set
* in the VLC advanced configuration)
* - the picture appears 25%, 38% or 44% darker on average (for dimmer
* strengths 1, 2 and 3)
* - if the input has 4:2:0 chroma, the colours may look messed up in some
* output frames. This is a limitation of the 4:2:0 chroma format, and due
* to the fact that both fields are present in each output picture. Usually
* this doesn't matter in practice, but see the 4:2:0 chroma mode setting
* in the configuration if needed (it may help a bit).
*
* In addition, when this filter is used on an LCD computer monitor,
* the main differences to a real CRT TV are:
* - Pixel shape and grid layout; CRT TVs were designed for interlaced
* field rendering, while LCD monitors weren't.
* - No scan flicker even though the display runs (usually) at 60Hz.
* (This at least is a good thing.)
*
* The output vertical resolution should be large enough for the scaling
* not to have a too adverse effect on the regular scanline pattern.
* In practice, NTSC video can be acceptably rendered already at 1024x600
* if fullscreen even on an LCD. PAL video requires more.
*
* Just like Bob, this filter works properly only if the input framerate
* is stable. Otherwise the scanline effect breaks down and the picture
* will flicker.
*/
#endif
/*****************************************************************************
* algo_x.c : "X" algorithm for vlc deinterlacer
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#include <stdint.h>
#include <vlc_common.h>
#include <vlc_cpu.h>
#include <vlc_picture.h>
#include "deinterlace.h" /* filter_sys_t */
#include "algo_x.h"
/*****************************************************************************
* Internal functions
*****************************************************************************/
/* XDeint8x8Detect: detect if a 8x8 block is interlaced.
* XXX: It need to access to 8x10
* We use more than 8 lines to help with scrolling (text)
* (and because XDeint8x8Frame use line 9)
* XXX: smooth/uniform area with noise detection doesn't works well
* but it's not really a problem because they don't have much details anyway
*/
/* Squared difference of a single term (helper for the detectors). */
static inline int ssd( int d )
{
    return d * d;
}
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_fc = 0;

    /* Score four consecutive line pairs: a pair votes "interlaced" when
     * same-field differences (ff) are clearly smaller than adjacent-line
     * differences (fr), i.e. when combing is visible. */
    for( int y = 0; y < 7; y += 2 )
    {
        int fr = 0; /* SSD between adjacent lines (frame hypothesis) */
        int ff = 0; /* SSD between same-field lines (field hypothesis) */

        for( int x = 0; x < 8; x++ )
        {
            fr += ssd( src[x]        - src[1*i_src+x] )
                + ssd( src[i_src+x]  - src[2*i_src+x] );
            ff += ssd( src[x]        - src[2*i_src+x] )
                + ssd( src[i_src+x]  - src[3*i_src+x] );
        }

        if( ff < 6*fr/8 && fr > 32 )
            i_fc++;

        src += 2*i_src;
    }

    return i_fc < 1 ? false : true;
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX EXT variant of the 8x8 interlacing detector.
 * NOTE(review): unlike the C version, this scans 5 line pairs (y = 0..8,
 * reading 8x12 pixels) and returns the raw pair count rather than a
 * boolean -- confirm whether this asymmetry is intentional. */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr; /* field / frame SSD sums for the current line pair */
    int fc;         /* number of line pairs that look interlaced */

    /* Detect interlacing */
    fc = 0;
    pxor_r2r( mm7, mm7 ); /* mm7 = 0, used to unpack bytes to words */
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        pxor_r2r( mm5, mm5 ); /* frame (adjacent-line) SSD accumulator */
        pxor_r2r( mm6, mm6 ); /* field (same-field) SSD accumulator */
        for( x = 0; x < 8; x+=4 )
        {
            /* Load 4 pixels from 4 consecutive lines, widen to 16 bit */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            /* Differences: mm0 = l0-l1, mm4 = l0-l2,
                            mm2 = l2-l1, mm3 = l3-l1 */
            movq_r2r( mm0, mm4 );

            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square and pairwise-add into dwords */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );

            /* frame terms -> mm5, field terms -> mm6 */
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Fold the two dwords of each accumulator, store to fr/ff */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        /* Same decision rule as the C version */
        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
#endif
/* Progressive 8x8 block: keep the src1 lines and synthesize the lines in
 * between with a (1,6,1)/8 vertical blend of both sources. */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    for( int i = 0; i < 4; i++ )
    {
        memcpy( dst, src1, 8 );

        uint8_t *blend = dst + i_dst;
        for( int x = 0; x < 8; x++ )
            blend[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4) >> 3;

        dst  += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX EXT variant of XDeint8x8MergeC: keeps the src1 lines and rebuilds
 * the in-between lines as (src1[y] + 6*src2[y] + src1[y+1] + 4) >> 3,
 * processing 4 pixels per iteration. */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    static const uint64_t m_4 = INT64_C(0x0004000400040004); /* rounding */
    int y, x;

    /* Progressive */
    pxor_r2r( mm7, mm7 ); /* zero register for byte->word unpacking */
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Copy the src1 pixels straight to the even line */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );

            /* mm1 = 2*src2, mm3 = 4*src2 => mm1+mm3 = 6*src2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );

            paddw_r2r( mm2, mm0 ); /* src1[y] + src1[y+1] */
            paddw_r2r( mm3, mm1 ); /* 6*src2 */
            paddw_m2r( m_4, mm1 ); /* + rounding constant */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );   /* >> 3 */
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
#endif
/* Fill an 8x8 block with a constant value (debugging aid). */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int i_line = 0; i_line < 8; i_line++, dst += i_dst )
        memset( dst, v, 8 );
}
/* XDeint8x8FieldE: simple (1,0,1) vertical interpolation for blocks that
 * lack a horizontal neighbour (uses 8x9 pixels).
 * TODO: a better one for the inner part.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    for( int y = 0; y < 8; y += 2 )
    {
        /* Keep the field line as-is. */
        memcpy( dst, src, 8 );

        /* Rebuild the missing line as the average of the same-field lines
           above and below it. */
        uint8_t *out = dst + i_dst;
        for( int x = 0; x < 8; x++ )
            out[x] = (src[x] + src[2*i_src+x]) >> 1;

        dst += 2*i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX EXT variant of XDeint8x8FieldEC.
 * NOTE(review): pavgb rounds up ((a+b+1)>>1) while the C variant
 * truncates ((a+b)>>1), so outputs may differ by 1 -- confirm this
 * is acceptable. */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Copy the field line as-is */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Average with the next same-field line to rebuild the missing one */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );
        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* XDeint8x8Field: edge-oriented interpolation
 * (needs -4 and +5 pixels horizontally, +1 line)
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    for( int y = 0; y < 8; y += 2 )
    {
        /* Keep the field line as-is. */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];

            /* Match an 8-pixel window of the line above against the line
             * below: shifted right (cost[0]), unshifted (cost[1]) and
             * shifted left (cost[2]). 8 pixels just to match the MMX
             * version; 5 would be enough (less isn't good). */
            int cost[3] = { 0, 0, 0 };
            for( int i = -4; i < 4; i++ )
            {
                cost[0] += abs( src[x+i]   - src2[x+i+2] );
                cost[1] += abs( src[x+i+1] - src2[x+i+1] );
                cost[2] += abs( src[x+i+2] - src2[x+i]   );
            }

            /* Interpolate along the direction with the clearly lowest
             * cost, falling back to plain vertical averaging. */
            if( cost[0] < cost[1] && cost[1] <= cost[2] )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( cost[2] < cost[1] && cost[1] <= cost[0] )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x] + src2[x]) >> 1;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX EXT variant of XDeint8x8FieldC: the three directional costs are
 * computed with psadbw (sum of absolute differences over 8 bytes); the
 * direction selection and interpolation are identical to the C version. */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Copy the field line as-is */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2; /* costs of the three candidate directions */

            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* SAD of each 8-byte window against the shifted line below */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            /* Same decision rule as XDeint8x8FieldC */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* NxN arbitrary size (uses only pixels inside the NxN block).
 * FIXME way too simple, need to be more like XDeint8x8Detect */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    int i_fc = 0;
    int ff = 0; /* same-field SSD, accumulated over the whole block */
    int fr = 0; /* adjacent-line SSD, accumulated over the whole block */

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *line = &src[y*i_src];

        for( int x = 0; x < i_width; x++ )
        {
            fr += ssd( line[x] - line[1*i_src+x] );
            ff += ssd( line[x] - line[2*i_src+x] );
        }

        if( ff < fr && fr > i_width / 2 )
            i_fc++;
    }

    return i_fc < 2 ? false : true;
}
/* Progressive NxN block: copy the even lines, rebuild the odd lines with
 * a light (1,2,1)/4 vertical blend (two-tap blend on the last line). */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        const bool b_last = y >= i_height - 2;
        for( int x = 0; x < i_width; x++ )
            dst[x] = b_last
                   ? (src[x] + src[1*i_src+x]) >> 1
                   : (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2) >> 2;

        dst += i_dst;
        src += 2*i_src;
    }
}
/* Interlaced NxN block: keep the top field, rebuild the bottom field by
 * averaging the same-field lines above and below (adjacent-line average
 * on the last line, where no further same-field line exists). */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        const bool b_last = y >= i_height - 2;
        for( int x = 0; x < i_width; x++ )
            dst[x] = b_last
                   ? (src[x] + src[i_src+x]) >> 1
                   : (src[x] + src[2*i_src+x]) >> 1;

        dst += i_dst;
        src += 2*i_src;
    }
}
/* Deinterlace an arbitrary NxN leftover block, using only pixels inside it.
 * Classifies the block, then either interpolates (interlaced) or lightly
 * blends (progressive). */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* BUGFIX: XDeintNxNDetect() takes (i_height, i_width); the arguments
     * were previously passed as (i_width, i_height), so the detector
     * scanned the block with swapped dimensions. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
/* Return the median of three integers. */
static inline int median( int a, int b, int c )
{
    const int lo = a < b ? a : b;
    const int hi = a < b ? b : a;

    if( c <= lo )
        return lo;
    if( c >= hi )
        return hi;
    return c;
}
/* XDeintBand8x8C: deinterlace one band of 8x8 blocks, plus a possible
 * leftover column narrower than 8 pixels. */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int x = 0; x < i_mbx; x++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive block: blend the two fields together. */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block: blocks at the band edges lack horizontal
           neighbours, so use the simple vertical interpolator there. */
        if( x == 0 || x == i_mbx - 1 )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX EXT variant of XDeintBand8x8C; same structure, dispatching to the
 * MMX EXT block helpers. The NxN leftover part stays C only. */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    int x;

    /* Reset current line */
    for( x = 0; x < i_mbx; x++ )
    {
        int s;
        if( ( s = XDeint8x8DetectMMXEXT( src, i_src ) ) )
        {
            /* Interlaced: edge blocks lack horizontal neighbours, so use
               the simple vertical interpolator for them. */
            if( x == 0 || x == i_mbx - 1 )
                XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
            else
                XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
        }
        else
        {
            /* Progressive: blend the two fields together. */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
        }

        dst += 8;
        src += 8;
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
/*****************************************************************************
 * Public functions
 *****************************************************************************/

/* Deinterlace one picture with the "X" algorithm.
 * Each plane is processed in bands of 8 lines, each band split into 8x8
 * blocks that are classified and handled independently (see XDeintBand8x8*).
 * The final band, and any leftover columns, always go through the C-only
 * NxN variants. */
void RenderX( picture_t *p_outpic, picture_t *p_pic )
{
    int i_plane;
    unsigned u_cpu = vlc_CPU();

    /* Copy image and skip lines */
    for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        /* Number of full bands minus one (the last band is handled below
           through the NxN path), and number of full 8-wide blocks per band */
        const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
        const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;

        /* Remaining lines / columns not covered by the 8x8 grid above */
        const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
        const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;

        const int i_dst = p_outpic->p[i_plane].i_pitch;
        const int i_src = p_pic->p[i_plane].i_pitch;

        int y, x;

        for( y = 0; y < i_mby; y++ )
        {
            uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
            uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];

#ifdef CAN_COMPILE_MMXEXT
            if( u_cpu & CPU_CAPABILITY_MMXEXT )
                XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
            else
#endif
                XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
        }

        /* Last line (C only)*/
        if( i_mody )
        {
            uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
            uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];

            for( x = 0; x < i_mbx; x++ )
            {
                XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
                dst += 8;
                src += 8;
            }

            if( i_modx )
                XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
        }
    }

#ifdef CAN_COMPILE_MMXEXT
    /* Leave the FPU/MMX state clean for any following floating point code */
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
        emms();
#endif
}
/*****************************************************************************
* algo_x.h : "X" algorithm for vlc deinterlacer
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_X_H
#define VLC_DEINTERLACE_ALGO_X_H 1
/* Forward declarations */
struct picture_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
* Interpolating deinterlace filter "X".
*
* The algorithm works on a 8x8 block basic, it copies the top field
* and applies a process to recreate the bottom field.
*
* If a 8x8 block is classified as :
* - progressive: it applies a small blend (1,6,1)
* - interlaced:
* * in the MMX version: we do a ME between the 2 fields, if there is a
* good match we use MC to recreate the bottom field (with a small
* blend (1,6,1) )
* * otherwise: it recreates the bottom field by an edge oriented
* interpolation.
*
* @param[in] p_pic Input frame.
* @param[out] p_outpic Output frame. Must be allocated by caller.
* @see Deinterlace()
*/
void RenderX( picture_t *p_outpic, picture_t *p_pic );
#endif
/*****************************************************************************
* algo_yadif.c : Wrapper for MPlayer's Yadif algorithm
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#include <stdint.h>
#include <assert.h>
#include <vlc_common.h>
#include <vlc_cpu.h>
#include <vlc_picture.h>
#include <vlc_filter.h>
#include "deinterlace.h" /* filter_sys_t */
#include "common.h" /* FFMIN3 et al. */
#include "algo_yadif.h"
/*****************************************************************************
 * Yadif (Yet Another DeInterlacing Filter).
 *****************************************************************************/

/* Yadif's private data struct */
struct vf_priv_s {
    /*
     * 0: Output 1 frame for each frame.
     * 1: Output 1 frame for each field.
     * 2: Like 0 but skips spatial interlacing check.
     * 3: Like 1 but skips spatial interlacing check.
     *
     * In vlc, only & 0x02 has meaning, as we do the & 0x01 ourself.
     */
    int mode;
};

/* I am unsure it is the right one */
/* NOTE(review): x86_reg is used by yadif.h; intptr_t matches the native
   register width on both 32- and 64-bit x86 -- confirm against the
   mplayer original. */
typedef intptr_t x86_reg;

/* yadif.h comes from vf_yadif.c of mplayer project.
   Necessary preprocessor macros are defined in common.h. */
#include "yadif.h"
/* Render one output field with Yadif; see algo_yadif.h for the full
 * calling convention (i_order/i_field sequencing, history requirements). */
int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
                 int i_order, int i_field )
{
    VLC_UNUSED(p_src);

    filter_sys_t *p_sys = p_filter->p_sys;

    /* */
    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
    assert( i_field == 0 || i_field == 1 );

    /* As the pitches must match, use ONLY pictures coming from picture_New()! */
    picture_t *p_prev = p_sys->pp_history[0];
    picture_t *p_cur  = p_sys->pp_history[1];
    picture_t *p_next = p_sys->pp_history[2];

    /* Account for soft field repeat.

       The "parity" parameter affects the algorithm like this (from yadif.h):
       uint8_t *prev2= parity ? prev : cur ;
       uint8_t *next2= parity ? cur  : next;

       The original parity expression that was used here is:
       (i_field ^ (i_order == i_field)) & 1

       Truth table:
       i_field = 0, i_order = 0  => 1
       i_field = 1, i_order = 1  => 0
       i_field = 1, i_order = 0  => 1
       i_field = 0, i_order = 1  => 0

       => equivalent with e.g. (1 - i_order)  or  (i_order + 1) % 2

       Thus, in a normal two-field frame,
       parity 1 = first field  (i_order == 0)
       parity 0 = second field (i_order == 1)

       Now, with three fields, where the third is a copy of the first,
       i_order = 0  =>  parity 1 (as usual)
       i_order = 1  =>  due to the repeat, prev = cur, but also next = cur.
                        Because in such a case there is no motion
                        (otherwise field repeat makes no sense),
                        we don't actually need to invoke Yadif's filter().
                        Thus, set "parity" to 2, and use this to bypass
                        the filter.
       i_order = 2  =>  parity 0 (as usual)
    */
    int yadif_parity;
    if( p_cur && p_cur->i_nb_fields > 2 )
        yadif_parity = (i_order + 1) % 3; /* 1, *2*, 0; where 2 is a special
                                             value meaning "bypass filter". */
    else
        yadif_parity = (i_order + 1) % 2; /* 1, 0 */

    /* Filter if we have all the pictures we need */
    if( p_prev && p_cur && p_next )
    {
        /* Pick the line filter implementation once per call */
        void (*filter)(struct vf_priv_s *p, uint8_t *dst,
                       uint8_t *prev, uint8_t *cur, uint8_t *next,
                       int w, int refs, int parity);
#if defined(HAVE_YADIF_SSE2)
        if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
            filter = yadif_filter_line_mmx2;
        else
#endif
            filter = yadif_filter_line_c;

        for( int n = 0; n < p_dst->i_planes; n++ )
        {
            const plane_t *prevp = &p_prev->p[n];
            const plane_t *curp  = &p_cur->p[n];
            const plane_t *nextp = &p_next->p[n];
            plane_t *dstp        = &p_dst->p[n];

            /* First and last visible lines are filled by duplication below */
            for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
            {
                /* Lines of the kept field are copied; parity 2 (soft field
                   repeat, no motion) bypasses the filter for all lines. */
                if( (y % 2) == i_field || yadif_parity == 2 )
                {
                    vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
                                &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
                }
                else
                {
                    struct vf_priv_s cfg;
                    /* Spatial checks only when enough data */
                    cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;

                    assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
                    filter( &cfg,
                            &dstp->p_pixels[y * dstp->i_pitch],
                            &prevp->p_pixels[y * prevp->i_pitch],
                            &curp->p_pixels[y * curp->i_pitch],
                            &nextp->p_pixels[y * nextp->i_pitch],
                            dstp->i_visible_pitch,
                            curp->i_pitch,
                            yadif_parity );
                }

                /* We duplicate the first and last lines */
                if( y == 1 )
                    vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch],
                               &dstp->p_pixels[ y    * dstp->i_pitch],
                               dstp->i_pitch);
                else if( y == dstp->i_visible_lines - 2 )
                    vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch],
                               &dstp->p_pixels[ y    * dstp->i_pitch],
                               dstp->i_pitch);
            }
        }

        p_sys->i_frame_offset = 1; /* p_cur will be rendered at next frame, too */

        return VLC_SUCCESS;
    }
    else if( !p_prev && !p_cur && p_next )
    {
        /* NOTE: For the first frame, we use the default frame offset
                 as set by Open() or SetFilterMethod(). It is always 0. */

        /* FIXME not good as it does not use i_order/i_field */
        RenderX( p_dst, p_next );
        return VLC_SUCCESS;
    }
    else
    {
        p_sys->i_frame_offset = 1; /* p_cur will be rendered at next frame */

        return VLC_EGENERIC;
    }
}
/*****************************************************************************
* algo_yadif.h : Wrapper for MPlayer's Yadif algorithm
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_YADIF_H
#define VLC_DEINTERLACE_ALGO_YADIF_H 1
/**
* \file
* Adapter to fit the Yadif (Yet Another DeInterlacing Filter) algorithm
* from MPlayer into VLC. The algorithm itself is implemented in yadif.h.
*/
/* Forward declarations */
struct filter_t;
struct picture_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
* Yadif (Yet Another DeInterlacing Filter) from MPlayer.
* One field is copied as-is (i_field), the other is interpolated.
*
* Comes with both interpolating and framerate doubling modes.
*
* If you do NOT want to use framerate doubling: use i_order = 0,
* and either 0 or 1 for i_field (keep it constant),
*
* If you DO want framerate doubling, do as instructed below.
*
* See Deinterlace() for usage examples of both modes.
*
* Needs three frames in the history buffer to operate.
* The first-ever frame is rendered using RenderX().
* The second is dropped. At the third frame, Yadif starts.
*
* Once Yadif starts, the frame that is rendered corresponds to the *previous*
* input frame (i_frame_offset = 1), complete with its original PTS.
* The latest input frame is used as the future/next frame, as reference
* for temporal interpolation.
*
* This wrapper adds support for soft field repeat (repeat_pict).
* Note that the generated "repeated" output picture is unique because
* of temporal interpolation.
*
* As many output frames should be requested for each input frame as is
* indicated by p_src->i_nb_fields. This is done by calling this function
* several times, first with i_order = 0, and then with all other parameters
* the same, but a new p_dst, increasing i_order (1 for second field,
* and then if i_nb_fields = 3, also i_order = 2 to get the repeated first
* field), and alternating i_field (starting, at i_order = 0, with the field
* according to p_src->b_top_field_first). See Deinterlace() for an example.
*
* @param p_filter The filter instance. Must be non-NULL.
* @param p_dst Output frame. Must be allocated by caller.
* @param p_src Input frame. Must exist.
* @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
* @param i_field Keep which field? 0 = top field, 1 = bottom field.
* @return VLC error code (int).
* @retval VLC_SUCCESS The requested field was rendered into p_dst.
* @retval VLC_EGENERIC Frame dropped; only occurs at the second frame after start.
* @see Deinterlace()
*/
int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
int i_order, int i_field );
#endif
/*****************************************************************************
* common.h : Common macros for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_COMMON_H
#define VLC_DEINTERLACE_COMMON_H 1

/**
 * \file
 * Common macros for the VLC deinterlacer.
 */

/* Needed for Yadif, but also some others. */
/* FFmpeg/MPlayer-style abs/min/max helpers, mapped onto VLC's __MIN/__MAX.
   NOTE: being macros, the arguments may be evaluated more than once --
   do not pass expressions with side effects. */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b) __MAX(a,b)
#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b) __MIN(a,b)
#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)

#endif
/*****************************************************************************
* deinterlace.c : deinterlacer plugin for vlc
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
* Juha Jeronen <juha.jeronen@jyu.fi> (Phosphor and IVTC modes)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include <stdint.h>
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_filter.h>
#include <vlc_cpu.h>
#include <vlc_mouse.h>
#include "deinterlace.h"
#include "helpers.h"
#include "merge.h"
/*****************************************************************************
* Module descriptor
*****************************************************************************/
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
#define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
#define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
#define FILTER_CFG_PREFIX "sout-deinterlace-"
/* Tooltips drop linefeeds (at least in the Qt GUI);
thus the space before each set of consecutive \n.
See phosphor.h for phosphor_chroma_list and phosphor_dimmer_list.
*/
#define PHOSPHOR_CHROMA_TEXT N_("Phosphor chroma mode for 4:2:0 input")
#define PHOSPHOR_CHROMA_LONGTEXT N_("Choose handling for colours in those "\
"output frames that fall across input "\
"frame boundaries. \n"\
"\n"\
"Latest: take chroma from new (bright) "\
"field only. Good for interlaced input, "\
"such as videos from a camcorder. \n"\
"\n"\
"AltLine: take chroma line 1 from top "\
"field, line 2 from bottom field, etc. \n"\
"Default, good for NTSC telecined input "\
"(anime DVDs, etc.). \n"\
"\n"\
"Blend: average input field chromas. "\
"May distort the colours of the new "\
"(bright) field, too. \n"\
"\n"\
"Upconvert: output in 4:2:2 format "\
"(independent chroma for each field). "\
"Best simulation, but requires more CPU "\
"and memory bandwidth.")
#define PHOSPHOR_DIMMER_TEXT N_("Phosphor old field dimmer strength")
#define PHOSPHOR_DIMMER_LONGTEXT N_("This controls the strength of the "\
"darkening filter that simulates CRT TV "\
"phosphor light decay for the old field "\
"in the Phosphor framerate doubler. "\
"Default: Low.")
vlc_module_begin ()
    set_description( N_("Deinterlacing video filter") )
    set_shortname( N_("Deinterlace" ))
    set_capability( "video filter2", 0 )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )

    /* Deinterlace method for streaming (see SOUT_MODE_TEXT above) */
    add_string( FILTER_CFG_PREFIX "mode", "blend", SOUT_MODE_TEXT,
                SOUT_MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
        change_safe ()

    /* Phosphor algorithm options; value lists presumably come from
       phosphor.h (see the tooltip comment above) */
    add_integer( FILTER_CFG_PREFIX "phosphor-chroma", 2, PHOSPHOR_CHROMA_TEXT,
                 PHOSPHOR_CHROMA_LONGTEXT, true )
        change_integer_list( phosphor_chroma_list, phosphor_chroma_list_text )
        change_safe ()
    add_integer( FILTER_CFG_PREFIX "phosphor-dimmer", 2, PHOSPHOR_DIMMER_TEXT,
                 PHOSPHOR_DIMMER_LONGTEXT, true )
        change_integer_list( phosphor_dimmer_list, phosphor_dimmer_list_text )
        change_safe ()

    add_shortcut( "deinterlace" )
    set_callbacks( Open, Close )
vlc_module_end ()
/*****************************************************************************
* Local data
*****************************************************************************/
/**
* Available config options for the deinterlacer module.
*
* Note that also algorithm-specific options must be listed here,
* and reading logic for them implemented in Open().
*/
static const char *const ppsz_filter_options[] = {
    /* Parsed in Open(); must stay in sync with the add_* calls in the
       module descriptor above. */
    "mode", "phosphor-chroma", "phosphor-dimmer",
    NULL /* terminator */
};
/*****************************************************************************
 * SetFilterMethod: setup the deinterlace method to use.
 *****************************************************************************/
void SetFilterMethod( filter_t *p_filter, const char *psz_method,
                      vlc_fourcc_t i_chroma )
{
    filter_sys_t *p_sys = p_filter->p_sys;

    /* Capability table for the simple named methods:
       name, mode id, doubles framerate?, halves height?, needs history? */
    static const struct
    {
        const char *psz_name;
        int  i_mode;
        bool b_double_rate;
        bool b_half_height;
        bool b_use_frame_history;
    } p_methods[] = {
        { "mean",             DEINTERLACE_MEAN,     false, true,  false },
        { "bob",              DEINTERLACE_BOB,      true,  false, false },
        { "progressive-scan", DEINTERLACE_BOB,      true,  false, false },
        { "linear",           DEINTERLACE_LINEAR,   true,  false, false },
        { "x",                DEINTERLACE_X,        false, false, false },
        { "yadif",            DEINTERLACE_YADIF,    false, false, true  },
        { "yadif2x",          DEINTERLACE_YADIF2X,  true,  false, true  },
        { "phosphor",         DEINTERLACE_PHOSPHOR, true,  false, true  },
        { "ivtc",             DEINTERLACE_IVTC,     false, false, true  },
    };
    enum { N_METHODS = sizeof(p_methods)/sizeof(p_methods[0]) };

    if( !psz_method )
        psz_method = "";

    size_t i;
    for( i = 0; i < N_METHODS; i++ )
        if( !strcmp( psz_method, p_methods[i].psz_name ) )
            break;

    if( i < N_METHODS )
    {
        p_sys->i_mode              = p_methods[i].i_mode;
        p_sys->b_double_rate       = p_methods[i].b_double_rate;
        p_sys->b_half_height       = p_methods[i].b_half_height;
        p_sys->b_use_frame_history = p_methods[i].b_use_frame_history;
    }
    else if( !strcmp( psz_method, "discard" ) )
    {
        /* Discard keeps the full height only for 4:2:2 sources. */
        const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
                            i_chroma == VLC_CODEC_J422;

        p_sys->i_mode = DEINTERLACE_DISCARD;
        p_sys->b_double_rate = false;
        p_sys->b_half_height = !b_i422;
        p_sys->b_use_frame_history = false;
    }
    else
    {
        /* Fall back to blend; warn unless blend was actually requested. */
        if( strcmp( psz_method, "blend" ) )
            msg_Err( p_filter,
                     "no valid deinterlace mode provided, using \"blend\"" );

        p_sys->i_mode = DEINTERLACE_BLEND;
        p_sys->b_double_rate = false;
        p_sys->b_half_height = false;
        p_sys->b_use_frame_history = false;
    }

    p_sys->i_frame_offset = 0; /* reset to default when method changes */

    msg_Dbg( p_filter, "using %s deinterlace method", psz_method );
}
/*****************************************************************************
 * GetOutputFormat: return which format the chosen algorithm outputs.
 *****************************************************************************/
void GetOutputFormat( filter_t *p_filter,
                      video_format_t *p_dst, const video_format_t *p_src )
{
    filter_sys_t *p_sys = p_filter->p_sys;

    /* Start from the input format and patch only the fields that change. */
    *p_dst = *p_src;

    if( p_sys->b_half_height )
    {
        /* Half-height modes drop one field: halve the height, and double
           the SAR denominator to keep the display aspect ratio. */
        p_dst->i_height         /= 2;
        p_dst->i_visible_height /= 2;
        p_dst->i_y_offset       /= 2;
        p_dst->i_sar_den        *= 2;
    }

    if( p_src->i_chroma == VLC_CODEC_I422 ||
        p_src->i_chroma == VLC_CODEC_J422 )
    {
        /* These modes handle 4:2:2 input natively; the rest output 4:2:0,
           so for them the chroma must be downconverted. */
        bool b_keeps_chroma;
        switch( p_sys->i_mode )
        {
            case DEINTERLACE_MEAN:
            case DEINTERLACE_LINEAR:
            case DEINTERLACE_X:
            case DEINTERLACE_YADIF:
            case DEINTERLACE_YADIF2X:
            case DEINTERLACE_PHOSPHOR:
            case DEINTERLACE_IVTC:
                b_keeps_chroma = true;
                break;
            default:
                b_keeps_chroma = false;
                break;
        }

        if( b_keeps_chroma )
            p_dst->i_chroma = p_src->i_chroma;
        else
            p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422
                            ? VLC_CODEC_I420 : VLC_CODEC_J420;
    }
    else if( p_sys->i_mode == DEINTERLACE_PHOSPHOR &&
             p_sys->phosphor.i_chroma_for_420 == PC_UPCONVERT )
    {
        /* Phosphor's upconvert mode outputs 4:2:2 for 4:2:0 input. */
        p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_J420
                        ? VLC_CODEC_J422 : VLC_CODEC_I422;
    }
}
/*****************************************************************************
 * IsChromaSupported: return whether the specified chroma is implemented.
 *****************************************************************************/
bool IsChromaSupported( vlc_fourcc_t i_chroma )
{
    /* The deinterlacer implements 4:2:0 (I420/J420/YV12) and
       4:2:2 (I422/J422) planar chromas only. */
    switch( i_chroma )
    {
        case VLC_CODEC_I420:
        case VLC_CODEC_J420:
        case VLC_CODEC_YV12:
        case VLC_CODEC_I422:
        case VLC_CODEC_J422:
            return true;
        default:
            return false;
    }
}
/*****************************************************************************
 * video filter2 functions
 *****************************************************************************/

/* Maximum number of output pictures Deinterlace() may emit per input
   picture. Framerate doublers need up to 3 (soft field repeat gives
   nb_fields == 3). */
#define DEINTERLACE_DST_SIZE 3

/* This is the filter function. See Open().

   See the documentation in deinterlace.h for the full contract; in short,
   this returns a linked list of 0..DEINTERLACE_DST_SIZE output pictures,
   or NULL (which is normal behaviour for some algorithms, e.g. IVTC). */
picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
{
    filter_sys_t *p_sys = p_filter->p_sys;
    picture_t *p_dst[DEINTERLACE_DST_SIZE];

    /* Request output picture */
    p_dst[0] = filter_NewPicture( p_filter );
    if( p_dst[0] == NULL )
    {
        picture_Release( p_pic );
        return NULL;
    }
    picture_CopyProperties( p_dst[0], p_pic );

    /* Any unused p_dst pointers must be NULL, because they are used to
       check how many output frames we have. */
    for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
        p_dst[i] = NULL;

    /* Update the input frame history, if the currently active algorithm
       needs it. */
    if( p_sys->b_use_frame_history )
    {
        /* Duplicate the picture
         * TODO when the vout rework is finished, picture_Hold() might be
         * enough, but be careful: the pitches must match */
        picture_t *p_dup = picture_NewFromFormat( &p_pic->format );
        if( p_dup )
            picture_Copy( p_dup, p_pic );

        /* Slide the history */
        if( p_sys->pp_history[0] )
            picture_Release( p_sys->pp_history[0] );
        for( int i = 1; i < HISTORY_SIZE; i++ )
            p_sys->pp_history[i-1] = p_sys->pp_history[i];
        p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
    }

    /* Slide the metadata history. */
    for( int i = 1; i < METADATA_SIZE; i++ )
    {
        p_sys->meta.pi_date[i-1] = p_sys->meta.pi_date[i];
        p_sys->meta.pi_nb_fields[i-1] = p_sys->meta.pi_nb_fields[i];
        p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
    }
    /* The last element corresponds to the current input frame. */
    p_sys->meta.pi_date[METADATA_SIZE-1] = p_pic->date;
    p_sys->meta.pi_nb_fields[METADATA_SIZE-1] = p_pic->i_nb_fields;
    p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;

    /* Remember the frame offset that we should use for this frame.
       The value in p_sys will be updated to reflect the correct value
       for the *next* frame when we call the renderer. */
    int i_frame_offset = p_sys->i_frame_offset;
    int i_meta_idx = (METADATA_SIZE-1) - i_frame_offset;

    /* These correspond to the current *outgoing* frame. */
    bool b_top_field_first;
    int i_nb_fields;
    if( i_frame_offset != CUSTOM_PTS )
    {
        /* Pick the correct values from the history. */
        b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
        i_nb_fields = p_sys->meta.pi_nb_fields[i_meta_idx];
    }
    else
    {
        /* Framerate doublers must not request CUSTOM_PTS, as they need the
           original field timings, and need Deinterlace() to allocate the
           correct number of output frames. */
        assert( !p_sys->b_double_rate );

        /* NOTE: i_nb_fields is only used for framerate doublers, so it is
           unused in this case. b_top_field_first is only passed to the
           algorithm. We assume that algorithms that request CUSTOM_PTS
           will, if necessary, extract the TFF/BFF information themselves.
        */
        b_top_field_first = p_pic->b_top_field_first; /* this is not guaranteed
                                                         to be meaningful */
        i_nb_fields = p_pic->i_nb_fields; /* unused */
    }

    /* For framerate doublers, determine field duration and allocate
       output frames. */
    mtime_t i_field_dur = 0;
    int i_double_rate_alloc_end = 0; /* One past last for allocated output
                                        frames in p_dst[]. Used only for
                                        framerate doublers. Will be inited
                                        below. Declared here because the
                                        PTS logic needs the result. */
    if( p_sys->b_double_rate )
    {
        /* Calculate one field duration. */
        int i = 0;
        int iend = METADATA_SIZE-1;
        /* Find oldest valid logged date.
           The current input frame doesn't count. */
        for( ; i < iend; i++ )
            if( p_sys->meta.pi_date[i] > VLC_TS_INVALID )
                break;
        if( i < iend )
        {
            /* Count how many fields the valid history entries
               (except the new frame) represent. */
            int i_fields_total = 0;
            for( int j = i ; j < iend; j++ )
                i_fields_total += p_sys->meta.pi_nb_fields[j];
            /* One field took this long. */
            i_field_dur = (p_pic->date - p_sys->meta.pi_date[i]) / i_fields_total;
        }
        /* Note that we default to field duration 0 if it could not be
           determined. This behaves the same as the old code - leaving the
           extra output frame dates the same as p_pic->date if the last cached
           date was not valid.
        */

        i_double_rate_alloc_end = i_nb_fields;
        if( i_nb_fields > DEINTERLACE_DST_SIZE )
        {
            /* Note that the effective buffer size depends also on the constant
               private_picture in vout_wrapper.c, since that determines the
               maximum number of output pictures filter_NewPicture() will
               successfully allocate for one input frame.
            */
            msg_Err( p_filter, "Framerate doubler: output buffer too small; "\
                               "fields = %d, buffer size = %d. Dropping the "\
                               "remaining fields.",
                               i_nb_fields, DEINTERLACE_DST_SIZE );
            i_double_rate_alloc_end = DEINTERLACE_DST_SIZE;
        }

        /* Allocate output frames. */
        for( int i = 1; i < i_double_rate_alloc_end ; ++i )
        {
            p_dst[i-1]->p_next =
            p_dst[i] = filter_NewPicture( p_filter );
            if( p_dst[i] )
            {
                picture_CopyProperties( p_dst[i], p_pic );
            }
            else
            {
                msg_Err( p_filter, "Framerate doubler: could not allocate "\
                                   "output frame %d", i+1 );
                i_double_rate_alloc_end = i; /* Inform the PTS logic about the
                                                correct end position. */
                break; /* If this happens, the rest of the allocations
                          aren't likely to work, either... */
            }
        }
        /* Now we have allocated *up to* the correct number of frames;
           normally, exactly the correct number. Upon alloc failure,
           we may have succeeded in allocating *some* output frames,
           but fewer than were desired. In such a case, as many will
           be rendered as were successfully allocated.

           Note that now p_dst[i] != NULL
           for 0 <= i < i_double_rate_alloc_end. */
    }
    assert( p_sys->b_double_rate || p_dst[1] == NULL );
    assert( i_nb_fields > 2 || p_dst[2] == NULL );

    /* Render */
    switch( p_sys->i_mode )
    {
        case DEINTERLACE_DISCARD:
            RenderDiscard( p_filter, p_dst[0], p_pic, 0 );
            break;

        case DEINTERLACE_BOB:
            RenderBob( p_filter, p_dst[0], p_pic, !b_top_field_first );
            if( p_dst[1] )
                RenderBob( p_filter, p_dst[1], p_pic, b_top_field_first );
            if( p_dst[2] )
                RenderBob( p_filter, p_dst[2], p_pic, !b_top_field_first );
            break;

        case DEINTERLACE_LINEAR:
            RenderLinear( p_filter, p_dst[0], p_pic, !b_top_field_first );
            if( p_dst[1] )
                RenderLinear( p_filter, p_dst[1], p_pic, b_top_field_first );
            if( p_dst[2] )
                RenderLinear( p_filter, p_dst[2], p_pic, !b_top_field_first );
            break;

        case DEINTERLACE_MEAN:
            RenderMean( p_filter, p_dst[0], p_pic );
            break;

        case DEINTERLACE_BLEND:
            RenderBlend( p_filter, p_dst[0], p_pic );
            break;

        case DEINTERLACE_X:
            RenderX( p_dst[0], p_pic );
            break;

        case DEINTERLACE_YADIF:
            if( RenderYadif( p_filter, p_dst[0], p_pic, 0, 0 ) )
                goto drop;
            break;

        case DEINTERLACE_YADIF2X:
            if( RenderYadif( p_filter, p_dst[0], p_pic, 0, !b_top_field_first ) )
                goto drop;
            if( p_dst[1] )
                RenderYadif( p_filter, p_dst[1], p_pic, 1, b_top_field_first );
            if( p_dst[2] )
                RenderYadif( p_filter, p_dst[2], p_pic, 2, !b_top_field_first );
            break;

        case DEINTERLACE_PHOSPHOR:
            if( RenderPhosphor( p_filter, p_dst[0], 0,
                                !b_top_field_first ) )
                goto drop;
            if( p_dst[1] )
                RenderPhosphor( p_filter, p_dst[1], 1,
                                b_top_field_first );
            if( p_dst[2] )
                RenderPhosphor( p_filter, p_dst[2], 2,
                                !b_top_field_first );
            break;

        case DEINTERLACE_IVTC:
            /* Note: RenderIVTC will automatically drop the duplicate frames
               produced by IVTC. This is part of normal operation. */
            if( RenderIVTC( p_filter, p_dst[0] ) )
                goto drop;
            break;
    }

    /* Set output timestamps, if the algorithm didn't request CUSTOM_PTS
       for this frame.

       Valid offsets are 0 <= i_frame_offset < METADATA_SIZE (see the
       i_frame_offset documentation in deinterlace.h); an offset equal to
       METADATA_SIZE would have made the i_meta_idx accesses above read
       out of bounds, so it must be rejected here, too. */
    assert( i_frame_offset < METADATA_SIZE || i_frame_offset == CUSTOM_PTS );
    if( i_frame_offset != CUSTOM_PTS )
    {
        mtime_t i_base_pts = p_sys->meta.pi_date[i_meta_idx];

        /* Note: in the usual case (i_frame_offset = 0 and
           b_double_rate = false), this effectively does nothing.
           This is needed to correct the timestamp
           when i_frame_offset > 0. */
        p_dst[0]->date = i_base_pts;

        if( p_sys->b_double_rate )
        {
            /* Processing all actually allocated output frames. */
            for( int i = 1; i < i_double_rate_alloc_end; ++i )
            {
                /* XXX it's not really good especially for the first picture, but
                 * I don't think that delaying by one frame is worth it */
                if( i_base_pts > VLC_TS_INVALID )
                    p_dst[i]->date = i_base_pts + i * i_field_dur;
                else
                    p_dst[i]->date = VLC_TS_INVALID;
            }
        }
    }

    /* The output frames are, by definition, deinterlaced. */
    for( int i = 0; i < DEINTERLACE_DST_SIZE; ++i )
    {
        if( p_dst[i] )
        {
            p_dst[i]->b_progressive = true;
            p_dst[i]->i_nb_fields = 2;
        }
    }

    picture_Release( p_pic );
    return p_dst[0];

drop:
    /* Algorithm requested a frame drop: release everything. */
    picture_Release( p_dst[0] );
    for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
    {
        if( p_dst[i] )
            picture_Release( p_dst[i] );
    }
    picture_Release( p_pic );
    return NULL;
}
/*****************************************************************************
 * Flush
 *****************************************************************************/
void Flush( filter_t *p_filter )
{
    filter_sys_t *p_sys = p_filter->p_sys;

    /* Invalidate the whole metadata history. */
    for( int i = 0; i < METADATA_SIZE; i++ )
    {
        p_sys->meta.pi_date[i] = VLC_TS_INVALID;
        p_sys->meta.pi_nb_fields[i] = 2;
        p_sys->meta.pb_top_field_first[i] = true;
    }

    /* The first frame after a flush cannot have an offset;
       reset to the default. */
    p_sys->i_frame_offset = 0;

    /* Drop the input frame history. */
    for( int i = 0; i < HISTORY_SIZE; i++ )
    {
        if( p_sys->pp_history[i] != NULL )
        {
            picture_Release( p_sys->pp_history[i] );
            p_sys->pp_history[i] = NULL;
        }
    }

    /* Reset algorithm-specific state, too. */
    IVTCClearState( p_filter );
}
/*****************************************************************************
 * Mouse event callback
 *****************************************************************************/
int Mouse( filter_t *p_filter,
           vlc_mouse_t *p_mouse,
           const vlc_mouse_t *p_old, const vlc_mouse_t *p_new )
{
    VLC_UNUSED(p_old);

    /* Pass the new mouse state through unchanged... */
    *p_mouse = *p_new;

    /* ...except for half-height modes, where the y coordinate must be
       scaled back up to refer to the full-height input video. */
    if( p_filter->p_sys->b_half_height )
        p_mouse->i_y = 2 * p_mouse->i_y;

    return VLC_SUCCESS;
}
/*****************************************************************************
 * Open
 *****************************************************************************/
int Open( vlc_object_t *p_this )
{
    filter_t *p_filter = (filter_t*)p_this;
    filter_sys_t *p_sys;

    if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
        return VLC_EGENERIC;

    /* Allocate and initialize the top-level state. */
    p_sys = p_filter->p_sys = malloc( sizeof( *p_sys ) );
    if( !p_sys )
        return VLC_ENOMEM;

    /* Defaults; SetFilterMethod() below overrides these according to the
       configured mode. */
    p_sys->i_mode = DEINTERLACE_BLEND;
    p_sys->b_double_rate = false;
    p_sys->b_half_height = true;
    p_sys->b_use_frame_history = false;
    for( int i = 0; i < METADATA_SIZE; i++ )
    {
        p_sys->meta.pi_date[i] = VLC_TS_INVALID;
        p_sys->meta.pi_nb_fields[i] = 2;
        p_sys->meta.pb_top_field_first[i] = true;
    }
    p_sys->i_frame_offset = 0; /* start with default value (first-ever frame
                                  cannot have offset) */
    for( int i = 0; i < HISTORY_SIZE; i++ )
        p_sys->pp_history[i] = NULL;

    IVTCClearState( p_filter );

    /* Pick the fastest available field-merge implementation for this CPU. */
#if defined(CAN_COMPILE_C_ALTIVEC)
    if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
    {
        p_sys->pf_merge = MergeAltivec;
        p_sys->pf_end_merge = NULL;
    }
    else
#endif
#if defined(CAN_COMPILE_SSE)
    if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
    {
        p_sys->pf_merge = MergeSSE2;
        p_sys->pf_end_merge = EndMMX;
    }
    else
#endif
#if defined(CAN_COMPILE_MMXEXT)
    if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
    {
        p_sys->pf_merge = MergeMMXEXT;
        p_sys->pf_end_merge = EndMMX;
    }
    else
#endif
#if defined(CAN_COMPILE_3DNOW)
    if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
    {
        p_sys->pf_merge = Merge3DNow;
        p_sys->pf_end_merge = End3DNow;
    }
    else
#endif
#if defined __ARM_NEON__
    if( vlc_CPU() & CPU_CAPABILITY_NEON )
    {
        p_sys->pf_merge = MergeNEON;
        p_sys->pf_end_merge = NULL;
    }
    else
#endif
    {
        p_sys->pf_merge = MergeGeneric;
        p_sys->pf_end_merge = NULL;
    }

    /* Read the configuration and select the deinterlace method. */
    config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
                       p_filter->p_cfg );
    char *psz_mode = var_GetNonEmptyString( p_filter, FILTER_CFG_PREFIX "mode" );
    SetFilterMethod( p_filter, psz_mode, p_filter->fmt_in.video.i_chroma );
    free( psz_mode );

    if( p_sys->i_mode == DEINTERLACE_PHOSPHOR )
    {
        int i_c420 = var_GetInteger( p_filter,
                                     FILTER_CFG_PREFIX "phosphor-chroma" );
        if( i_c420 != PC_LATEST && i_c420 != PC_ALTLINE &&
            i_c420 != PC_BLEND && i_c420 != PC_UPCONVERT )
        {
            /* Note: trailing space after "set" is required; the compiler
               concatenates these string literals. */
            msg_Dbg( p_filter, "Phosphor 4:2:0 input chroma mode not set "\
                               "or out of range (valid: 1, 2, 3 or 4), "\
                               "using default" );
            i_c420 = PC_ALTLINE;
        }
        msg_Dbg( p_filter, "using Phosphor 4:2:0 input chroma mode %d",
                           i_c420 );
        /* This maps directly to the phosphor_chroma_t enum. */
        p_sys->phosphor.i_chroma_for_420 = i_c420;

        int i_dimmer = var_GetInteger( p_filter,
                                       FILTER_CFG_PREFIX "phosphor-dimmer" );
        if( i_dimmer < 1 || i_dimmer > 4 )
        {
            msg_Dbg( p_filter, "Phosphor dimmer strength not set "\
                               "or out of range (valid: 1, 2, 3 or 4), "\
                               "using default" );
            i_dimmer = 2; /* low */
        }
        msg_Dbg( p_filter, "using Phosphor dimmer strength %d", i_dimmer );
        /* The internal value ranges from 0 to 3. */
        p_sys->phosphor.i_dimmer_strength = i_dimmer - 1;
    }
    else
    {
        p_sys->phosphor.i_chroma_for_420 = PC_ALTLINE;
        p_sys->phosphor.i_dimmer_strength = 1;
    }

    /* If the caller forbids output format changes, bail out when the chosen
       algorithm would alter the chroma or the height. */
    video_format_t fmt;
    GetOutputFormat( p_filter, &fmt, &p_filter->fmt_in.video );
    if( !p_filter->b_allow_fmt_out_change &&
        ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
          fmt.i_height != p_filter->fmt_in.video.i_height ) )
    {
        Close( VLC_OBJECT(p_filter) );
        return VLC_EGENERIC;
    }
    p_filter->fmt_out.video = fmt;
    p_filter->fmt_out.i_codec = fmt.i_chroma;
    p_filter->pf_video_filter = Deinterlace;
    p_filter->pf_video_flush  = Flush;
    p_filter->pf_video_mouse  = Mouse;

    msg_Dbg( p_filter, "deinterlacing" );

    return VLC_SUCCESS;
}
/*****************************************************************************
 * Close: clean up the filter
 *****************************************************************************/
void Close( vlc_object_t *p_this )
{
    filter_t *p_f = (filter_t*)p_this;

    Flush( p_f );        /* releases histories and algorithm state */
    free( p_f->p_sys );  /* then the top-level state itself */
}
/*****************************************************************************
* deinterlace.h : deinterlacer plugin for vlc
*****************************************************************************
* Copyright (C) 2000-2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
* Juha Jeronen <juha.jeronen@jyu.fi> (Phosphor and IVTC modes)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_H
#define VLC_DEINTERLACE_H 1
/* Forward declarations */
struct filter_t;
struct picture_t;
struct vlc_object_t;
#include <vlc_common.h>
#include <vlc_mouse.h>
/* Local algorithm headers */
#include "algo_basic.h"
#include "algo_x.h"
#include "algo_yadif.h"
#include "algo_phosphor.h"
#include "algo_ivtc.h"
/*****************************************************************************
* Local data
*****************************************************************************/
/** Available deinterlace modes.
    NOTE: must be kept in sync with mode_list_text below
    (same length, same order). */
static const char *const mode_list[] = {
    "discard", "blend", "mean", "bob", "linear", "x",
    "yadif", "yadif2x", "phosphor", "ivtc" };
/** User labels for the available deinterlace modes.
    Entries not wrapped in N_() are left untranslated. */
static const char *const mode_list_text[] = {
    N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X",
    "Yadif", "Yadif (2x)", N_("Phosphor"), N_("Film NTSC (IVTC)") };
/*****************************************************************************
* Data structures
*****************************************************************************/
/**
* Available deinterlace algorithms.
* @see SetFilterMethod()
*/
/* One constant per entry in mode_list; stored in filter_sys_t::i_mode.
   (Note the enum order differs from the mode_list string order.) */
typedef enum { DEINTERLACE_DISCARD, DEINTERLACE_MEAN, DEINTERLACE_BLEND,
               DEINTERLACE_BOB, DEINTERLACE_LINEAR, DEINTERLACE_X,
               DEINTERLACE_YADIF, DEINTERLACE_YADIF2X, DEINTERLACE_PHOSPHOR,
               DEINTERLACE_IVTC } deinterlace_mode;
/* Number of input-frame metadata entries kept; the last entry always
   corresponds to the current input frame. */
#define METADATA_SIZE (3)
/**
 * Metadata history structure, used for framerate doublers.
 * This is used for computing field duration in Deinterlace().
 * @see Deinterlace()
 */
typedef struct {
    mtime_t pi_date[METADATA_SIZE];         /**< PTS of input frame */
    int pi_nb_fields[METADATA_SIZE];        /**< Number of fields (2, or 3
                                                 for soft field repeat) */
    bool pb_top_field_first[METADATA_SIZE]; /**< TFF flag of input frame */
} metadata_history_t;
/* Number of past input frames kept for algorithms using temporal
   filtering (b_use_frame_history). */
#define HISTORY_SIZE (3)
/* Special value of i_frame_offset: the algorithm computes output PTSs
   itself. See the "Extra documentation" section at the end of this file. */
#define CUSTOM_PTS -1
/**
 * Top-level deinterlace subsystem state.
 */
struct filter_sys_t
{
    int i_mode;              /**< Deinterlace mode (a deinterlace_mode value) */

    /* Algorithm behaviour flags */
    bool b_double_rate; /**< Shall we double the framerate? */
    bool b_half_height; /**< Shall we divide the height by 2? */
    bool b_use_frame_history; /**< Use the input frame history buffer? */

    /** Merge routine: C, MMX, SSE, ALTIVEC, NEON, ... */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    /** Merge finalization routine: C, MMX, SSE, ALTIVEC, NEON, ... */
    void (*pf_end_merge) ( void );

    /**
     * Metadata history (PTS, nb_fields, TFF). Used for framerate doublers.
     * @see metadata_history_t
     */
    metadata_history_t meta;

    /** Output frame timing / framerate doubler control
        (see extra documentation in deinterlace.h) */
    int i_frame_offset;

    /** Input frame history buffer for algorithms with temporal filtering. */
    picture_t *pp_history[HISTORY_SIZE];

    /* Algorithm-specific substructures */
    phosphor_sys_t phosphor; /**< Phosphor algorithm state. */
    ivtc_sys_t ivtc; /**< IVTC algorithm state. */
};
/*****************************************************************************
* Filter control related internal functions for the deinterlace filter
*****************************************************************************/
/**
* Setup the deinterlace method to use.
*
* FIXME: extract i_chroma from p_filter automatically?
*
* @param p_filter The filter instance.
* @param psz_method Desired method. See mode_list for available choices.
* @param i_chroma Input chroma. Set this to p_filter->fmt_in.video.i_chroma.
* @see mode_list
*/
void SetFilterMethod( filter_t *p_filter, const char *psz_method,
vlc_fourcc_t i_chroma );
/**
* Get the output video format of the chosen deinterlace method
* for the given input video format.
*
* Note that each algorithm is allowed to specify its output format,
* which may (for some input formats) differ from the input format.
*
* @param p_filter The filter instance.
* @param[out] p_dst Output video format. The structure must be allocated by caller.
* @param[in] p_src Input video format.
* @see SetFilterMethod()
*/
void GetOutputFormat( filter_t *p_filter,
video_format_t *p_dst,
const video_format_t *p_src );
/**
* Returns whether the specified chroma is implemented in the deinterlace
* filter.
*
* Currently, supported chromas are I420, J420 (4:2:0 full scale),
* YV12 (like I420, but YVU), I422 and J422.
*
* Note for deinterlace hackers: adding support for a new chroma typically
* requires changes to all low-level functions across all the algorithms.
*
* @see vlc_fourcc_t
*/
bool IsChromaSupported( vlc_fourcc_t i_chroma );
/*****************************************************************************
* video filter2 functions
*****************************************************************************/
/**
* Top-level filtering method.
*
* Open() sets this up as the processing method (pf_video_filter)
* in the filter structure.
*
* Note that there is no guarantee that the returned picture directly
* corresponds to p_pic. The first few times, the filter may not even
* return a picture, if it is still filling the history for temporal
* filtering (although such filters often return *something* also
* while starting up). It should be assumed that N input pictures map to
* M output pictures, with no restrictions for N and M (except that there
* is not much delay).
*
* Also, there is no guarantee that the PTS of the frame stays untouched.
* In fact, framerate doublers automatically compute the proper PTSs for the
* two output frames for each input frame, and IVTC does a nontrivial
* framerate conversion (29.97 > 23.976 fps).
*
* Yadif has an offset of one frame between input and output, but introduces
* no delay: the returned frame is the *previous* input frame deinterlaced,
* complete with its original PTS.
*
* Finally, note that returning NULL sometimes can be normal behaviour for some
* algorithms (e.g. IVTC).
*
* Currently:
* Most algorithms: 1 -> 1, no offset
* All framerate doublers: 1 -> 2, no offset
* Yadif: 1 -> 1, offset of one frame
* IVTC: 1 -> 1 or 0 (depends on whether a drop was needed)
* with an offset of one frame (in most cases)
* and framerate conversion.
*
* @param p_filter The filter instance.
* @param p_pic The latest input picture.
* @return Deinterlaced picture(s). Linked list of picture_t's or NULL.
* @see Open()
* @see filter_t
* @see filter_sys_t
*/
picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic );
/**
* Reads the configuration, sets up and starts the filter.
*
* Possible reasons for returning VLC_EGENERIC:
* - Unsupported input chroma. See IsChromaSupported().
* - Caller has set p_filter->b_allow_fmt_out_change to false,
* but the algorithm chosen in the configuration
* wants to convert the output to a format different
* from the input. See SetFilterMethod().
*
* Open() is atomic: if an error occurs, the state of p_this
* is left as it was before the call to this function.
*
* @param p_this The filter instance as vlc_object_t.
* @return VLC error code
* @retval VLC_SUCCESS All ok, filter set up and started.
* @retval VLC_ENOMEM Memory allocation error, initialization aborted.
* @retval VLC_EGENERIC Something went wrong, initialization aborted.
* @see IsChromaSupported()
* @see SetFilterMethod()
*/
int Open( vlc_object_t *p_this );
/**
* Resets the filter state, including resetting all algorithm-specific state
* and discarding all histories, but does not stop the filter.
*
* Open() sets this up as the flush method (pf_video_flush)
* in the filter structure.
*
* @param p_filter The filter instance.
* @see Open()
* @see filter_t
* @see filter_sys_t
* @see metadata_history_t
* @see phosphor_sys_t
* @see ivtc_sys_t
*/
void Flush( filter_t *p_filter );
/**
* Mouse callback for the deinterlace filter.
*
* Open() sets this up as the mouse callback method (pf_video_mouse)
* in the filter structure.
*
* Currently, this handles the scaling of the y coordinate for algorithms
* that halve the output height.
*
* @param p_filter The filter instance.
* @param[out] p_mouse Updated mouse position data.
* @param[in] p_old Previous mouse position data. Unused in this filter.
* @param[in] p_new Latest mouse position data.
* @return VLC error code; currently always VLC_SUCCESS.
* @retval VLC_SUCCESS All ok.
* @see Open()
* @see filter_t
* @see vlc_mouse_t
*/
int Mouse( filter_t *p_filter,
vlc_mouse_t *p_mouse,
const vlc_mouse_t *p_old,
const vlc_mouse_t *p_new );
/**
* Stops and uninitializes the filter, and deallocates memory.
* @param p_this The filter instance as vlc_object_t.
*/
void Close( vlc_object_t *p_this );
/*****************************************************************************
* Extra documentation
*****************************************************************************/
/**
* \file
* Deinterlacer plugin for vlc. Data structures and video filter2 functions.
*
* Note on i_frame_offset:
*
* This value indicates the offset between input and output frames in the
* currently active deinterlace algorithm. See the rationale below for why
* this is needed and how it is used.
*
* Valid range: 0 <= i_frame_offset < METADATA_SIZE, or
* i_frame_offset = CUSTOM_PTS.
* The special value CUSTOM_PTS is only allowed
* if b_double_rate is false.
*
* If CUSTOM_PTS is used, the algorithm must compute the outgoing
* PTSs itself, and additionally, read the TFF/BFF information
* itself (if it needs it) from the incoming frames.
*
* Meaning of values:
* 0 = output frame corresponds to the current input frame
* (no frame offset; default if not set),
* 1 = output frame corresponds to the previous input frame
* (e.g. Yadif and Yadif2x work like this),
* ...
*
* If necessary, i_frame_offset should be updated by the active deinterlace
* algorithm to indicate the correct delay for the *next* input frame.
* It does not matter at which i_order the algorithm updates this information,
* but the new value will only take effect upon the next call to Deinterlace()
* (i.e. at the next incoming frame).
*
* The first-ever frame that arrives to the filter after Open() is always
* handled as having i_frame_offset = 0. For the second and all subsequent
* frames, each algorithm is responsible for setting the offset correctly.
* (The default is 0, so if that is correct, there's no need to do anything.)
*
* This solution guarantees that i_frame_offset:
* 1) is up to date at the start of each frame,
* 2) does not change (as far as Deinterlace() is concerned) during
* a frame, and
* 3) does not need a special API for setting the value at the start of each
* input frame, before the algorithm starts rendering the (first) output
* frame for that input frame.
*
* The deinterlace algorithm is allowed to behave differently for different
* input frames. This is especially important for startup, when full history
* (as defined by each algorithm) is not yet available. During the first-ever
* input frame, it is clear that it is the only possible source for
* information, so i_frame_offset = 0 is necessarily correct. After that,
* what to do is up to each algorithm.
*
* Having the correct offset at the start of each input frame is critically
* important in order to:
* 1) Allocate the correct number of output frames for framerate doublers,
* and to
* 2) Pass correct TFF/BFF information to the algorithm.
*
* These points are important for proper soft field repeat support. This
* feature is used in some streams (especially NTSC) originating from film.
* For example, in soft NTSC telecine, the number of fields alternates
* as 3,2,3,2,... and the video field dominance flips every two frames (after
* every "3"). Also, some streams request an occasional field repeat
* (nb_fields = 3), after which the video field dominance flips.
* To render such streams correctly, the nb_fields and TFF/BFF information
* must be taken from the specific input frame that the algorithm intends
* to render.
*
* Additionally, the output PTS is automatically computed by Deinterlace()
* from i_frame_offset and i_order.
*
* It is possible to use the special value CUSTOM_PTS to indicate that the
* algorithm computes the output PTSs itself. In this case, Deinterlace()
* will pass them through. This special value is not valid for framerate
* doublers, as by definition they are field renderers, so they need to
* use the original field timings to work correctly. Basically, this special
* value is only intended for algorithms that need to perform nontrivial
* framerate conversions (such as IVTC).
*/
#endif
/*****************************************************************************
* helpers.c : Generic helper functions for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#include <stdint.h>
#include <assert.h>
#include <vlc_common.h>
#include <vlc_cpu.h>
#include <vlc_filter.h>
#include <vlc_picture.h>
#include "deinterlace.h" /* definition of p_sys, needed for Merge() */
#include "common.h" /* FFMIN3 et al. */
#include "merge.h"
#include "helpers.h"
/*****************************************************************************
 * Internal functions
 *****************************************************************************/

/**
 * This internal function converts a normal (full frame) plane_t into a
 * field plane_t.
 *
 * Field plane_t's can be used e.g. for a weaving copy operation from two
 * source frames into one destination frame.
 *
 * Only the metadata is rewritten; the pixel data itself is shared with the
 * original plane_t. Note, however, that for the bottom field the pixel
 * pointer is advanced by one line, so it does not exactly match the
 * original plane's pixel pointer.
 *
 * The caller must allocate p_dst (creating a local variable is fine).
 *
 * @param p_dst Field plane_t is written here. Must be non-NULL.
 * @param p_src Original full-frame plane_t. Must be non-NULL.
 * @param i_field Extract which field? 0 = top field, 1 = bottom field.
 * @see plane_CopyPixels()
 * @see ComposeFrame()
 * @see RenderPhosphor()
 */
static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field )
{
    assert( p_dst != NULL );
    assert( p_src != NULL );
    assert( i_field == 0 || i_field == 1 );

    /* Clone the metadata, then halve the line counts and double the pitch:
       plane_CopyPixels() distinguishes visible_pitch from pitch, so the
       skipped lines of the other field become invisible "margin", while the
       visible pitch stays as in the original. */
    *p_dst = *p_src;
    p_dst->i_lines /= 2;
    p_dst->i_visible_lines /= 2;
    p_dst->i_pitch *= 2;

    /* The bottom field's data starts one (original) line into the buffer. */
    if( i_field == 1 )
        p_dst->p_pixels += p_src->i_pitch;
}
/**
* Internal helper function for EstimateNumBlocksWithMotion():
* estimates whether there is motion in the given 8x8 block on one plane
* between two images. The block as a whole and its fields are evaluated
* separately, and use different motion thresholds.
*
* This is a low-level function only used by EstimateNumBlocksWithMotion().
* There is no need to call this function manually.
*
* For interpretation of pi_top and pi_bot, it is assumed that the block
* starts on an even-numbered line (belonging to the top field).
*
* The b_mmx parameter avoids the need to call vlc_CPU() separately
* for each block.
*
* @param[in] p_pix_p Base pointer to the block in previous picture
* @param[in] p_pix_c Base pointer to the same block in current picture
* @param i_pitch_prev i_pitch of previous picture
* @param i_pitch_curr i_pitch of current picture
* @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
* @param[out] pi_top 1 if top field of the block had motion, 0 if no
* @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
* @return 1 if the block had motion, 0 if no
* @see EstimateNumBlocksWithMotion()
*/
static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c,
                                        int i_pitch_prev, int i_pitch_curr,
                                        bool b_mmx,
                                        int* pi_top, int* pi_bot )
{
/* Pixel luma/chroma difference threshold to detect motion. */
#define T 10

    /* Per-field and whole-block counts of pixels whose inter-frame
       difference exceeds T. int32_t because the MMX path stores into
       these with movd (32 bits). */
    int32_t i_motion = 0;
    int32_t i_top_motion = 0;
    int32_t i_bot_motion = 0;

    /* See below for the C version to see more quickly what this does. */
#ifdef CAN_COMPILE_MMXEXT
    if( b_mmx )
    {
        static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };
        pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */
        movq_m2r( bT, mm5 );
        pxor_r2r( mm3, mm3 ); /* score (top field) */
        pxor_r2r( mm4, mm4 ); /* score (bottom field) */
        /* Step two rows per iteration: even row = top field,
           odd row = bottom field. */
        for( int y = 0; y < 8; y+=2 )
        {
            /* top field */
            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
            /* Two saturating subtracts give the per-direction differences;
               together they cover |curr - prev| split across mm2/mm1
               (whichever direction is negative saturates to zero). */
            movq_r2r( mm0, mm2 );
            psubusb_r2r( mm1, mm2 );
            psubusb_r2r( mm0, mm1 );
            /* NOTE(review): pcmpgtb is a *signed* byte compare, so a
               difference >= 128 reads as negative and is not counted.
               Presumably acceptable for this detector -- confirm against
               upstream if exactness for huge differences ever matters. */
            pcmpgtb_r2r( mm5, mm2 );
            pcmpgtb_r2r( mm5, mm1 );
            /* Each exceeding pixel is now 0xFF; psadbw against zero sums
               them, contributing 255 per counted pixel (hence the /255
               at the end). */
            psadbw_r2r( mm6, mm2 );
            psadbw_r2r( mm6, mm1 );
            paddd_r2r( mm2, mm1 );
            paddd_r2r( mm1, mm3 ); /* add to top field score */
            p_pix_c += i_pitch_curr;
            p_pix_p += i_pitch_prev;
            /* bottom field - handling identical to top field, except... */
            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
            movq_r2r( mm0, mm2 );
            psubusb_r2r( mm1, mm2 );
            psubusb_r2r( mm0, mm1 );
            pcmpgtb_r2r( mm5, mm2 );
            pcmpgtb_r2r( mm5, mm1 );
            psadbw_r2r( mm6, mm2 );
            psadbw_r2r( mm6, mm1 );
            paddd_r2r( mm2, mm1 );
            paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */
            p_pix_c += i_pitch_curr;
            p_pix_p += i_pitch_prev;
        }
        movq_r2r( mm3, mm7 ); /* score (total) */
        paddd_r2r( mm4, mm7 );
        movd_r2m( mm3, i_top_motion );
        movd_r2m( mm4, i_bot_motion );
        movd_r2m( mm7, i_motion );
        /* The loop counts actual score * 255. */
        i_top_motion /= 255;
        i_bot_motion /= 255;
        i_motion /= 255;
        emms();
    }
    else
#endif
    {
        /* Plain C fallback: count pixels (per row, then per field)
           whose absolute difference between the two pictures exceeds T. */
        for( int y = 0; y < 8; ++y )
        {
            uint8_t *pc = p_pix_c;
            uint8_t *pp = p_pix_p;
            int score = 0;
            for( int x = 0; x < 8; ++x )
            {
                int_fast16_t C = abs((*pc) - (*pp));
                if( C > T )
                    ++score;
                ++pc;
                ++pp;
            }
            i_motion += score;
            /* Even rows belong to the top field, odd rows to the bottom
               field (block assumed to start on an even source line). */
            if( y % 2 == 0 )
                i_top_motion += score;
            else
                i_bot_motion += score;
            p_pix_c += i_pitch_curr;
            p_pix_p += i_pitch_prev;
        }
    }

    /* Field motion thresholds.

       Empirical value - works better in practice than the "4" that
       would be consistent with the full-block threshold.

       Especially the opening scene of The Third ep. 1 (just after the OP)
       works better with this. It also fixes some talking scenes in
       Stellvia ep. 1, where the cadence would otherwise catch on incorrectly,
       leading to more interlacing artifacts than by just using the emergency
       mode frame composer.
    */
    (*pi_top) = ( i_top_motion >= 8 );
    (*pi_bot) = ( i_bot_motion >= 8 );

    /* Full-block threshold = (8*8)/8: motion is detected if 1/8 of the block
       changes "enough". */
    return (i_motion >= 8);
}
#undef T
/*****************************************************************************
* Public functions
*****************************************************************************/
/* See header for function doc. */
void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
                   picture_t *p_inpic_top, picture_t *p_inpic_bottom,
                   compose_chroma_t i_output_chroma )
{
    assert( p_filter != NULL );
    assert( p_outpic != NULL );
    assert( p_inpic_top != NULL );
    assert( p_inpic_bottom != NULL );

    /* Valid 4:2:0 chroma handling modes. */
    assert( i_output_chroma == CC_ALTLINE ||
            i_output_chroma == CC_UPCONVERT ||
            i_output_chroma == CC_SOURCE_TOP ||
            i_output_chroma == CC_SOURCE_BOTTOM ||
            i_output_chroma == CC_MERGE );

    const int i_chroma = p_filter->fmt_in.video.i_chroma;
    const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
                        i_chroma == VLC_CODEC_J422;
    /* Chroma upconversion (4:2:0 -> 4:2:2) only applies to 4:2:0 input. */
    const bool b_upconvert_chroma = ( !b_i422 &&
                                      i_output_chroma == CC_UPCONVERT );

    for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
    {
        bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );

        /* YV12 is YVU, but I422 is YUV. For such input, swap chroma planes
           in output when converting to 4:2:2. */
        int i_out_plane;
        if( b_is_chroma_plane && b_upconvert_chroma &&
            i_chroma == VLC_CODEC_YV12 )
        {
            if( i_plane == U_PLANE )
                i_out_plane = V_PLANE;
            else /* V_PLANE */
                i_out_plane = U_PLANE;
        }
        else
        {
            i_out_plane = i_plane;
        }

        /* Copy luma or chroma, alternating between input fields. */
        if( !b_is_chroma_plane || b_i422 || i_output_chroma == CC_ALTLINE )
        {
            /* Do an alternating line copy. This is always done for luma,
               and for 4:2:2 chroma. It can be requested for 4:2:0 chroma
               using CC_ALTLINE (see function doc).

               Note that when we get here, the number of lines matches
               in input and output.
            */
            plane_t dst_top;
            plane_t dst_bottom;
            plane_t src_top;
            plane_t src_bottom;
            FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane],   0 );
            FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane],   1 );
            FieldFromPlane( &src_top,    &p_inpic_top->p[i_plane],    0 );
            FieldFromPlane( &src_bottom, &p_inpic_bottom->p[i_plane], 1 );

            /* Copy each field from the corresponding source. */
            plane_CopyPixels( &dst_top,    &src_top    );
            plane_CopyPixels( &dst_bottom, &src_bottom );
        }
        else /* Input 4:2:0, on a chroma plane, and not in altline mode. */
        {
            if( i_output_chroma == CC_UPCONVERT )
            {
                /* Upconverting copy - use all data from both input fields.

                   This produces an output picture with independent chroma
                   for each field. It can be used for general input when
                   the two input frames are different.

                   The output is 4:2:2, but the input is 4:2:0. Thus the output
                   has twice the lines of the input, and each full chroma plane
                   in the input corresponds to a field chroma plane in the
                   output.
                */
                plane_t dst_top;
                plane_t dst_bottom;
                FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane], 0 );
                FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );

                /* Copy each field from the corresponding source. */
                plane_CopyPixels( &dst_top,    &p_inpic_top->p[i_plane]    );
                plane_CopyPixels( &dst_bottom, &p_inpic_bottom->p[i_plane] );
            }
            else if( i_output_chroma == CC_SOURCE_TOP )
            {
                /* Copy chroma of input top field. Ignore chroma of input
                   bottom field. Input and output are both 4:2:0, so we just
                   copy the whole plane. */
                plane_CopyPixels( &p_outpic->p[i_out_plane],
                                  &p_inpic_top->p[i_plane] );
            }
            else if( i_output_chroma == CC_SOURCE_BOTTOM )
            {
                /* Copy chroma of input bottom field. Ignore chroma of input
                   top field. Input and output are both 4:2:0, so we just
                   copy the whole plane. */
                plane_CopyPixels( &p_outpic->p[i_out_plane],
                                  &p_inpic_bottom->p[i_plane] );
            }
            else /* i_output_chroma == CC_MERGE */
            {
                /* Average the chroma of the input fields.
                   Input and output are both 4:2:0. */
                uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
                p_in_top    = p_inpic_top->p[i_plane].p_pixels;
                p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
                p_out = p_outpic->p[i_out_plane].p_pixels;
                p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
                                  * p_outpic->p[i_out_plane].i_visible_lines;

                /* FIX: index the *output* picture with i_out_plane, not
                   i_plane, like everywhere else in this branch. In CC_MERGE
                   mode the two are in fact always equal (plane swapping only
                   happens when upconverting YV12), so behavior is unchanged;
                   this merely removes the hidden reliance on that invariant. */
                int w = FFMIN3( p_inpic_top->p[i_plane].i_visible_pitch,
                                p_inpic_bottom->p[i_plane].i_visible_pitch,
                                p_outpic->p[i_out_plane].i_visible_pitch );

                for( ; p_out < p_out_end ; )
                {
                    Merge( p_out, p_in_top, p_in_bottom, w );
                    p_out       += p_outpic->p[i_out_plane].i_pitch;
                    p_in_top    += p_inpic_top->p[i_plane].i_pitch;
                    p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
                }
                EndMerge();
            }
        }
    }
}
/* See header for function doc. */
int EstimateNumBlocksWithMotion( const picture_t* p_prev,
const picture_t* p_curr,
int *pi_top, int *pi_bot)
{
assert( p_prev != NULL );
assert( p_curr != NULL );
int i_score_top = 0;
int i_score_bot = 0;
if( p_prev->i_planes != p_curr->i_planes )
return -1;
/* We must tell our inline helper whether to use MMX acceleration. */
#ifdef CAN_COMPILE_MMXEXT
bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );
#else
bool b_mmx = false;
#endif
int i_score = 0;
for( int i_plane = 0 ; i_plane < p_prev->i_planes ; i_plane++ )
{
/* Sanity check */
if( p_prev->p[i_plane].i_visible_lines !=
p_curr->p[i_plane].i_visible_lines )
return -1;
const int i_pitch_prev = p_prev->p[i_plane].i_pitch;
const int i_pitch_curr = p_curr->p[i_plane].i_pitch;
/* Last pixels and lines (which do not make whole blocks) are ignored.
Shouldn't really matter for our purposes. */
const int i_mby = p_prev->p[i_plane].i_visible_lines / 8;
const int w = FFMIN( p_prev->p[i_plane].i_visible_pitch,
p_curr->p[i_plane].i_visible_pitch );
const int i_mbx = w / 8;
for( int by = 0; by < i_mby; ++by )
{
uint8_t *p_pix_p = &p_prev->p[i_plane].p_pixels[i_pitch_prev*8*by];
uint8_t *p_pix_c = &p_curr->p[i_plane].p_pixels[i_pitch_curr*8*by];
for( int bx = 0; bx < i_mbx; ++bx )
{
int i_top_temp, i_bot_temp;
i_score += TestForMotionInBlock( p_pix_p, p_pix_c,
i_pitch_prev, i_pitch_curr,
b_mmx,
&i_top_temp, &i_bot_temp );
i_score_top += i_top_temp;
i_score_bot += i_bot_temp;
p_pix_p += 8;
p_pix_c += 8;
}
}
}
if( pi_top )
(*pi_top) = i_score_top;
if( pi_bot )
(*pi_bot) = i_score_bot;
return i_score;
}
/* See header for function doc. */
int CalculateInterlaceScore( const picture_t* p_pic_top,
                             const picture_t* p_pic_bot )
{
    /*
        We use the comb metric from the IVTC filter of Transcode 1.1.5.
        This was found to work better for the particular purpose of IVTC
        than RenderX()'s comb metric.

        Note that we *must not* subsample at all in order to catch interlacing
        in telecined frames with localized motion (e.g. anime with characters
        talking, where only mouths move and everything else stays still.)
    */
    assert( p_pic_top != NULL );
    assert( p_pic_bot != NULL );

    if( p_pic_top->i_planes != p_pic_bot->i_planes )
        return -1;

    unsigned u_cpu = vlc_CPU();

    /* Amount of bits must be known for MMX, thus int32_t.
       Doesn't hurt the C implementation. */
    int32_t i_score = 0;

#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
        pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */
#endif

    for( int i_plane = 0 ; i_plane < p_pic_top->i_planes ; ++i_plane )
    {
        /* Sanity check */
        if( p_pic_top->p[i_plane].i_visible_lines !=
            p_pic_bot->p[i_plane].i_visible_lines )
            return -1;

        const int i_lasty = p_pic_top->p[i_plane].i_visible_lines-1;
        /* Compatible (smaller) part of the visible width only. */
        const int w = FFMIN( p_pic_top->p[i_plane].i_visible_pitch,
                             p_pic_bot->p[i_plane].i_visible_pitch );
        const int wm8 = w % 8; /* remainder */
        const int w8 = w - wm8; /* part of width that is divisible by 8 */

        /* Current line / neighbouring lines picture pointers.
           The comb test compares each line of one field against its two
           vertical neighbours, which come from the *other* field
           (pictures are swapped at the bottom of the y loop). */
        const picture_t *cur = p_pic_bot;
        const picture_t *ngh = p_pic_top;
        int wc = cur->p[i_plane].i_pitch;
        int wn = ngh->p[i_plane].i_pitch;

        /* Transcode 1.1.5 only checks every other line. Checking every line
           works better for anime, which may contain horizontal,
           one pixel thick cartoon outlines.
        */
        for( int y = 1; y < i_lasty; ++y )
        {
            uint8_t *p_c = &cur->p[i_plane].p_pixels[y*wc]; /* this line */
            uint8_t *p_p = &ngh->p[i_plane].p_pixels[(y-1)*wn]; /* prev line */
            uint8_t *p_n = &ngh->p[i_plane].p_pixels[(y+1)*wn]; /* next line */

            int x = 0;

            /* Threshold (value from Transcode 1.1.5) */
#define T 100
#ifdef CAN_COMPILE_MMXEXT
            /* Easy-to-read C version further below.

               Assumptions: 0 < T < 127
                            # of pixels < (2^32)/255
               Note: calculates score * 255
            */
            if( u_cpu & CPU_CAPABILITY_MMXEXT )
            {
                static const mmx_t b0 = { .uq = 0x0000000000000000ULL };
                static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
                static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };

                for( ; x < w8; x += 8 )
                {
                    /* Load 8 pixels of each line, re-bias to signed range
                       (subtract 128) and form P-C and N-C as saturating
                       signed differences. */
                    movq_m2r( *((int64_t*)p_c), mm0 );
                    movq_m2r( *((int64_t*)p_p), mm1 );
                    movq_m2r( *((int64_t*)p_n), mm2 );
                    psubb_m2r( b128, mm0 );
                    psubb_m2r( b128, mm1 );
                    psubb_m2r( b128, mm2 );
                    psubsb_r2r( mm0, mm1 );
                    psubsb_r2r( mm0, mm2 );
                    /* Widen to 16-bit and multiply: approximates the C
                       version's (P-C)*(N-C) comb product per pixel
                       (high parts via pmulhw), then pack and threshold. */
                    pxor_r2r( mm3, mm3 );
                    pxor_r2r( mm4, mm4 );
                    pxor_r2r( mm5, mm5 );
                    pxor_r2r( mm6, mm6 );
                    punpcklbw_r2r( mm1, mm3 );
                    punpcklbw_r2r( mm2, mm4 );
                    punpckhbw_r2r( mm1, mm5 );
                    punpckhbw_r2r( mm2, mm6 );
                    pmulhw_r2r( mm3, mm4 );
                    pmulhw_r2r( mm5, mm6 );
                    packsswb_r2r(mm4, mm6);
                    /* Each combed pixel becomes 0xFF; psadbw against zero
                       accumulates 255 per hit into mm7 (hence /255 below). */
                    pcmpgtb_m2r( bT, mm6 );
                    psadbw_m2r( b0, mm6 );
                    paddd_r2r( mm6, mm7 );
                    p_c += 8;
                    p_p += 8;
                    p_n += 8;
                }
            }
#endif
            for( ; x < w; ++x )
            {
                /* Worst case: need 17 bits for "comb". */
                int_fast32_t C = *p_c;
                int_fast32_t P = *p_p;
                int_fast32_t N = *p_n;

                /* Comments in Transcode's filter_ivtc.c attribute this
                   combing metric to Gunnar Thalin.

                   The idea is that if the picture is interlaced, both
                   expressions will have the same sign, and this comes
                   up positive. The value T = 100 has been chosen such
                   that a pixel difference of 10 (on average) will
                   trigger the detector.
                */
                int_fast32_t comb = (P - C) * (N - C);
                if( comb > T )
                    ++i_score;

                ++p_c;
                ++p_p;
                ++p_n;
            }

            /* Now the other field - swap current and neighbour pictures */
            const picture_t *tmp = cur;
            cur = ngh;
            ngh = tmp;
            int tmp_pitch = wc;
            wc = wn;
            wn = tmp_pitch;
        }
    }

#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
    {
        movd_r2m( mm7, i_score );
        emms();
        i_score /= 255; /* the MMX path counted score * 255 (see above) */
    }
#endif

    return i_score;
}
#undef T
/*****************************************************************************
* helpers.h : Generic helper functions for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_HELPERS_H
#define VLC_DEINTERLACE_HELPERS_H 1
/**
* \file
* Generic helper functions for the VLC deinterlacer, used in
* some of the advanced algorithms.
*/
/* Forward declarations */
struct filter_t;
struct picture_t;
struct plane_t;
/**
* Chroma operation types for composing 4:2:0 frames.
* @see ComposeFrame()
*/
typedef enum { CC_ALTLINE, CC_UPCONVERT, CC_SOURCE_TOP, CC_SOURCE_BOTTOM,
CC_MERGE } compose_chroma_t;
/**
* Helper function: composes a frame from the given field pair.
*
* Caller must manage allocation/deallocation of p_outpic.
*
* The inputs are full pictures (frames); only one field
* will be used from each.
*
* Chroma formats of the inputs must match. It is also desirable that the
* visible pitches of both inputs are the same, so that this will do something
* sensible. The pitch or visible pitch of the output does not need to match
* with the input; the compatible (smaller) part of the visible pitch will
* be filled.
*
* The i_output_chroma parameter must always be supplied, but it is only used
* when the chroma format of the input is detected as 4:2:0. Available modes:
* - CC_ALTLINE: Alternate line copy, like for luma. Chroma line 0
* comes from top field picture, chroma line 1 comes
* from bottom field picture, chroma line 2 from top
* field picture, and so on. This is usually the right
* choice for IVTCing NTSC DVD material, but rarely
* for any other use cases.
* - CC_UPCONVERT: The output will have 4:2:2 chroma. All 4:2:0 chroma
* data from both input fields will be used to generate
* the 4:2:2 chroma data of the output. Each output line
* will thus have independent chroma. This is a good
* choice for most purposes except IVTC, if the machine
* can handle the increased throughput. (Make sure to
* allocate a 4:2:2 output picture first!)
* This mode can also be used for converting a 4:2:0
* frame to 4:2:2 format (by passing the same input
* picture for both input fields).
* Conversions: I420, YV12 --> I422
* J420 --> J422
* - CC_SOURCE_TOP: Copy chroma of source top field picture.
* Ignore chroma of source bottom field picture.
* - CC_SOURCE_BOTTOM: Copy chroma of source bottom field picture.
* Ignore chroma of source top field picture.
* - CC_MERGE: Average the chroma of the input field pictures.
* (Note that this has no effect if the input fields
* come from the same frame.)
*
* @param p_filter The filter instance (determines input chroma).
* @param p_outpic Composed picture is written here. Allocated by caller.
* @param p_inpic_top Picture to extract the top field from.
* @param p_inpic_bottom Picture to extract the bottom field from.
* @param i_output_chroma Chroma operation mode for 4:2:0 (see function doc)
* @see compose_chroma_t
* @see RenderPhosphor()
* @see RenderIVTC()
*/
void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
picture_t *p_inpic_top, picture_t *p_inpic_bottom,
compose_chroma_t i_output_chroma );
/**
* Helper function: Estimates the number of 8x8 blocks which have motion
* between the given pictures. Needed for various detectors in RenderIVTC().
*
* Number of planes and visible lines in each plane, in the inputs must match.
* If the visible pitches do not match, only the compatible (smaller)
* part will be tested.
*
* Note that the return value is NOT simply *pi_top + *pi_bot, because
* the fields and the full block use different motion thresholds.
*
* If you do not want the separate field scores, pass NULL for pi_top and
* pi_bot. This does not affect computation speed, and is only provided as
* a syntactic convenience.
*
* Motion in each picture plane (Y, U, V) counts separately.
* The sum of number of blocks with motion across all planes is returned.
*
* For 4:2:0 chroma, even-numbered chroma lines make up the "top field" for
* chroma, and odd-numbered chroma lines the "bottom field" for chroma.
* This is correct for IVTC purposes.
*
* @param[in] p_prev Previous picture
* @param[in] p_curr Current picture
* @param[out] pi_top Number of 8x8 blocks where top field has motion.
* @param[out] pi_bot Number of 8x8 blocks where bottom field has motion.
* @return Number of 8x8 blocks that have motion.
* @retval -1 Error: incompatible input pictures.
* @see TestForMotionInBlock()
* @see RenderIVTC()
*/
int EstimateNumBlocksWithMotion( const picture_t* p_prev,
const picture_t* p_curr,
int *pi_top, int *pi_bot);
/**
* Helper function: estimates "how much interlaced" the given field pair is.
*
* It is allowed that p_pic_top == p_pic_bottom.
*
* If p_pic_top != p_pic_bot (fields come from different pictures), you can use
* ComposeFrame() to actually construct the picture if needed.
*
* Number of planes, and number of lines in each plane, in p_pic_top and
* p_pic_bot must match. If the visible pitches differ, only the compatible
* (smaller) part will be tested.
*
* Luma and chroma planes are tested in the same way. This is correct for
* telecined input, where in the interlaced frames also chroma alternates
* every chroma line, even if the chroma format is 4:2:0!
*
* This is just a raw detector that produces a score. The overall score
* indicating a progressive or interlaced frame may vary wildly, depending on
* the material, especially in anime. The scores should be compared to
* each other locally (in the temporal sense) to make meaningful decisions
* about progressive or interlaced frames.
*
* @param p_pic_top Picture to take the top field from.
* @param p_pic_bot Picture to take the bottom field from (same or different).
* @return Interlace score, >= 0. Higher values mean more interlaced.
* @retval -1 Error: incompatible input pictures.
* @see RenderIVTC()
* @see ComposeFrame()
*/
int CalculateInterlaceScore( const picture_t* p_pic_top,
const picture_t* p_pic_bot );
#endif
/*****************************************************************************
* merge.c : Merge (line blending) routines for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/* This handles including config.h, because the config
is already needed in the header. */
#include "merge.h"
#ifdef CAN_COMPILE_MMXEXT
# include "mmx.h"
#endif
#ifdef HAVE_ALTIVEC_H
# include <altivec.h>
#endif
#include <stdint.h>
/*****************************************************************************
* Merge (line blending) routines
*****************************************************************************/
void MergeGeneric( void *_p_dest, const void *_p_s1,
                   const void *_p_s2, size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Unrolled main loop: blend 8 pixels per iteration.

       FIX: count i_bytes down instead of precomputing
       "p_dest + i_bytes - 8". The old end-pointer computation formed an
       out-of-bounds pointer (undefined behavior) whenever i_bytes < 8,
       even though merge.h promises "i_bytes > 0; no other restrictions". */
    while( i_bytes >= 8 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_bytes -= 8;
    }

    /* Tail: at most 7 remaining pixels. The average truncates
       ((a+b)>>1), exactly like the unrolled loop above. */
    while( i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#if defined(CAN_COMPILE_MMXEXT)
/**
 * MMXEXT implementation of the line blend: averages 8 pixels per
 * iteration with "pavgb". The caller must finish with EndMMX()
 * (done automatically by the EndMerge() macro).
 *
 * NOTE(review): per the x86 ISA, "pavgb" rounds the average *up*
 * ((a+b+1)>>1) while the C tail below truncates ((a+b)>>1), so the last
 * few pixels of a line can differ by one LSB from the vectorized part.
 * Presumably harmless for line blending -- confirm if exactness matters.
 */
void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                  size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    /* NOTE(review): forming p_dest + i_bytes - 8 is out-of-bounds
       (technically UB) when i_bytes < 8; works in practice because the
       tail loop then sees p_dest >= p_end only after p_end += 8. */
    uint8_t* p_end = p_dest + i_bytes - 8;
    while( p_dest < p_end )
    {
        __asm__ __volatile__( "movq %2,%%mm1;"
                              "pavgb %1, %%mm1;"
                              "movq %%mm1, %0" :"=m" (*p_dest):
                                               "m" (*p_s1),
                                               "m" (*p_s2) );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }
    /* Advance p_end back to the true end of the line and finish
       the remaining (at most 7) pixels in C. */
    p_end += 8;
    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
#if defined(CAN_COMPILE_3DNOW)
/**
 * 3DNow! implementation of the line blend: averages 8 pixels per
 * iteration with "pavgusb". The caller must finish with End3DNow()
 * (done automatically by the EndMerge() macro).
 *
 * NOTE(review): "pavgusb" rounds the average up, while the C tail
 * truncates -- same one-LSB caveat as MergeMMXEXT().
 */
void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                 size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    /* NOTE(review): out-of-bounds end pointer when i_bytes < 8,
       same caveat as MergeMMXEXT(). */
    uint8_t* p_end = p_dest + i_bytes - 8;
    while( p_dest < p_end )
    {
        __asm__ __volatile__( "movq %2,%%mm1;"
                              "pavgusb %1, %%mm1;"
                              "movq %%mm1, %0" :"=m" (*p_dest):
                                               "m" (*p_s1),
                                               "m" (*p_s2) );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }
    /* Finish the remaining (at most 7) pixels in C. */
    p_end += 8;
    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
#if defined(CAN_COMPILE_SSE)
/**
 * SSE2 implementation of the line blend: averages 16 pixels per
 * iteration with "pavgb" on XMM registers. (Note: guarded by
 * CAN_COMPILE_SSE, but movdqu/pavgb-on-xmm are SSE2 instructions.)
 *
 * NOTE(review): "pavgb" rounds the average up, while the scalar
 * prologue/tail truncate -- same one-LSB caveat as MergeMMXEXT().
 */
void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Use C until the first 16-bytes aligned source pixel.

       BUGFIX: these prologue bytes must be consumed from i_bytes.
       The old code advanced the pointers here without decrementing
       i_bytes and then computed the end pointer from the *advanced*
       p_dest, so with an unaligned source it wrote up to 15 bytes past
       the end of the destination line (and could overrun for very small
       i_bytes). The sibling Altivec/NEON versions already account for
       their prologues; this makes SSE2 consistent with them. */
    while( i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ) )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_bytes--;
    }

    /* Main loop: 16 pixels at a time while at least 16 remain. */
    while( i_bytes >= 16 )
    {
        __asm__ __volatile__( "movdqu %2,%%xmm1;"
                              "pavgb %1, %%xmm1;"
                              "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                  "m" (*p_s1),
                                                  "m" (*p_s2) );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
        i_bytes -= 16;
    }

    /* Tail: at most 15 remaining pixels, blended in C. */
    while( i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
#ifdef CAN_COMPILE_C_ALTIVEC
/**
 * AltiVec implementation of the line blend: 16 pixels per iteration
 * with vec_avg, after aligning the *destination* in C.
 *
 * NOTE(review): vec_avg rounds the average up, while the scalar
 * prologue/tail truncate -- results may differ by one LSB between the
 * vectorized middle and the scalar edges. Presumably harmless here.
 */
void MergeAltivec( void *_p_dest, const void *_p_s1,
                   const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    /* p_end is computed from the *original* p_dest, so the prologue bytes
       below are already accounted for: exactly i_bytes are written in
       total (unlike the pre-fix SSE2 version). */
    uint8_t *p_end  = p_dest + i_bytes - 15;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( (uintptr_t)p_dest & 0xF )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* NOTE(review): casting a pointer to (int) truncates on 64-bit
       targets; the low bits still decide alignment correctly, but
       uintptr_t (as used above) would be the clean spelling. */
    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
    {
        /* Unaligned source: use vec_lvsl/vec_perm to shift the
           misaligned loads into register alignment. */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( p_dest < p_end )
        {
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source: straight vector loads. */
        vector unsigned char s1v, s2v, destv;

        while( p_dest < p_end )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Finish the remaining (at most 15) pixels in C. */
    p_end += 15;
    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
#ifdef __ARM_NEON__
/**
 * ARM NEON implementation of the line blend. "vhadd.u8" is a truncating
 * halving add, so results match MergeGeneric() exactly. Destination
 * misalignment and the tail are delegated to MergeGeneric().
 *
 * NOTE(review): each asm iteration consumes 64 bytes (two 32-byte
 * loads per source), but "end" is rounded only to a 16-byte multiple
 * (n & ~15) and the final scalar pass handles just n & 15. If
 * n mod 64 falls in [16, 63], the loop appears to run past "end" --
 * verify against the callers' line sizes / check upstream for a fix
 * before relying on this with arbitrary n.
 */
void MergeNEON (void *restrict out, const void *in1,
                const void *in2, size_t n)
{
    uint8_t *outp = out;
    const uint8_t *in1p = in1;
    const uint8_t *in2p = in2;
    size_t mis = ((uintptr_t)outp) & 15;

    /* Use the C version until the destination is 16-byte aligned;
       these bytes are properly consumed from n. */
    if (mis)
    {
        MergeGeneric (outp, in1p, in2p, mis);
        outp += mis;
        in1p += mis;
        in2p += mis;
        n -= mis;
    }

    uint8_t *end = outp + (n & ~15);

    /* Two variants: unaligned sources use plain loads; aligned sources
       use :128-aligned loads. Stores are always :128 (outp is aligned). */
    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1]]!\n"
                "vld1.u8  {q2-q3}, [%[in2]]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1]]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2]]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");
    else
         while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");

    /* Tail: the last n & 15 pixels, blended in C. */
    n &= 15;
    if (n)
        MergeGeneric (outp, in1p, in2p, n);
}
#endif
/*****************************************************************************
* EndMerge routines
*****************************************************************************/
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/**
 * Finalizes an MMX merge: executes "emms" so the x87/MMX register
 * state is usable for floating point again. Invoked through the
 * EndMerge() macro after a vectorized merge pass.
 */
void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
#endif
#if defined(CAN_COMPILE_3DNOW)
/**
 * Finalizes a 3DNow! merge: executes "femms" (the faster 3DNow!
 * equivalent of "emms"). Invoked through the EndMerge() macro after
 * a Merge3DNow() pass.
 */
void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
#endif
/*****************************************************************************
* merge.h : Merge (line blending) routines for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2011 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_MERGE_H
#define VLC_DEINTERLACE_MERGE_H 1
/**
* \file
* Merge (line blending) routines for the VLC deinterlacer.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/*****************************************************************************
* Macros
*****************************************************************************/
/* Convenient Merge() and EndMerge() macros to pick the most appropriate
merge implementation automatically.
Note that you'll need to include vlc_filter.h and deinterlace.h
to use these.
*/
#define Merge p_filter->p_sys->pf_merge
#define EndMerge if(p_filter->p_sys->pf_end_merge) p_filter->p_sys->pf_end_merge
/*****************************************************************************
* Merge routines
*****************************************************************************/
/**
* Generic routine to blend pixels from two picture lines.
* No inline assembler acceleration.
*
* Note that the Open() call of the deinterlace filter automatically selects
* the most appropriate merge routine based on the CPU capabilities.
* You can call the most appropriate version automatically, from a function
* in the deinterlace filter, by using the Merge() macro.
*
* Note that the filter instance (p_filter) must be available for the Merge()
* macro to work, because it needs the detection result from the filter's
* Open().
*
* Macro syntax:
* Merge( _p_dest, _p_s1, _p_s2, i_bytes );
*
* See also the EndMerge() macro, which must be called after the merge is
* finished, if the Merge() macro was used to perform the merge.
*
* i_bytes > 0; no other restrictions. This holds for all versions of the
* merge routine.
*
* @param _p_dest Target line. Blend result = (A + B)/2.
* @param _p_s1 Source line A.
* @param _p_s2 Source line B.
* @param i_bytes Number of bytes to merge.
* @see Open()
*/
void MergeGeneric( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes );
#if defined(CAN_COMPILE_C_ALTIVEC)
/**
* Altivec routine to blend pixels from two picture lines.
*
* @param _p_dest Target
* @param _p_s1 Source line A
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void MergeAltivec ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_MMXEXT)
/**
* MMXEXT routine to blend pixels from two picture lines.
*
* @param _p_dest Target
* @param _p_s1 Source line A
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void MergeMMXEXT ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_3DNOW)
/**
* 3DNow routine to blend pixels from two picture lines.
*
* @param _p_dest Target
* @param _p_s1 Source line A
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void Merge3DNow ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_SSE)
/**
* SSE2 routine to blend pixels from two picture lines.
*
* @param _p_dest Target
* @param _p_s1 Source line A
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void MergeSSE2 ( void *, const void *, const void *, size_t );
#endif
#if defined __ARM_NEON__
/**
* ARM NEON routine to blend pixels from two picture lines.
*
* @param _p_dest Target
* @param _p_s1 Source line A
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
void MergeNEON (void *, const void *, const void *, size_t);
#endif
/*****************************************************************************
* EndMerge routines
*****************************************************************************/
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/**
* MMX merge finalization routine.
*
* Must be called after an MMX merge is finished.
* This exits MMX mode (by executing the "emms" instruction).
*
* The EndMerge() macro detects whether this is needed, and calls if it is,
* so just use that.
*/
void EndMMX ( void );
#endif
#if defined(CAN_COMPILE_3DNOW)
/**
* 3DNow merge finalization routine.
*
* Must be called after a 3DNow merge is finished.
* This exits 3DNow mode (by executing the "femms" instruction).
*
* The EndMerge() macro detects whether this is needed, and calls if it is,
* so just use that.
*/
void End3DNow ( void );
#endif
#endif
......@@ -25,17 +25,19 @@
* values by ULL, lest they be truncated by the compiler)
*/
#include <stdint.h>
typedef union {
int64_t q; /* Quadword (64-bit) value */
int64_t q; /* Quadword (64-bit) value */
uint64_t uq; /* Unsigned Quadword */
int32_t d[2]; /* 2 Doubleword (32-bit) values */
uint32_t ud[2]; /* 2 Unsigned Doubleword */
int16_t w[4]; /* 4 Word (16-bit) values */
uint16_t uw[4]; /* 4 Unsigned Word */
int8_t b[8]; /* 8 Byte (8-bit) values */
uint8_t ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */
int32_t d[2]; /* 2 Doubleword (32-bit) values */
uint32_t ud[2]; /* 2 Unsigned Doubleword */
int16_t w[4]; /* 4 Word (16-bit) values */
uint16_t uw[4]; /* 4 Unsigned Word */
int8_t b[8]; /* 8 Byte (8-bit) values */
uint8_t ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */
#define mmx_i2r(op,imm,reg) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment