Refactored deinterlacer module

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>

Refactored deinterlacer module
Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
c7d289ca · Juha Jeronen · Rémi Denis-Courmont · 814a19c3 · c7d289ca · c7d289ca
Commit c7d289ca authored Apr 30, 2011 by Juha Jeronen Committed by Rémi Denis-Courmont May 01, 2011
20 changed files
--- a/modules/video_filter/Modules.am
+++ b/modules/video_filter/Modules.am
@@ -10,7 +10,16 @@ SOURCES_crop = crop.c
 SOURCES_motionblur = motionblur.c
 SOURCES_logo = logo.c
 SOURCES_audiobargraph_v = audiobargraph_v.c
-SOURCES_deinterlace = deinterlace.c yadif.h mmx.h
+SOURCES_deinterlace = deinterlace/deinterlace.c \
+	deinterlace/mmx.h deinterlace/common.h \
+	deinterlace/merge.c deinterlace/merge.h \
+	deinterlace/helpers.c deinterlace/helpers.h \
+	deinterlace/algo_basic.c deinterlace/algo_basic.h \
+	deinterlace/algo_x.c deinterlace/algo_x.h \
+	deinterlace/algo_yadif.c deinterlace/algo_yadif.h \
+	deinterlace/yadif.h \
+	deinterlace/algo_phosphor.c deinterlace/algo_phosphor.h \
+	deinterlace/algo_ivtc.c deinterlace/algo_ivtc.h
 SOURCES_blend = blend.c
 SOURCES_scale = scale.c
 SOURCES_marq = marq.c

--- a/modules/video_filter/deinterlace/algo_basic.c
+++ b/modules/video_filter/deinterlace/algo_basic.c
--- a/modules/video_filter/deinterlace/algo_basic.h
+++ b/modules/video_filter/deinterlace/algo_basic.h
+/*****************************************************************************
+ * algo_basic.h : Basic algorithms for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2000-2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_ALGO_BASIC_H
+#define VLC_DEINTERLACE_ALGO_BASIC_H 1
+
+/**
+ * \file
+ * Basic deinterlace algorithms: Discard, Bob, Linear, Mean and Blend.
+ */
+
+/* Forward declarations */
+struct filter_t;
+struct picture_t;
+
+/*****************************************************************************
+ * Functions
+ *****************************************************************************/
+
+/**
+ * RenderDiscard: only keep top or bottom field, discard the other.
+ *
+ * For a 2x (framerate-doubling) near-equivalent, see RenderBob().
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_outpic Output frame. Must be allocated by caller.
+ * @param p_pic Input frame. Must exist.
+ * @param i_field Keep which field? 0 = top field, 1 = bottom field.
+ * @see RenderBob()
+ * @see Deinterlace()
+ */
+void RenderDiscard( filter_t *p_filter,
+                    picture_t *p_outpic, picture_t *p_pic, int i_field );
+
+/**
+ * RenderBob: basic framerate doubler.
+ *
+ * Creates an illusion of full vertical resolution while running.
+ *
+ * For a 1x (non-doubling) near-equivalent, see RenderDiscard().
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_outpic Output frame. Must be allocated by caller.
+ * @param p_pic Input frame. Must exist.
+ * @param i_field Render which field? 0 = top field, 1 = bottom field.
+ * @see RenderLinear()
+ * @see Deinterlace()
+ */
+void RenderBob( filter_t *p_filter,
+                picture_t *p_outpic, picture_t *p_pic, int i_field );
+
+/**
+ * RenderLinear: Bob with linear interpolation.
+ *
+ * There is no 1x (non-doubling) equivalent for this filter.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_outpic Output frame. Must be allocated by caller.
+ * @param p_pic Input frame. Must exist.
+ * @param i_field Render which field? 0 = top field, 1 = bottom field.
+ * @see RenderBob()
+ * @see Deinterlace()
+ */
+void RenderLinear( filter_t *p_filter,
+                   picture_t *p_outpic, picture_t *p_pic, int i_field );
+
+/**
+ * RenderMean: half-resolution blender.
+ *
+ * Renders the mean of the top and bottom fields.
+ *
+ * Obviously, there is no 2x equivalent for this filter.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_outpic Output frame. Must be allocated by caller.
+ * @param p_pic Input frame. Must exist.
+ * @see Deinterlace()
+ */
+void RenderMean( filter_t *p_filter,
+                 picture_t *p_outpic, picture_t *p_pic );
+
+/**
+ * RenderBlend: full-resolution blender.
+ *
+ * The first line is copied; for the rest of the lines, line N
+ * is the mean of lines N and N-1 in the input.
+ *
+ * Obviously, there is no 2x equivalent for this filter.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_outpic Output frame. Must be allocated by caller.
+ * @param p_pic Input frame. Must exist.
+ * @see Deinterlace()
+ */
+void RenderBlend( filter_t *p_filter,
+                  picture_t *p_outpic, picture_t *p_pic );
+
+#endif
--- a/modules/video_filter/deinterlace.c
+++ b/modules/video_filter/deinterlace.c
--- a/modules/video_filter/deinterlace/algo_ivtc.h
+++ b/modules/video_filter/deinterlace/algo_ivtc.h
--- a/modules/video_filter/deinterlace/algo_phosphor.c
+++ b/modules/video_filter/deinterlace/algo_phosphor.c
--- a/modules/video_filter/deinterlace/algo_phosphor.h
+++ b/modules/video_filter/deinterlace/algo_phosphor.h
+/*****************************************************************************
+ * algo_phosphor.h : Phosphor algorithm for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Juha Jeronen <juha.jeronen@jyu.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_ALGO_PHOSPHOR_H
+#define VLC_DEINTERLACE_ALGO_PHOSPHOR_H 1
+
+/* Forward declarations */
+struct filter_t;
+struct picture_t;
+
+/*****************************************************************************
+ * Data structures etc.
+ *****************************************************************************/
+
+/* The enumerator values below and phosphor_chroma_list[] must stay in the
+   same order as phosphor_chroma_list_text[]. The value 0 is left unused on
+   purpose, because var_GetInteger() returns 0 in case of error. */
+/** Valid Phosphor 4:2:0 chroma handling modes. */
+typedef enum
+{
+    PC_LATEST    = 1,
+    PC_ALTLINE   = 2,
+    PC_BLEND     = 3,
+    PC_UPCONVERT = 4
+} phosphor_chroma_t;
+/** Phosphor 4:2:0 chroma handling modes (config item). */
+static const int phosphor_chroma_list[] = {
+    PC_LATEST,
+    PC_ALTLINE,
+    PC_BLEND,
+    PC_UPCONVERT
+};
+/** User labels for Phosphor 4:2:0 chroma handling modes (config item). */
+static const char *const phosphor_chroma_list_text[] = {
+    N_("Latest"),
+    N_("AltLine"),
+    N_("Blend"),
+    N_("Upconvert")
+};
+
+/* As with the chroma modes: the values are listed in the same order as
+   phosphor_dimmer_list_text[], and 0 is reserved for config error. */
+/** Phosphor dimmer strengths (config item). */
+static const int phosphor_dimmer_list[] = {
+    1,
+    2,
+    3,
+    4
+};
+/** User labels for Phosphor dimmer strengths (config item). */
+static const char *const phosphor_dimmer_list_text[] = {
+    N_("Off"),
+    N_("Low"),
+    N_("Medium"),
+    N_("High")
+};
+
+/** Algorithm-specific state for Phosphor. */
+typedef struct
+{
+    /* Chroma handling mode used for 4:2:0 input (a phosphor_chroma_t value). */
+    phosphor_chroma_t i_chroma_for_420;
+    /* Dimmer strength. Presumably one of the phosphor_dimmer_list[] values
+       (1 = "Off" ... 4 = "High") - TODO confirm against the code that sets it. */
+    int i_dimmer_strength;
+} phosphor_sys_t;
+
+/*****************************************************************************
+ * Functions
+ *****************************************************************************/
+
+/**
+ * "Phosphor" deinterlace algorithm: framerate-doubling CRT TV simulator.
+ *
+ * There is no "1x" mode in this filter; only framerate doubling is supported.
+ *
+ * There is no input frame parameter, because the input frames
+ * are taken from the history buffer.
+ *
+ * Soft field repeat (repeat_pict) is supported. Note that the generated
+ * "repeated" output picture is unique because of the simulated light decay.
+ * Its "old" field comes from the same input frame as the "new" one, unlike
+ * the first output picture of the same frame.
+ *
+ * As many output frames should be requested for each input frame as is
+ * indicated by p_src->i_nb_fields. This is done by calling this function
+ * several times, first with i_order = 0, and then with all other parameters
+ * the same, but a new p_dst, increasing i_order (1 for second field,
+ * and then if i_nb_fields = 3, also i_order = 2 to get the repeated first
+ * field), and alternating i_field (starting, at i_order = 0, with the field
+ * according to p_src->b_top_field_first). See Deinterlace() for an example.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_dst Output frame. Must be allocated by caller.
+ * @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
+ * @param i_field Render which field? 0 = top field, 1 = bottom field.
+ * @return VLC error code (int).
+ * @retval VLC_SUCCESS The requested field was rendered into p_dst.
+ * @retval VLC_EGENERIC No pictures in history buffer, cannot render.
+ * @see RenderBob()
+ * @see RenderLinear()
+ * @see Deinterlace()
+ */
+int RenderPhosphor( filter_t *p_filter,
+                    picture_t *p_dst,
+                    int i_order, int i_field );
+
+/*****************************************************************************
+ * Extra documentation
+ *****************************************************************************/
+
+/**
+ * \file
+ * "Phosphor" deinterlace algorithm. This simulates the rendering mechanism
+ * of an interlaced CRT TV, actually producing *interlaced* output.
+ *
+ * The main use case for this filter is anime for which IVTC is not applicable.
+ * This is the case, if 24fps telecined material has been mixed with 60fps
+ * interlaced effects, such as in Sol Bianca or Silent Mobius. It can also
+ * be used for true interlaced video, such as most camcorder recordings.
+ *
+ * The filter has several modes for handling 4:2:0 chroma for those output
+ * frames that fall across input frame temporal boundaries (i.e. fields come
+ * from different frames). Upconvert (to 4:2:2) provides the most accurate
+ * CRT simulation, but requires more CPU and memory bandwidth than the other
+ * modes. The other modes keep the chroma at 4:2:0.
+ *
+ * About these modes: telecined input (such as NTSC anime DVDs) works better
+ * with AltLine, while true interlaced input works better with Latest.
+ * Blend is a compromise, which may or may not look acceptable.
+ * The mode can be set in the VLC advanced configuration,
+ * All settings > Video > Filters > Deinterlace
+ *
+ * Technically speaking, this is an interlaced field renderer targeted for
+ * progressive displays. It works by framerate doubling, and simulating one
+ * step of light output decay of the "old" field during the "new" field,
+ * until the next new field comes in to replace the "old" one.
+ *
+ * While playback is running, the simulated light decay gives the picture an
+ * appearance of visible "scanlines", much like on a real TV. Only when the
+ * video is paused does it become clearly visible that one of the fields is
+ * actually brighter than the other.
+ *
+ * The main differences to the Bob algorithm are:
+ *  - in addition to the current field, the previous one (fading out)
+ *    is also rendered
+ *  - some horizontal lines don't seem to flicker as much
+ *  - scanline visual effect (adjustable; the dimmer strength can be set
+ *    in the VLC advanced configuration)
+ *  - the picture appears 25%, 38% or 44% darker on average (for dimmer
+ *    strengths 1, 2 and 3)
+ *  - if the input has 4:2:0 chroma, the colours may look messed up in some
+ *    output frames. This is a limitation of the 4:2:0 chroma format, and due
+ *    to the fact that both fields are present in each output picture. Usually
+ *    this doesn't matter in practice, but see the 4:2:0 chroma mode setting
+ *    in the configuration if needed (it may help a bit).
+ *
+ * In addition, when this filter is used on an LCD computer monitor,
+ * the main differences to a real CRT TV are:
+ *  - Pixel shape and grid layout; CRT TVs were designed for interlaced
+ *    field rendering, while LCD monitors weren't.
+ *  - No scan flicker even though the display runs (usually) at 60Hz.
+ *    (This at least is a good thing.)
+ *
+ * The output vertical resolution should be large enough for the scaling
+ * not to have a too adverse effect on the regular scanline pattern.
+ * In practice, NTSC video can be acceptably rendered already at 1024x600
+ * if fullscreen even on an LCD. PAL video requires more.
+ *
+ * Just like Bob, this filter works properly only if the input framerate
+ * is stable. Otherwise the scanline effect breaks down and the picture
+ * will flicker.
+ */
+
+#endif
--- a/modules/video_filter/deinterlace/algo_x.c
+++ b/modules/video_filter/deinterlace/algo_x.c
--- a/modules/video_filter/deinterlace/algo_x.h
+++ b/modules/video_filter/deinterlace/algo_x.h
+/*****************************************************************************
+ * algo_x.h : "X" algorithm for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2000-2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_ALGO_X_H
+#define VLC_DEINTERLACE_ALGO_X_H 1
+
+/* Forward declarations */
+struct picture_t;
+
+/*****************************************************************************
+ * Functions
+ *****************************************************************************/
+
+/**
+ * Interpolating deinterlace filter "X".
+ *
+ * The algorithm works on an 8x8 block basis; it copies the top field
+ * and applies a process to recreate the bottom field.
+ *
+ * If an 8x8 block is classified as:
+ *   - progressive: it applies a small blend (1,6,1)
+ *   - interlaced:
+ *    * in the MMX version: we do a ME between the 2 fields, if there is a
+ *      good match we use MC to recreate the bottom field (with a small
+ *      blend (1,6,1) )
+ *    * otherwise: it recreates the bottom field by an edge oriented
+ *      interpolation.
+ *
+ * @param[in] p_pic Input frame.
+ * @param[out] p_outpic Output frame. Must be allocated by caller.
+ * @see Deinterlace()
+ */
+void RenderX( picture_t *p_outpic, picture_t *p_pic );
+
+#endif
--- a/modules/video_filter/deinterlace/algo_yadif.c
+++ b/modules/video_filter/deinterlace/algo_yadif.c
+/*****************************************************************************
+ * algo_yadif.c : Wrapper for MPlayer's Yadif algorithm
+ *****************************************************************************
+ * Copyright (C) 2000-2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#   include "config.h"
+#endif
+
+#ifdef CAN_COMPILE_MMXEXT
+#   include "mmx.h"
+#endif
+
+#include <stdint.h>
+#include <assert.h>
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_picture.h>
+#include <vlc_filter.h>
+
+#include "deinterlace.h" /* filter_sys_t  */
+#include "common.h"      /* FFMIN3 et al. */
+
+#include "algo_yadif.h"
+
+/*****************************************************************************
+ * Yadif (Yet Another DeInterlacing Filter).
+ *****************************************************************************/
+
+/* Yadif's private data struct, passed as first argument to the line filter. */
+struct vf_priv_s {
+    /*
+     * 0: Output 1 frame for each frame.
+     * 1: Output 1 frame for each field.
+     * 2: Like 0 but skips spatial interlacing check.
+     * 3: Like 1 but skips spatial interlacing check.
+     *
+     * In VLC, only & 0x02 has meaning, as we do the & 0x01 ourselves.
+     * RenderYadif() accordingly only ever stores 0 or 2 here.
+     */
+    int mode;
+};
+
+/* Integer type named x86_reg, presumably required by yadif.h (included
+   below). NOTE(review): the original author was unsure that intptr_t is the
+   right underlying type - confirm against MPlayer's own definition. */
+typedef intptr_t x86_reg;
+
+/* yadif.h comes from vf_yadif.c of mplayer project.
+   Necessary preprocessor macros are defined in common.h. */
+#include "yadif.h"
+
+/**
+ * Renders one output picture with Yadif.
+ *
+ * The input is not p_src but the three pictures in the filter's history
+ * buffer (p_sys->pp_history[0..2], used as previous, current and next).
+ *
+ *  - All three present: the lines of field i_field are copied from the
+ *    current picture, the others are interpolated by Yadif's line filter;
+ *    i_frame_offset is set to 1 and VLC_SUCCESS is returned.
+ *  - Only the "next" picture present: falls back to RenderX() on it and
+ *    returns VLC_SUCCESS.
+ *  - Anything else: nothing is rendered, i_frame_offset is set to 1 and
+ *    VLC_EGENERIC is returned.
+ *
+ * @param p_filter The filter instance; its p_sys is dereferenced.
+ * @param p_dst Output picture, written in place.
+ * @param p_src Unused.
+ * @param i_order Temporal field number, 0..2 (2 = soft field repeat).
+ * @param i_field Field to copy as-is: 0 or 1 (compared against y % 2).
+ * @return VLC_SUCCESS or VLC_EGENERIC, as described above.
+ */
+int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
+                 int i_order, int i_field )
+{
+    VLC_UNUSED(p_src);
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+
+    /* */
+    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
+    assert( i_field == 0 || i_field == 1 );
+
+    /* As the pitches must match, use ONLY pictures coming from picture_New()! */
+    picture_t *p_prev = p_sys->pp_history[0];
+    picture_t *p_cur  = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    /* Account for soft field repeat.
+
+       The "parity" parameter affects the algorithm like this (from yadif.h):
+       uint8_t *prev2= parity ? prev : cur ;
+       uint8_t *next2= parity ? cur  : next;
+
+       The original parity expression that was used here is:
+       (i_field ^ (i_order == i_field)) & 1
+
+       Truth table:
+       i_field = 0, i_order = 0  => 1
+       i_field = 1, i_order = 1  => 0
+       i_field = 1, i_order = 0  => 1
+       i_field = 0, i_order = 1  => 0
+
+       => equivalent with e.g.  (1 - i_order)  or  (i_order + 1) % 2
+
+       Thus, in a normal two-field frame,
+             parity 1 = first field  (i_order == 0)
+             parity 0 = second field (i_order == 1)
+
+       Now, with three fields, where the third is a copy of the first,
+             i_order = 0  =>  parity 1 (as usual)
+             i_order = 1  =>  due to the repeat, prev = cur, but also next = cur.
+                              Because in such a case there is no motion
+                              (otherwise field repeat makes no sense),
+                              we don't actually need to invoke Yadif's filter().
+                              Thus, set "parity" to 2, and use this to bypass
+                              the filter.
+             i_order = 2  =>  parity 0 (as usual)
+    */
+    int yadif_parity;
+    if( p_cur  &&  p_cur->i_nb_fields > 2 )
+        yadif_parity = (i_order + 1) % 3; /* 1, *2*, 0; where 2 is a special
+                                             value meaning "bypass filter". */
+    else
+        yadif_parity = (i_order + 1) % 2; /* 1, 0 */
+
+    /* Filter if we have all the pictures we need */
+    if( p_prev && p_cur && p_next )
+    {
+        /* Pick the line filter implementation once for the whole picture. */
+        void (*filter)(struct vf_priv_s *p, uint8_t *dst,
+                       uint8_t *prev, uint8_t *cur, uint8_t *next,
+                       int w, int refs, int parity);
+#if defined(HAVE_YADIF_SSE2)
+        if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
+            filter = yadif_filter_line_mmx2;
+        else
+#endif
+            filter = yadif_filter_line_c;
+
+        for( int n = 0; n < p_dst->i_planes; n++ )
+        {
+            const plane_t *prevp = &p_prev->p[n];
+            const plane_t *curp  = &p_cur->p[n];
+            const plane_t *nextp = &p_next->p[n];
+            plane_t *dstp        = &p_dst->p[n];
+
+            /* filter() is handed a single stride (curp->i_pitch) for all
+               three input pictures, so their pitches must be identical.
+               This is invariant per plane, hence checked once here rather
+               than for every line. */
+            assert( prevp->i_pitch == curp->i_pitch
+                    && curp->i_pitch == nextp->i_pitch );
+
+            for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
+            {
+                if( (y % 2) == i_field  ||  yadif_parity == 2 )
+                {
+                    /* Line belongs to the kept field (or the filter is
+                       bypassed): plain copy from the current picture. */
+                    vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
+                                &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
+                }
+                else
+                {
+                    struct vf_priv_s cfg;
+                    /* Spatial checks only when enough data */
+                    cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
+
+                    filter( &cfg,
+                            &dstp->p_pixels[y * dstp->i_pitch],
+                            &prevp->p_pixels[y * prevp->i_pitch],
+                            &curp->p_pixels[y * curp->i_pitch],
+                            &nextp->p_pixels[y * nextp->i_pitch],
+                            dstp->i_visible_pitch,
+                            curp->i_pitch,
+                            yadif_parity );
+                }
+
+                /* We duplicate the first and last lines, which this loop
+                   never renders itself. The two tests are independent on
+                   purpose: in a plane with exactly 3 visible lines, y == 1
+                   is both the first and the last rendered line, and the
+                   bottom line must not be left unwritten. */
+                if( y == 1 )
+                    vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch],
+                               &dstp->p_pixels[ y    * dstp->i_pitch],
+                               dstp->i_pitch);
+                if( y == dstp->i_visible_lines - 2 )
+                    vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch],
+                               &dstp->p_pixels[ y    * dstp->i_pitch],
+                               dstp->i_pitch);
+            }
+        }
+
+        p_sys->i_frame_offset = 1; /* p_cur will be rendered at next frame, too */
+
+        return VLC_SUCCESS;
+    }
+    else if( !p_prev && !p_cur && p_next )
+    {
+        /* NOTE: For the first frame, we use the default frame offset
+                 as set by Open() or SetFilterMethod(). It is always 0. */
+
+        /* FIXME not good as it does not use i_order/i_field */
+        RenderX( p_dst, p_next );
+        return VLC_SUCCESS;
+    }
+    else
+    {
+        p_sys->i_frame_offset = 1; /* p_cur will be rendered at next frame */
+
+        return VLC_EGENERIC;
+    }
+}
--- a/modules/video_filter/deinterlace/algo_yadif.h
+++ b/modules/video_filter/deinterlace/algo_yadif.h
+/*****************************************************************************
+ * algo_yadif.h : Wrapper for MPlayer's Yadif algorithm
+ *****************************************************************************
+ * Copyright (C) 2000-2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_ALGO_YADIF_H
+#define VLC_DEINTERLACE_ALGO_YADIF_H 1
+
+/**
+ * \file
+ * Adapter to fit the Yadif (Yet Another DeInterlacing Filter) algorithm
+ * from MPlayer into VLC. The algorithm itself is implemented in yadif.h.
+ */
+
+/* Forward declarations */
+struct filter_t;
+struct picture_t;
+
+/*****************************************************************************
+ * Functions
+ *****************************************************************************/
+
+/**
+ * Yadif (Yet Another DeInterlacing Filter) from MPlayer.
+ * One field is copied as-is (i_field), the other is interpolated.
+ *
+ * Comes with both interpolating and framerate doubling modes.
+ *
+ * If you do NOT want to use framerate doubling: use i_order = 0,
+ * and either 0 or 1 for i_field (keep it constant).
+ *
+ * If you DO want framerate doubling, do as instructed below.
+ *
+ * See Deinterlace() for usage examples of both modes.
+ *
+ * Needs three frames in the history buffer to operate.
+ * The first-ever frame is rendered using RenderX().
+ * The second is dropped. At the third frame, Yadif starts.
+ *
+ * Once Yadif starts, the frame that is rendered corresponds to the *previous*
+ * input frame (i_frame_offset = 1), complete with its original PTS.
+ * The latest input frame is used as the future/next frame, as reference
+ * for temporal interpolation.
+ *
+ * This wrapper adds support for soft field repeat (repeat_pict).
+ * Note that the generated "repeated" output picture is unique because
+ * of temporal interpolation.
+ *
+ * As many output frames should be requested for each input frame as is
+ * indicated by p_src->i_nb_fields. This is done by calling this function
+ * several times, first with i_order = 0, and then with all other parameters
+ * the same, but a new p_dst, increasing i_order (1 for second field,
+ * and then if i_nb_fields = 3, also i_order = 2 to get the repeated first
+ * field), and alternating i_field (starting, at i_order = 0, with the field
+ * according to p_src->b_top_field_first). See Deinterlace() for an example.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_dst Output frame. Must be allocated by caller.
+ * @param p_src Input frame. Currently unused: the frames to process are
+ *              taken from the history buffer.
+ * @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
+ * @param i_field Keep which field? 0 = top field, 1 = bottom field.
+ * @return VLC error code (int).
+ * @retval VLC_SUCCESS The requested field was rendered into p_dst.
+ * @retval VLC_EGENERIC Frame dropped; only occurs at the second frame after start.
+ * @see Deinterlace()
+ */
+int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
+                 int i_order, int i_field );
+
+#endif
--- a/modules/video_filter/deinterlace/common.h
+++ b/modules/video_filter/deinterlace/common.h
+/*****************************************************************************
+ * common.h : Common macros for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2000-2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_COMMON_H
+#define VLC_DEINTERLACE_COMMON_H 1
+
+/**
+ * \file
+ * Common macros for the VLC deinterlacer.
+ */
+
+/* Needed for Yadif, but also some others.
+ *
+ * These are plain function-like macros: FFABS expands its argument more
+ * than once, so do not pass expressions with side effects (e.g. i++).
+ * FFMAX/FFMIN forward to __MAX/__MIN, which are not defined in this header -
+ * presumably they come from vlc_common.h, which must then be included
+ * first (TODO confirm). */
+#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
+#define FFMAX(a,b)      __MAX(a,b)
+#define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
+#define FFMIN(a,b)      __MIN(a,b)
+#define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)
+
+#endif
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
--- a/modules/video_filter/deinterlace/deinterlace.h
+++ b/modules/video_filter/deinterlace/deinterlace.h
--- a/modules/video_filter/deinterlace/helpers.c
+++ b/modules/video_filter/deinterlace/helpers.c
--- a/modules/video_filter/deinterlace/helpers.h
+++ b/modules/video_filter/deinterlace/helpers.h
+/*****************************************************************************
+ * helpers.h : Generic helper functions for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Juha Jeronen <juha.jeronen@jyu.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_DEINTERLACE_HELPERS_H
+#define VLC_DEINTERLACE_HELPERS_H 1
+
+/**
+ * \file
+ * Generic helper functions for the VLC deinterlacer, used in
+ * some of the advanced algorithms.
+ */
+
+/* Forward declarations */
+struct filter_t;
+struct picture_t;
+struct plane_t;
+
+/**
+ * Selects how ComposeFrame() produces the output chroma when the input
+ * chroma format is 4:2:0.
+ * @see ComposeFrame()
+ */
+typedef enum
+{
+    CC_ALTLINE,       /**< Alternate chroma lines between the field pictures */
+    CC_UPCONVERT,     /**< Use the chroma of both fields; output is 4:2:2 */
+    CC_SOURCE_TOP,    /**< Copy chroma of the top field picture only */
+    CC_SOURCE_BOTTOM, /**< Copy chroma of the bottom field picture only */
+    CC_MERGE          /**< Average the chroma of the two field pictures */
+} compose_chroma_t;
+
+/**
+ * Helper function: composes a frame from the given field pair.
+ *
+ * Caller must manage allocation/deallocation of p_outpic.
+ *
+ * The inputs are full pictures (frames); only one field
+ * will be used from each.
+ *
+ * Chroma formats of the inputs must match. The visible pitches of both
+ * inputs should also be the same; otherwise the result may not be sensible.
+ * The pitch or visible pitch of the output does not need to match that of
+ * the inputs; the compatible (smaller) part of the visible pitch will
+ * be filled.
+ *
+ * The i_output_chroma parameter must always be supplied, but it is only used
+ * when the chroma format of the input is detected as 4:2:0. Available modes:
+ *   - CC_ALTLINE:       Alternate line copy, like for luma. Chroma line 0
+ *                       comes from top field picture, chroma line 1 comes
+ *                       from bottom field picture, chroma line 2 from top
+ *                       field picture, and so on. This is usually the right
+ *                       choice for IVTCing NTSC DVD material, but rarely
+ *                       for any other use cases.
+ *   - CC_UPCONVERT:     The output will have 4:2:2 chroma. All 4:2:0 chroma
+ *                       data from both input fields will be used to generate
+ *                       the 4:2:2 chroma data of the output. Each output line
+ *                       will thus have independent chroma. This is a good
+ *                       choice for most purposes except IVTC, if the machine
+ *                       can handle the increased throughput. (Make sure to
+ *                       allocate a 4:2:2 output picture first!)
+ *                       This mode can also be used for converting a 4:2:0
+ *                       frame to 4:2:2 format (by passing the same input
+ *                       picture for both input fields).
+ *                       Conversions: I420, YV12 --> I422
+ *                                    J420       --> J422
+ *   - CC_SOURCE_TOP:    Copy chroma of source top field picture.
+ *                       Ignore chroma of source bottom field picture.
+ *   - CC_SOURCE_BOTTOM: Copy chroma of source bottom field picture.
+ *                       Ignore chroma of source top field picture.
+ *   - CC_MERGE:         Average the chroma of the input field pictures.
+ *                       (Note that this has no effect if the input fields
+ *                        come from the same frame.)
+ *
+ * @param p_filter The filter instance (determines input chroma).
+ * @param p_outpic Composed picture is written here. Allocated by caller
+ *                 (as a 4:2:2 picture when CC_UPCONVERT is used).
+ * @param p_inpic_top Picture to extract the top field from.
+ * @param p_inpic_bottom Picture to extract the bottom field from.
+ * @param i_output_chroma Chroma operation mode for 4:2:0 (see function doc)
+ * @see compose_chroma_t
+ * @see RenderPhosphor()
+ * @see RenderIVTC()
+ */
+void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
+                   picture_t *p_inpic_top, picture_t *p_inpic_bottom,
+                   compose_chroma_t i_output_chroma );
+
+/**
+ * Helper function: Estimates the number of 8x8 blocks which have motion
+ * between the given pictures. Needed for various detectors in RenderIVTC().
+ *
+ * The number of planes, and the number of visible lines in each plane,
+ * must match between the inputs. If the visible pitches do not match,
+ * only the compatible (smaller) part will be tested.
+ *
+ * Note that the return value is NOT simply *pi_top + *pi_bot, because
+ * the fields and the full block use different motion thresholds.
+ *
+ * If you do not want the separate field scores, pass NULL for pi_top and
+ * pi_bot. This does not affect computation speed, and is only provided as
+ * a syntactic convenience.
+ *
+ * Motion in each picture plane (Y, U, V) counts separately.
+ * The sum of number of blocks with motion across all planes is returned.
+ *
+ * For 4:2:0 chroma, even-numbered chroma lines make up the "top field" for
+ * chroma, and odd-numbered chroma lines the "bottom field" for chroma.
+ * This is correct for IVTC purposes.
+ *
+ * @param[in] p_prev Previous picture
+ * @param[in] p_curr Current picture
+ * @param[out] pi_top Number of 8x8 blocks where top field has motion.
+ *                    May be NULL.
+ * @param[out] pi_bot Number of 8x8 blocks where bottom field has motion.
+ *                    May be NULL.
+ * @return Number of 8x8 blocks that have motion.
+ * @retval -1 Error: incompatible input pictures.
+ * @see TestForMotionInBlock()
+ * @see RenderIVTC()
+ */
+int EstimateNumBlocksWithMotion( const picture_t* p_prev,
+                                 const picture_t* p_curr,
+                                 int *pi_top, int *pi_bot);
+
+/**
+ * Helper function: estimates "how much interlaced" the given field pair is.
+ *
+ * It is allowed that p_pic_top == p_pic_bot.
+ *
+ * If p_pic_top != p_pic_bot (fields come from different pictures), you can use
+ * ComposeFrame() to actually construct the picture if needed.
+ *
+ * Number of planes, and number of lines in each plane, in p_pic_top and
+ * p_pic_bot must match. If the visible pitches differ, only the compatible
+ * (smaller) part will be tested.
+ *
+ * Luma and chroma planes are tested in the same way. This is correct for
+ * telecined input, where in the interlaced frames also chroma alternates
+ * every chroma line, even if the chroma format is 4:2:0!
+ *
+ * This is just a raw detector that produces a score. The overall score
+ * indicating a progressive or interlaced frame may vary wildly, depending on
+ * the material, especially in anime. The scores should be compared to
+ * each other locally (in the temporal sense) to make meaningful decisions
+ * about progressive or interlaced frames.
+ *
+ * @param p_pic_top Picture to take the top field from.
+ * @param p_pic_bot Picture to take the bottom field from (same or different).
+ * @return Interlace score, >= 0. Higher values mean more interlaced.
+ * @retval -1 Error: incompatible input pictures.
+ * @see RenderIVTC()
+ * @see ComposeFrame()
+ */
+int CalculateInterlaceScore( const picture_t* p_pic_top,
+                             const picture_t* p_pic_bot );
+
+#endif
--- a/modules/video_filter/deinterlace/merge.c
+++ b/modules/video_filter/deinterlace/merge.c
+/*****************************************************************************
+ * merge.c : Merge (line blending) routines for the VLC deinterlacer
+ *****************************************************************************
+ * Copyright (C) 2011 the VideoLAN team
+ * $Id$
+ *
+ * Author: Sam Hocevar <sam@zoy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/* This handles including config.h, because the config
+   is already needed in the header. */
+#include "merge.h"
+
+#ifdef CAN_COMPILE_MMXEXT
+#   include "mmx.h"
+#endif
+
+#ifdef HAVE_ALTIVEC_H
+#   include <altivec.h>
+#endif
+
+#include <stdint.h>
+
+/*****************************************************************************
+ * Merge (line blending) routines
+ *****************************************************************************/
+
+/**
+ * Portable C line blender: writes the byte-wise average of two buffers.
+ *
+ * Every output byte is ( s1 + s2 ) >> 1, i.e. the mean rounded down.
+ *
+ * @param _p_dest Destination buffer; i_bytes bytes are written.
+ * @param _p_s1 First source buffer; i_bytes bytes are read.
+ * @param _p_s2 Second source buffer; i_bytes bytes are read.
+ * @param i_bytes Number of bytes to blend. 0 is allowed and does nothing.
+ */
+void MergeGeneric( void *_p_dest, const void *_p_s1,
+                   const void *_p_s2, size_t i_bytes )
+{
+    uint8_t *p_dest = _p_dest;
+    const uint8_t *p_s1 = _p_s1;
+    const uint8_t *p_s2 = _p_s2;
+
+    /* Count the remaining bytes rather than comparing against
+     * "p_dest + i_bytes - 8": that expression points before the start of
+     * the buffer (undefined behaviour) whenever i_bytes < 8. */
+    for( ; i_bytes >= 8; i_bytes -= 8 )
+    {
+        /* The operands are promoted to int, so the sum cannot overflow. */
+        p_dest[0] = ( p_s1[0] + p_s2[0] ) >> 1;
+        p_dest[1] = ( p_s1[1] + p_s2[1] ) >> 1;
+        p_dest[2] = ( p_s1[2] + p_s2[2] ) >> 1;
+        p_dest[3] = ( p_s1[3] + p_s2[3] ) >> 1;
+        p_dest[4] = ( p_s1[4] + p_s2[4] ) >> 1;
+        p_dest[5] = ( p_s1[5] + p_s2[5] ) >> 1;
+        p_dest[6] = ( p_s1[6] + p_s2[6] ) >> 1;
+        p_dest[7] = ( p_s1[7] + p_s2[7] ) >> 1;
+
+        p_dest += 8;
+        p_s1 += 8;
+        p_s2 += 8;
+    }
+
+    /* Remaining 0-7 bytes */
+    for( ; i_bytes > 0; i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+
+#if defined(CAN_COMPILE_MMXEXT)
+/**
+ * MMXEXT line blender: averages two byte buffers, 8 bytes per step.
+ *
+ * Whole 8-byte groups are blended with "pavgb"; the remaining 0-7 bytes
+ * are blended in C as ( s1 + s2 ) >> 1. Note that "pavgb" rounds the
+ * average up, whereas the C tail rounds it down.
+ *
+ * The MMX registers are left in use on return (see the EndMerge routines
+ * in this file).
+ *
+ * @param _p_dest Destination buffer; i_bytes bytes are written.
+ * @param _p_s1 First source buffer; i_bytes bytes are read.
+ * @param _p_s2 Second source buffer; i_bytes bytes are read.
+ * @param i_bytes Number of bytes to blend. 0 is allowed and does nothing.
+ */
+void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
+                  size_t i_bytes )
+{
+    uint8_t *p_dest = _p_dest;
+    const uint8_t *p_s1 = _p_s1;
+    const uint8_t *p_s2 = _p_s2;
+
+    /* Counting bytes avoids forming "p_dest + i_bytes - 8", which lies
+     * before the buffer for i_bytes < 8, and lets a final full 8-byte
+     * group take the vector path instead of the scalar tail. */
+    for( ; i_bytes >= 8; i_bytes -= 8 )
+    {
+        /* The "m" operands only describe one byte each while the
+         * instructions touch eight, hence the "memory" clobber.
+         * NOTE(review): %mm1 is not declared as clobbered; listing it
+         * needs MMX enabled for this translation unit - confirm. */
+        __asm__  __volatile__( "movq %2,%%mm1;"
+                               "pavgb %1, %%mm1;"
+                               "movq %%mm1, %0"
+                               : "=m" (*p_dest)
+                               : "m" (*p_s1), "m" (*p_s2)
+                               : "memory" );
+        p_dest += 8;
+        p_s1 += 8;
+        p_s2 += 8;
+    }
+
+    /* Remaining 0-7 bytes */
+    for( ; i_bytes > 0; i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+#endif
+
+#if defined(CAN_COMPILE_3DNOW)
+/**
+ * 3DNow! line blender: averages two byte buffers, 8 bytes per step.
+ *
+ * Whole 8-byte groups are blended with "pavgusb"; the remaining 0-7 bytes
+ * are blended in C as ( s1 + s2 ) >> 1.
+ *
+ * The MMX registers are left in use on return (see the EndMerge routines
+ * in this file).
+ *
+ * @param _p_dest Destination buffer; i_bytes bytes are written.
+ * @param _p_s1 First source buffer; i_bytes bytes are read.
+ * @param _p_s2 Second source buffer; i_bytes bytes are read.
+ * @param i_bytes Number of bytes to blend. 0 is allowed and does nothing.
+ */
+void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
+                 size_t i_bytes )
+{
+    uint8_t *p_dest = _p_dest;
+    const uint8_t *p_s1 = _p_s1;
+    const uint8_t *p_s2 = _p_s2;
+
+    /* Counting bytes avoids forming "p_dest + i_bytes - 8", which lies
+     * before the buffer for i_bytes < 8, and lets a final full 8-byte
+     * group take the vector path instead of the scalar tail. */
+    for( ; i_bytes >= 8; i_bytes -= 8 )
+    {
+        /* The "m" operands only describe one byte each while the
+         * instructions touch eight, hence the "memory" clobber.
+         * NOTE(review): %mm1 is not declared as clobbered; listing it
+         * needs MMX enabled for this translation unit - confirm. */
+        __asm__  __volatile__( "movq %2,%%mm1;"
+                               "pavgusb %1, %%mm1;"
+                               "movq %%mm1, %0"
+                               : "=m" (*p_dest)
+                               : "m" (*p_s1), "m" (*p_s2)
+                               : "memory" );
+        p_dest += 8;
+        p_s1 += 8;
+        p_s2 += 8;
+    }
+
+    /* Remaining 0-7 bytes */
+    for( ; i_bytes > 0; i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+#endif
+
+#if defined(CAN_COMPILE_SSE)
+/**
+ * SSE2 line blender: averages two byte buffers, 16 bytes per step.
+ *
+ * Bytes are first blended in C until p_s1 is 16-byte aligned, then whole
+ * 16-byte groups are blended with "pavgb", and the remaining 0-15 bytes
+ * are blended in C again. The C paths compute ( s1 + s2 ) >> 1 (rounded
+ * down) whereas "pavgb" rounds the average up.
+ *
+ * NOTE(review): the enclosing preprocessor guard tests CAN_COMPILE_SSE,
+ * but "movdqu" and "pavgb" on %xmm registers are SSE2 instructions -
+ * confirm that the guard is the intended one.
+ *
+ * @param _p_dest Destination buffer; exactly i_bytes bytes are written.
+ *                No alignment is required.
+ * @param _p_s1 First source buffer; i_bytes bytes are read.
+ * @param _p_s2 Second source buffer; i_bytes bytes are read.
+ *              No alignment is required.
+ * @param i_bytes Number of bytes to blend. 0 is allowed and does nothing.
+ */
+void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
+                size_t i_bytes )
+{
+    uint8_t *p_dest = _p_dest;
+    const uint8_t *p_s1 = _p_s1;
+    const uint8_t *p_s2 = _p_s2;
+
+    /* "pavgb" takes *p_s1 as a memory operand, which has to be 16-byte
+     * aligned. The bytes blended here must be deducted from i_bytes (and
+     * must not exceed it): otherwise the loops below would write up to
+     * 15 bytes past the end of the destination. */
+    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 & 15 ); i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+
+    for( ; i_bytes >= 16; i_bytes -= 16 )
+    {
+        /* The "m" operands only describe one byte each while the
+         * instructions touch sixteen, hence the "memory" clobber.
+         * NOTE(review): %xmm1 is not declared as clobbered; listing it
+         * needs SSE enabled for this translation unit - confirm. */
+        __asm__  __volatile__( "movdqu %2,%%xmm1;"
+                               "pavgb %1, %%xmm1;"
+                               "movdqu %%xmm1, %0"
+                               : "=m" (*p_dest)
+                               : "m" (*p_s1), "m" (*p_s2)
+                               : "memory" );
+        p_dest += 16;
+        p_s1 += 16;
+        p_s2 += 16;
+    }
+
+    /* Remaining 0-15 bytes */
+    for( ; i_bytes > 0; i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+#endif
+
+#ifdef CAN_COMPILE_C_ALTIVEC
+/**
+ * AltiVec line blender: averages two byte buffers, 16 bytes per step.
+ *
+ * Bytes are blended in C until the destination is 16-byte aligned, then
+ * whole 16-byte groups are blended with vec_avg(), and the remaining
+ * 0-15 bytes are blended in C again. Sources that are not 16-byte aligned
+ * are realigned with vec_lvsl()/vec_perm().
+ *
+ * @param _p_dest Destination buffer; i_bytes bytes are written.
+ * @param _p_s1 First source buffer (only read).
+ * @param _p_s2 Second source buffer (only read).
+ * @param i_bytes Number of bytes to blend. 0 is allowed.
+ */
+void MergeAltivec( void *_p_dest, const void *_p_s1,
+                   const void *_p_s2, size_t i_bytes )
+{
+    uint8_t *p_dest = (uint8_t *)_p_dest;
+    uint8_t *p_s1   = (uint8_t *)_p_s1;
+    uint8_t *p_s2   = (uint8_t *)_p_s2;
+
+    /* Use C until the first 16-bytes aligned destination pixel, but never
+     * blend more than i_bytes: short buffers would otherwise be overrun. */
+    for( ; i_bytes > 0 && ( (uintptr_t)p_dest & 0xF ); i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+
+    /* uintptr_t, not int: a pointer does not fit in an int on 64-bit
+     * targets. */
+    if( ( (uintptr_t)p_s1 & 0xF ) | ( (uintptr_t)p_s2 & 0xF ) )
+    {
+        /* Unaligned source */
+        vector unsigned char s1v, s2v, destv;
+        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
+        vector unsigned char perm1v, perm2v;
+
+        perm1v = vec_lvsl( 0, p_s1 );
+        perm2v = vec_lvsl( 0, p_s2 );
+        s1oldv = vec_ld( 0, p_s1 );
+        s2oldv = vec_ld( 0, p_s2 );
+
+        for( ; i_bytes >= 16; i_bytes -= 16 )
+        {
+            /* Combine the previous and the next aligned vector into the
+             * 16 source bytes starting at p_s1 / p_s2. */
+            s1newv = vec_ld( 16, p_s1 );
+            s2newv = vec_ld( 16, p_s2 );
+            s1v    = vec_perm( s1oldv, s1newv, perm1v );
+            s2v    = vec_perm( s2oldv, s2newv, perm2v );
+            s1oldv = s1newv;
+            s2oldv = s2newv;
+            destv  = vec_avg( s1v, s2v );
+            vec_st( destv, 0, p_dest );
+
+            p_s1   += 16;
+            p_s2   += 16;
+            p_dest += 16;
+        }
+    }
+    else
+    {
+        /* Aligned source */
+        vector unsigned char s1v, s2v, destv;
+
+        for( ; i_bytes >= 16; i_bytes -= 16 )
+        {
+            s1v   = vec_ld( 0, p_s1 );
+            s2v   = vec_ld( 0, p_s2 );
+            destv = vec_avg( s1v, s2v );
+            vec_st( destv, 0, p_dest );
+
+            p_s1   += 16;
+            p_s2   += 16;
+            p_dest += 16;
+        }
+    }
+
+    /* Remaining 0-15 bytes */
+    for( ; i_bytes > 0; i_bytes-- )
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+#endif
+
+#ifdef __ARM_NEON__
+/**
+ * ARM NEON line blender: averages two byte buffers, 64 bytes per step.
+ *
+ * Bytes are blended with MergeGeneric() until the output is 16-byte
+ * aligned, then whole 64-byte groups are blended with "vhadd.u8", and the
+ * remaining 0-63 bytes are blended with MergeGeneric() again.
+ *
+ * @param out Output buffer; n bytes are written.
+ * @param in1 First input buffer; n bytes are read.
+ * @param in2 Second input buffer; n bytes are read.
+ * @param n Number of bytes to blend. 0 is allowed.
+ */
+void MergeNEON (void *restrict out, const void *in1,
+                const void *in2, size_t n)
+{
+    uint8_t *outp = out;
+    const uint8_t *in1p = in1;
+    const uint8_t *in2p = in2;
+    /* Bytes needed to bring outp up to the next 16-byte boundary. This is
+     * 16 minus the misalignment, not the misalignment itself: the stores
+     * below carry a ":128" alignment qualifier. */
+    size_t head = (16 - (((uintptr_t)outp) & 15)) & 15;
+
+    if (head > n)
+        head = n; /* short buffer: do not let "n -= head" wrap around */
+    if (head)
+    {
+        MergeGeneric (outp, in1p, in2p, head);
+        outp += head;
+        in1p += head;
+        in2p += head;
+        n -= head;
+    }
+
+    /* Every pass of the assembly loops loads and stores 64 bytes (two
+     * q-register pairs per buffer), so only whole 64-byte groups may be
+     * handled there; a 16-byte granule would overrun all three buffers. */
+    uint8_t *end = outp + (n & ~(size_t)63);
+
+    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
+        /* At least one input is not 16-byte aligned: plain loads */
+        while (outp < end)
+            asm volatile (
+                "vld1.u8  {q0-q1}, [%[in1]]!\n"
+                "vld1.u8  {q2-q3}, [%[in2]]!\n"
+                "vhadd.u8 q4, q0, q2\n"
+                "vld1.u8  {q6-q7}, [%[in1]]!\n"
+                "vhadd.u8 q5, q1, q3\n"
+                "vld1.u8  {q8-q9}, [%[in2]]!\n"
+                "vhadd.u8 q10, q6, q8\n"
+                "vhadd.u8 q11, q7, q9\n"
+                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
+                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
+                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
+                :
+                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+                  "q8", "q9", "q10", "q11", "memory");
+    else
+        /* Both inputs are 16-byte aligned: loads with ":128" qualifier */
+        while (outp < end)
+            asm volatile (
+                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
+                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
+                "vhadd.u8 q4, q0, q2\n"
+                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
+                "vhadd.u8 q5, q1, q3\n"
+                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
+                "vhadd.u8 q10, q6, q8\n"
+                "vhadd.u8 q11, q7, q9\n"
+                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
+                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
+                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
+                :
+                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+                  "q8", "q9", "q10", "q11", "memory");
+
+    /* Remaining 0-63 bytes */
+    n &= 63;
+    if (n)
+        MergeGeneric (outp, in1p, in2p, n);
+}
+#endif
+
+/*****************************************************************************
+ * EndMerge routines
+ *****************************************************************************/
+
+#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
+/**
+ * Issues the x86 "emms" instruction.
+ *
+ * Belongs to the EndMerge routines; presumably meant to be called once
+ * after a series of MMXEXT/SSE merge calls - confirm against the callers.
+ */
+void EndMMX( void )
+{
+    __asm__ __volatile__( "emms" );
+}
+#endif
+
+#if defined(CAN_COMPILE_3DNOW)
+/**
+ * Issues the "femms" instruction.
+ *
+ * Belongs to the EndMerge routines; presumably meant to be called once
+ * after a series of Merge3DNow() calls - confirm against the callers.
+ */
+void End3DNow( void )
+{
+    __asm__ __volatile__( "femms" );
+}
+#endif
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
--- a/modules/video_filter/mmx.h
+++ b/modules/video_filter/mmx.h
@@ -25,17 +25,19 @@
 * values by ULL, lest they be truncated by the compiler)
 */

+#include <stdint.h>
+
 typedef    union {
-    int64_t            q;    /* Quadword (64-bit) value */
+    int64_t          q;    /* Quadword (64-bit) value */
    uint64_t        uq;    /* Unsigned Quadword */
-    int32_t            d[2];    /* 2 Doubleword (32-bit) values */
-    uint32_t        ud[2];    /* 2 Unsigned Doubleword */
-    int16_t            w[4];    /* 4 Word (16-bit) values */
-    uint16_t        uw[4];    /* 4 Unsigned Word */
-    int8_t            b[8];    /* 8 Byte (8-bit) values */
-    uint8_t            ub[8];    /* 8 Unsigned Byte */
-    float            s[2];    /* Single-precision (32-bit) value */
-} ATTR_ALIGN(8) mmx_t;    /* On an 8-byte (64-bit) boundary */
+    int32_t          d[2]; /* 2 Doubleword (32-bit) values */
+    uint32_t        ud[2]; /* 2 Unsigned Doubleword */
+    int16_t          w[4]; /* 4 Word (16-bit) values */
+    uint16_t        uw[4]; /* 4 Unsigned Word */
+    int8_t           b[8]; /* 8 Byte (8-bit) values */
+    uint8_t         ub[8]; /* 8 Unsigned Byte */
+    float            s[2]; /* Single-precision (32-bit) value */
+} ATTR_ALIGN(8) mmx_t;     /* On an 8-byte (64-bit) boundary */


 #define    mmx_i2r(op,imm,reg) \

--- a/modules/video_filter/yadif.h
+++ b/modules/video_filter/yadif.h