Commit 5d15f59a authored by Martin Storsjö

arm_neon: Add an optimized routine for deinterleaving chroma

This supports conversion from NV12/21/16/24 to I420/YV12/I422/I444.

This avoids hitting swscale for the NV12->I420 conversion, for hw
decoders that return NV12/21 in combination with the android vout
in YUV mode.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent 0cf034af
...@@ -7,6 +7,7 @@ libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS) ...@@ -7,6 +7,7 @@ libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libsimple_channel_mixer_neon_plugin_LIBTOOLFLAGS = --tag=CC libsimple_channel_mixer_neon_plugin_LIBTOOLFLAGS = --tag=CC
libchroma_yuv_neon_plugin_la_SOURCES = \ libchroma_yuv_neon_plugin_la_SOURCES = \
arm_neon/deinterleave_chroma.S \
arm_neon/i420_yuyv.S \ arm_neon/i420_yuyv.S \
arm_neon/i422_yuyv.S \ arm_neon/i422_yuyv.S \
arm_neon/yuyv_i422.S \ arm_neon/yuyv_i422.S \
......
...@@ -30,6 +30,14 @@ struct yuv_planes ...@@ -30,6 +30,14 @@ struct yuv_planes
size_t pitch; size_t pitch;
}; };
/* A pair of separate (planar) chroma buffers that share one pitch,
 * expressed in bytes.
 * Keep the members in this order: deinterleave_chroma_neon loads u, v and
 * pitch as three consecutive words from the start of the structure. */
struct uv_planes
{
    void *u;      /* plane receiving the first byte of each chroma pair */
    void *v;      /* plane receiving the second byte of each chroma pair */
    size_t pitch; /* bytes per row, common to both planes */
};
/* Packed picture buffer. Pitch is in bytes (_not_ pixels). */ /* Packed picture buffer. Pitch is in bytes (_not_ pixels). */
struct yuv_pack struct yuv_pack
{ {
...@@ -67,6 +75,11 @@ void uyvy_i422_neon (struct yuv_planes *const out, ...@@ -67,6 +75,11 @@ void uyvy_i422_neon (struct yuv_planes *const out,
const struct yuv_pack *const in, const struct yuv_pack *const in,
int width, int height) asm("uyvy_i422_neon"); int width, int height) asm("uyvy_i422_neon");
/* Semiplanar to planar chroma conversion (implemented in NEON assembly).
 * Splits the interleaved chroma plane described by `in` into the two planes
 * of `out`: the first byte of every pair goes to out->u, the second to
 * out->v. `width` is the number of chroma samples per row and `height` the
 * number of chroma rows. The routine rounds the width up to a multiple of 8
 * and uses aligned accesses (16 bytes on the input, 8 bytes on each output),
 * so rows must be padded and aligned accordingly. */
void deinterleave_chroma_neon (struct uv_planes *const out,
                               const struct yuv_pack *const in,
                               int width,
                               int height) asm("deinterleave_chroma_neon");
/* I420 to RGBA conversion. */ /* I420 to RGBA conversion. */
void i420_rgb_neon (struct yuv_pack *const out, void i420_rgb_neon (struct yuv_pack *const out,
const struct yuv_planes *const in, const struct yuv_planes *const in,
......
...@@ -45,6 +45,15 @@ vlc_module_end () ...@@ -45,6 +45,15 @@ vlc_module_end ()
struct yuv_planes planes = { \ struct yuv_planes planes = { \
(pict)->Y_PIXELS, (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->Y_PITCH } (pict)->Y_PIXELS, (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->Y_PITCH }
/* Declare `planes` as a struct uv_planes pointing at the U and V planes of
 * picture `pict`; the pitch of the U plane is used for both. */
#define DEFINE_UV_PLANES(planes, pict) \
    struct uv_planes planes = { \
        .u = (pict)->U_PIXELS, \
        .v = (pict)->V_PIXELS, \
        .pitch = (pict)->U_PITCH, \
    }
/* Same as a plain uv_planes declaration for `pict`, but with the two plane
 * pointers exchanged: `u` points at the V plane and `v` at the U plane.
 * The pitch still comes from the U plane. */
#define DEFINE_UV_PLANES_SWAP(planes, pict) \
    struct uv_planes planes = { \
        .u = (pict)->V_PIXELS, \
        .v = (pict)->U_PIXELS, \
        .pitch = (pict)->U_PITCH, \
    }
/* Declare `pack` as a struct yuv_pack built from the pixel pointer and pitch
 * of the U plane of `pict`. The semiplanar filters use it to describe the
 * interleaved chroma plane of their source picture. */
#define DEFINE_UV_PACK(pack, pict) \
    struct yuv_pack pack = { \
        (pict)->U_PIXELS, \
        (pict)->U_PITCH \
    }
/* Planar YUV420 to packed YUV422 */ /* Planar YUV420 to packed YUV422 */
static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst) static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
{ {
...@@ -83,6 +92,52 @@ static void I420_VYUY (filter_t *filter, picture_t *src, picture_t *dst) ...@@ -83,6 +92,52 @@ static void I420_VYUY (filter_t *filter, picture_t *src, picture_t *dst)
VIDEO_FILTER_WRAPPER (I420_VYUY) VIDEO_FILTER_WRAPPER (I420_VYUY)
/* Semiplanar NV12/21/16/24 to planar I420/YV12/I422/I444 */
/* Copy the luma (Y) plane of `src` into `dst` without modification.
 * When both pictures use the same Y pitch the plane is copied with a single
 * memcpy of pitch * height bytes; otherwise it is copied row by row, taking
 * the input width in bytes from each row. The dimensions come from the
 * filter's input format. */
static void copy_y_plane(filter_t *filter, picture_t *src, picture_t *dst)
{
    uint8_t *from = src->Y_PIXELS;
    uint8_t *to = dst->Y_PIXELS;

    if (src->Y_PITCH == dst->Y_PITCH) {
        /* Identical layout: one bulk copy covers every row. */
        memcpy(to, from, dst->Y_PITCH * filter->fmt_in.video.i_height);
        return;
    }

    /* Different pitches: advance each pointer by its own pitch per row. */
    unsigned row = 0;
    while (row < filter->fmt_in.video.i_height) {
        memcpy(to, from, filter->fmt_in.video.i_width);
        to += dst->Y_PITCH;
        from += src->Y_PITCH;
        row++;
    }
}
/* Define a semiplanar-to-planar conversion function called `name` and pass
 * it to VIDEO_FILTER_WRAPPER.
 *
 * The generated function copies the Y plane with copy_y_plane() and then
 * splits the interleaved chroma plane of `src` into the U and V planes of
 * `dst` with deinterleave_chroma_neon().
 *
 *   h_subsamp  divisor applied to the input width to get the chroma width
 *   v_subsamp  divisor applied to the input height to get the chroma height
 *
 * Both divisors are parenthesized so an expression argument keeps its
 * meaning. The last line deliberately has no trailing backslash, so that the
 * line following the macro is not spliced into its body. */
#define SEMIPLANAR_FILTERS(name, h_subsamp, v_subsamp) \
static void name (filter_t *filter, picture_t *src, \
                  picture_t *dst) \
{ \
    DEFINE_UV_PLANES(out, dst); \
    DEFINE_UV_PACK(in, src); \
    copy_y_plane (filter, src, dst); \
    deinterleave_chroma_neon (&out, &in, \
        filter->fmt_in.video.i_width / (h_subsamp), \
        filter->fmt_in.video.i_height / (v_subsamp)); \
} \
VIDEO_FILTER_WRAPPER (name)
/* Define a semiplanar-to-planar conversion function called `name`, with the
 * destination U and V plane pointers exchanged, and pass it to
 * VIDEO_FILTER_WRAPPER.
 *
 * The generated function copies the Y plane with copy_y_plane() and then
 * splits the interleaved chroma plane of `src` with
 * deinterleave_chroma_neon(); because the output is declared with
 * DEFINE_UV_PLANES_SWAP, the first byte of each chroma pair lands in the
 * V plane of `dst` and the second in its U plane.
 *
 *   h_subsamp  divisor applied to the input width to get the chroma width
 *   v_subsamp  divisor applied to the input height to get the chroma height
 *
 * Both divisors are parenthesized so an expression argument keeps its
 * meaning. The last line deliberately has no trailing backslash, so that the
 * line following the macro is not spliced into its body. */
#define SEMIPLANAR_FILTERS_SWAP(name, h_subsamp, v_subsamp) \
static void name (filter_t *filter, picture_t *src, \
                  picture_t *dst) \
{ \
    DEFINE_UV_PLANES_SWAP(out, dst); \
    DEFINE_UV_PACK(in, src); \
    copy_y_plane (filter, src, dst); \
    deinterleave_chroma_neon (&out, &in, \
        filter->fmt_in.video.i_width / (h_subsamp), \
        filter->fmt_in.video.i_height / (v_subsamp)); \
} \
VIDEO_FILTER_WRAPPER (name)
/* Instantiate the converters. The two numbers are the divisors applied to
 * the input width and height to obtain the chroma plane size. The
 * <name>_Filter entry points referenced from Open() are presumably produced
 * by VIDEO_FILTER_WRAPPER (defined elsewhere; confirm there). */
/* Chroma halved in both directions; used for NV12->I420 and NV21->YV12. */
SEMIPLANAR_FILTERS (Semiplanar_Planar_420, 2, 2)
/* Same sizes with destination U/V exchanged; NV12->YV12 and NV21->I420. */
SEMIPLANAR_FILTERS_SWAP (Semiplanar_Planar_420_Swap, 2, 2)
/* Chroma halved horizontally only; used for NV16->I422. */
SEMIPLANAR_FILTERS (Semiplanar_Planar_422, 2, 1)
/* Chroma at full resolution; used for NV24->I444. */
SEMIPLANAR_FILTERS (Semiplanar_Planar_444, 1, 1)
/* Planar YUV422 to packed YUV422 */ /* Planar YUV422 to packed YUV422 */
static void I422_YUYV (filter_t *filter, picture_t *src, picture_t *dst) static void I422_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
{ {
...@@ -231,6 +286,57 @@ static int Open (vlc_object_t *obj) ...@@ -231,6 +286,57 @@ static int Open (vlc_object_t *obj)
} }
break; break;
/* Semiplanar to planar */
/* NV12 input: only I420 and YV12 outputs are handled. */
case VLC_CODEC_NV12:
switch (filter->fmt_out.video.i_chroma)
{
case VLC_CODEC_I420:
/* plain de-interleave into the destination U and V planes */
filter->pf_video_filter = Semiplanar_Planar_420_Filter;
break;
case VLC_CODEC_YV12:
/* same conversion with the destination U/V pointers exchanged */
filter->pf_video_filter = Semiplanar_Planar_420_Swap_Filter;
break;
default:
/* no conversion available for this output chroma */
return VLC_EGENERIC;
}
break;
/* NV21 input: same two outputs as NV12, but with the opposite choice of
 * plain/swapped variant for each of them. */
case VLC_CODEC_NV21:
switch (filter->fmt_out.video.i_chroma)
{
case VLC_CODEC_I420:
/* destination U/V pointers exchanged */
filter->pf_video_filter = Semiplanar_Planar_420_Swap_Filter;
break;
case VLC_CODEC_YV12:
/* plain de-interleave */
filter->pf_video_filter = Semiplanar_Planar_420_Filter;
break;
default:
/* no conversion available for this output chroma */
return VLC_EGENERIC;
}
break;
/* NV16 input: I422 is the only supported output. */
case VLC_CODEC_NV16:
switch (filter->fmt_out.video.i_chroma)
{
case VLC_CODEC_I422:
/* chroma width is halved, chroma height is kept */
filter->pf_video_filter = Semiplanar_Planar_422_Filter;
break;
default:
/* no conversion available for this output chroma */
return VLC_EGENERIC;
}
break;
/* NV24 input: I444 is the only supported output. */
case VLC_CODEC_NV24:
switch (filter->fmt_out.video.i_chroma)
{
case VLC_CODEC_I444:
/* chroma planes have the same dimensions as the input picture */
filter->pf_video_filter = Semiplanar_Planar_444_Filter;
break;
default:
/* no conversion available for this output chroma */
return VLC_EGENERIC;
}
break;
/* Packed to planar */ /* Packed to planar */
case VLC_CODEC_YUYV: case VLC_CODEC_YUYV:
switch (filter->fmt_out.video.i_chroma) switch (filter->fmt_out.video.i_chroma)
......
@*****************************************************************************
@ deinterleave_chroma.S : ARM NEONv1 conversion of interleaved to planar chroma
@*****************************************************************************
@ Copyright (C) 2009-2011 Rémi Denis-Courmont
@ Copyright (C) 2013 Martin Storsjö
@
@ This program is free software; you can redistribute it and/or modify
@ it under the terms of the GNU Lesser General Public License as published by
@ the Free Software Foundation; either version 2.1 of the License, or
@ (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public License
@ along with this program; if not, write to the Free Software Foundation,
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
.syntax unified
.fpu neon
.text
@ Register aliases used by the routine below.
@ r0: read pointer into the interleaved input (holds the out argument on
@     entry, until its fields have been loaded)
@ r1: samples left in the current row (holds the in argument on entry)
@ r2: samples per row, r3: rows left
@ r4: input pitch, then reused for the input end-of-row padding
@ r5, r6: write pointers for the two output planes
@ lr: output pitch, then reused for the output end-of-row padding
@ Keep comments off the define lines themselves: the preprocessor would
@ make them part of the replacement text.
#define UV r0
#define COUNT r1
#define WIDTH r2
#define HEIGHT r3
#define IPITCH r4
#define IPAD r4
#define U r5
#define V r6
#define OPITCH lr
#define OPAD lr
@ void deinterleave_chroma_neon (struct uv_planes *out,
@                                const struct yuv_pack *in,
@                                int width, int height)
@
@ Splits an interleaved chroma plane into two planes: the first byte of
@ every input pair is written to out->u and the second to out->v.
@ Entry registers per the C prototype: r0 = out, r1 = in, r2 = width in
@ chroma samples, r3 = height in rows.
@ The width is rounded up to a multiple of 8, so up to 7 extra samples per
@ row are read and written. The input is accessed with 16-byte alignment
@ hints and each output with 8-byte alignment hints.
.align 2
.global deinterleave_chroma_neon
.type deinterleave_chroma_neon, %function
deinterleave_chroma_neon:
@ save the scratch registers and the return address
push {r4-r6,lr}
@ out->u, out->v, out->pitch
ldmia r0, {U, V, OPITCH}
@ first two words of *in: pixel pointer and pitch
ldmia r1, {UV, IPITCH}
@ set the flags tested at label 1; the arithmetic below does not alter them
cmp HEIGHT, #0
@ round the width up to a multiple of 8
add WIDTH, WIDTH, #7
bic WIDTH, WIDTH, #7
@ bytes left over at the end of each row: the input uses two bytes per
@ sample, each output one byte per sample
sub IPAD, IPITCH, WIDTH, lsl #1
sub OPAD, OPITCH, WIDTH
1:
@ start of a row: if rows remain, reload the column counter (this also
@ sets the flags from the width); otherwise, or if the rounded width is
@ zero, restore the registers and return
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
@ prefetch 64 bytes ahead of the read pointer
pld [UV, #64]
@ load 16 bytes, de-interleaved: even bytes to d0, odd bytes to d1
vld2.u8 {d0, d1}, [UV,:128]!
@ 8 samples done; the stores below leave the flags untouched for bgt
subs COUNT, COUNT, #8
vst1.u8 {d0}, [U,:64]!
vst1.u8 {d1}, [V,:64]!
bgt 2b
@ row finished: count it (flags are consumed at label 1) and skip the
@ padding on all three pointers
subs HEIGHT, #1
add UV, UV, IPAD
add U, U, OPAD
add V, V, OPAD
b 1b
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment