Commit 1ac73b15 authored by Rafaël Carré

contrib: add libmpeg2

parent cfceecdd
3648a2b3d7e2056d5adb328acd2fb983a1fa9a05ccb6f9388cc686c819445421811f42e8439418a0491a13080977f074a0d8bf8fa6bc101ff245ddea65a46fbc libmpeg2-0.5.1.tar.gz
diff -urNp libmpeg2.orig/libmpeg2/motion_comp_arm_s.S libmpeg2/libmpeg2/motion_comp_arm_s.S
--- libmpeg2.orig/libmpeg2/motion_comp_arm_s.S 2008-07-09 21:16:05.000000000 +0200
+++ libmpeg2/libmpeg2/motion_comp_arm_s.S 2009-11-20 19:55:22.000000000 +0100
@@ -19,6 +19,16 @@
@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+@ Data preload (pld) is supported only by ARM V5TE and above; on the older architectures tested below, pld is defined as an empty macro
+
+#if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
+ || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
+ || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
+ || defined (__ARM_ARCH_5T__))
+.macro pld reg
+.endm
+#endif
+
.text
@ ----------------------------------------------------------------
diff -ruN libmpeg2.orig//configure.ac libmpeg2/configure.ac
--- libmpeg2.orig//configure.ac 2011-11-06 22:25:07.273694621 -0500
+++ libmpeg2/configure.ac 2011-11-06 22:25:19.033752939 -0500
@@ -103,7 +103,14 @@
AC_DEFINE([ARCH_ALPHA],,[alpha architecture]);;
arm*)
arm_conditional=:
- AC_DEFINE([ARCH_ARM],,[ARM architecture]);;
+ AC_DEFINE([ARCH_ARM],,[ARM architecture])
+ AC_MSG_CHECKING([if inline ARM Advanced SIMD assembly is supported])
+ AC_TRY_COMPILE([],
+ [asm ("vqmovun.s64 d0, q1":::"d0");],
+ [AC_DEFINE([ARCH_ARM_NEON],, [ARM Advanced SIMD assembly])
+ AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)])
+ ;;
esac
elif test x"$CC" = x"tendracc"; then
dnl TenDRA portability checking compiler
diff -ruN libmpeg2.orig//include/mpeg2.h libmpeg2/include/mpeg2.h
--- libmpeg2.orig//include/mpeg2.h 2011-11-06 22:25:07.297694741 -0500
+++ libmpeg2/include/mpeg2.h 2011-11-06 22:25:19.025752913 -0500
@@ -164,6 +164,7 @@
#define MPEG2_ACCEL_SPARC_VIS 1
#define MPEG2_ACCEL_SPARC_VIS2 2
#define MPEG2_ACCEL_ARM 1
+#define MPEG2_ACCEL_ARM_NEON 2
#define MPEG2_ACCEL_DETECT 0x80000000
uint32_t mpeg2_accel (uint32_t accel);
diff -ruN libmpeg2.orig//libmpeg2/Makefile.am libmpeg2/libmpeg2/Makefile.am
--- libmpeg2.orig//libmpeg2/Makefile.am 2011-11-06 22:25:07.289694707 -0500
+++ libmpeg2/libmpeg2/Makefile.am 2011-11-06 22:25:19.033752939 -0500
@@ -14,7 +14,7 @@
motion_comp_vis.c motion_comp_arm.c \
cpu_accel.c cpu_state.c
if ARCH_ARM
-libmpeg2arch_la_SOURCES += motion_comp_arm_s.S
+libmpeg2arch_la_SOURCES += motion_comp_arm_s.S motion_comp_neon.c
endif
libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
diff -ruN libmpeg2.orig//libmpeg2/motion_comp.c libmpeg2/libmpeg2/motion_comp.c
--- libmpeg2.orig//libmpeg2/motion_comp.c 2011-11-06 22:25:07.289694707 -0500
+++ libmpeg2/libmpeg2/motion_comp.c 2011-11-06 22:25:19.029752924 -0500
@@ -58,6 +58,11 @@
else
#endif
#ifdef ARCH_ARM
+#ifdef ARCH_ARM_NEON
+ if (accel & MPEG2_ACCEL_ARM_NEON)
+ mpeg2_mc = mpeg2_mc_neon;
+ else
+#endif
if (accel & MPEG2_ACCEL_ARM) {
mpeg2_mc = mpeg2_mc_arm;
} else
diff -ruN libmpeg2.orig//libmpeg2/motion_comp_neon.c libmpeg2/libmpeg2/motion_comp_neon.c
--- libmpeg2.orig//libmpeg2/motion_comp_neon.c 1969-12-31 19:00:00.000000000 -0500
+++ libmpeg2/libmpeg2/motion_comp_neon.c 2011-11-06 22:25:19.029752924 -0500
@@ -0,0 +1,302 @@
+/*
+ * motion_comp_neon.c
+ * Copyright (C) 2009 Rémi Denis-Courmont
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpeg2dec; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#if defined(ARCH_ARM_NEON)
+
+#include <stdint.h>
+#include <string.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+/* dest = ref: copy one 16-byte row per iteration, stepping both pointers by stride */
+static void MC_put_o_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ do {
+ memcpy (dest, ref, 16); /* plain byte copy, no NEON asm in this variant */
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_put_o_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{ /* 8-byte-wide form of dest = ref: one row copied per iteration */
+ do {
+ memcpy (dest, ref, 8); /* plain byte copy, no NEON asm in this variant */
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+/* dest = (src1 + src2 + 1) / 2, per byte, over one 16-byte row per iteration */
+static void MC_avg_1_16_neon (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2,
+ const int stride, unsigned height)
+{
+ do {
+ asm volatile (
+ "vld1.u8 {q0}, [%[src1]]\n" /* q0 = 16 bytes at src1 */
+ "vld1.u8 {q1}, [%[src2]]\n" /* q1 = 16 bytes at src2 */
+ "vrhadd.u8 q0, q0, q1\n" /* rounding halving add, byte by byte */
+ /* XXX: three cycles stall */
+ "vst1.u8 {q0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
+ : "memory", "q0", "q1"); /* no output operands: the store is covered by "memory" */
+ src1 += stride;
+ src2 += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_avg_1_8_neon (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2,
+ const int stride, unsigned height)
+{ /* 8-byte-wide form: dest = (src1 + src2 + 1) / 2 per byte, one row per iteration */
+ do {
+ asm volatile (
+ "vld1.u8 {d0}, [%[src1]]\n" /* d0 = 8 bytes at src1 */
+ "vld1.u8 {d1}, [%[src2]]\n" /* d1 = 8 bytes at src2 */
+ "vrhadd.u8 d0, d0, d1\n" /* rounding halving add, byte by byte */
+ "vst1.u8 {d0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
+ : "memory", "q0"); /* d0 and d1 are the two halves of q0 */
+
+ src1 += stride;
+ src2 += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+/* dest = (dest + ((src1 + src2 + 1) / 2) + 1) / 2, per byte, 16 bytes per row */
+static void MC_avg_2_16_neon (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2,
+ const int stride, unsigned height)
+{
+ do {
+ asm volatile (
+ "vld1.u8 {q0}, [%[src1]]\n"
+ "vld1.u8 {q1}, [%[src2]]\n"
+ "vrhadd.u8 q0, q0, q1\n" /* q0 = rounded average of the two sources */
+ "vld1.u8 {q2}, [%[dest]]\n" /* q2 = current contents of dest */
+ /* XXX: one cycle stall */
+ "vrhadd.u8 q0, q0, q2\n" /* average that result with dest */
+ /* XXX: three cycles stall */
+ "vst1.u8 {q0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
+ : "memory", "q0", "q1", "q2"); /* dest is read and written through "memory" */
+ src1 += stride;
+ src2 += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_avg_2_8_neon (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2,
+ const int stride, unsigned height)
+{ /* 8-byte-wide form: dest = (dest + ((src1 + src2 + 1) / 2) + 1) / 2 per byte */
+ do {
+ asm volatile (
+ "vld1.u8 {d0}, [%[src1]]\n"
+ "vld1.u8 {d1}, [%[src2]]\n"
+ "vrhadd.u8 d0, d0, d1\n" /* d0 = rounded average of the two sources */
+ "vld1.u8 {d2}, [%[dest]]\n" /* d2 = current contents of dest */
+ "vrhadd.u8 d0, d0, d2\n" /* average that result with dest */
+ "vst1.u8 {d0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
+ : "memory", "q0", "d2"); /* q0 covers d0 and d1 */
+ src1 += stride;
+ src2 += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_avg_o_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_16_neon (dest, dest, ref, stride, height); /* src1 is dest itself: dest = (dest + ref + 1) / 2 */
+}
+
+static void MC_avg_o_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_8_neon (dest, dest, ref, stride, height); /* src1 is dest itself: dest = (dest + ref + 1) / 2 */
+}
+
+static void MC_put_x_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_16_neon (dest, ref, ref + 1, stride, height); /* second source is ref + 1: each byte averaged with the next one in the row */
+}
+
+static void MC_put_x_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_8_neon (dest, ref, ref + 1, stride, height); /* second source is ref + 1: each byte averaged with the next one in the row */
+}
+
+static void MC_avg_x_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_2_16_neon (dest, ref, ref + 1, stride, height); /* the (ref, ref + 1) average is in turn averaged into dest */
+}
+
+static void MC_avg_x_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_2_8_neon (dest, ref, ref + 1, stride, height); /* the (ref, ref + 1) average is in turn averaged into dest */
+}
+
+static void MC_put_y_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_16_neon (dest, ref, ref + stride, stride, height); /* second source is ref + stride: each byte averaged with the one a stride further on */
+}
+static void MC_put_y_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_1_8_neon (dest, ref, ref + stride, stride, height); /* second source is ref + stride: each byte averaged with the one a stride further on */
+}
+
+static void MC_avg_y_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_2_16_neon (dest, ref, ref + stride, stride, height); /* the (ref, ref + stride) average is in turn averaged into dest */
+}
+
+static void MC_avg_y_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ MC_avg_2_8_neon (dest, ref, ref + stride, stride, height); /* the (ref, ref + stride) average is in turn averaged into dest */
+}
+
+static void MC_put_xy_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{ /* dest = (ref + refx + refy + refxy + 2) / 4 per byte, 16 bytes per row */
+ do {
+ asm volatile (
+ "vld1.u8 {q0}, [%[ref]]\n"
+ "vld1.u8 {q1}, [%[refx]]\n"
+ "vld1.u8 {q2}, [%[refy]]\n"
+ "vld1.u8 {q3}, [%[refxy]]\n"
+ /* sum in 16 bits and round once: chained vrhadd rounds twice, up to 1 too high */
+ "vaddl.u8 q8, d0, d2\n" "vaddl.u8 q9, d1, d3\n" /* ref + refx, low/high half */
+ "vaddl.u8 q10, d4, d6\n" "vaddl.u8 q11, d5, d7\n" /* refy + refxy, low/high half */
+ "vadd.i16 q8, q8, q10\n" "vadd.i16 q9, q9, q11\n" /* four-way sums */
+ "vrshrn.i16 d0, q8, #2\n" "vrshrn.i16 d1, q9, #2\n" /* (sum + 2) >> 2, back to bytes */
+ "vst1.u8 {q0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
+ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
+ : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_put_xy_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{ /* dest = (ref + refx + refy + refxy + 2) / 4 per byte, 8 bytes per row */
+ do {
+ asm volatile (
+ "vld1.u8 {d0}, [%[ref]]\n"
+ "vld1.u8 {d1}, [%[refx]]\n"
+ "vld1.u8 {d2}, [%[refy]]\n"
+ "vld1.u8 {d3}, [%[refxy]]\n"
+ /* sum in 16 bits and round once: chained vrhadd rounds twice, up to 1 too high */
+ "vaddl.u8 q2, d0, d1\n" /* ref + refx */
+ "vaddl.u8 q3, d2, d3\n" /* refy + refxy */
+ "vadd.i16 q2, q2, q3\n" /* four-way sums */
+ "vrshrn.i16 d0, q2, #2\n" /* (sum + 2) >> 2, back to bytes */
+ "vst1.u8 {d0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
+ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
+ : "memory", "q0", "q1", "q2", "q3");
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_avg_xy_16_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{ /* dest = (dest + ((ref + refx + refy + refxy + 2) / 4) + 1) / 2 per byte, 16 bytes per row */
+ do {
+ asm volatile (
+ "vld1.u8 {q0}, [%[ref]]\n"
+ "vld1.u8 {q1}, [%[refx]]\n"
+ "vld1.u8 {q2}, [%[refy]]\n"
+ "vld1.u8 {q3}, [%[refxy]]\n"
+ /* sum in 16 bits and round once: chained vrhadd rounds twice, up to 1 too high */
+ "vaddl.u8 q8, d0, d2\n" "vaddl.u8 q9, d1, d3\n" /* ref + refx, low/high half */
+ "vaddl.u8 q10, d4, d6\n" "vaddl.u8 q11, d5, d7\n" /* refy + refxy, low/high half */
+ "vadd.i16 q8, q8, q10\n" "vadd.i16 q9, q9, q11\n" /* four-way sums */
+ "vrshrn.i16 d0, q8, #2\n" "vrshrn.i16 d1, q9, #2\n" /* (sum + 2) >> 2, back to bytes */
+ "vld1.u8 {q1}, [%[dest]]\n" /* q1 is free again: reuse it for dest */
+ "vrhadd.u8 q0, q1, q0\n" /* average the prediction into dest */
+ "vst1.u8 {q0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
+ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
+ : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+static void MC_avg_xy_8_neon (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{ /* dest = (dest + ((ref + refx + refy + refxy + 2) / 4) + 1) / 2 per byte, 8 bytes per row */
+ do {
+ asm volatile (
+ "vld1.u8 {d0}, [%[ref]]\n"
+ "vld1.u8 {d1}, [%[refx]]\n"
+ "vld1.u8 {d2}, [%[refy]]\n"
+ "vld1.u8 {d3}, [%[refxy]]\n"
+ /* sum in 16 bits and round once: chained vrhadd rounds twice, up to 1 too high */
+ "vaddl.u8 q2, d0, d1\n" /* ref + refx */
+ "vaddl.u8 q3, d2, d3\n" /* refy + refxy */
+ "vadd.i16 q2, q2, q3\n" /* four-way sums */
+ "vrshrn.i16 d0, q2, #2\n" /* (sum + 2) >> 2, back to bytes */
+ "vld1.u8 {d1}, [%[dest]]\n" /* d1 is free again: reuse it for dest */
+ "vrhadd.u8 d0, d1, d0\n" /* average the prediction into dest */
+ "vst1.u8 {d0}, [%[dest]]\n"
+ :
+ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
+ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
+ : "memory", "q0", "q1", "q2", "q3");
+ ref += stride;
+ dest += stride;
+ } while (--height); /* tested after the body: height is expected to be >= 1 */
+}
+
+MPEG2_MC_EXTERN (neon) /* NOTE(review): presumably defines the mpeg2_mc_neon table from the MC_*_neon functions above -- confirm against the macro in mpeg2_internal.h */
+
+#endif /* ARCH_ARM_NEON */
diff -ruN libmpeg2.orig//libmpeg2/mpeg2_internal.h libmpeg2/libmpeg2/mpeg2_internal.h
--- libmpeg2.orig//libmpeg2/mpeg2_internal.h 2011-11-06 22:25:07.293694722 -0500
+++ libmpeg2/libmpeg2/mpeg2_internal.h 2011-11-06 22:25:19.029752924 -0500
@@ -313,5 +313,6 @@
extern mpeg2_mc_t mpeg2_mc_alpha;
extern mpeg2_mc_t mpeg2_mc_vis;
extern mpeg2_mc_t mpeg2_mc_arm;
+extern mpeg2_mc_t mpeg2_mc_neon;
#endif /* LIBMPEG2_MPEG2_INTERNAL_H */
# libmpeg2

LIBMPEG2_VERSION = 0.5.1
LIBMPEG2_URL := http://libmpeg2.sourceforge.net/files/libmpeg2-$(LIBMPEG2_VERSION).tar.gz

PKGS += libmpeg2
# An empty need_pkg result adds the package to PKGS_FOUND.
ifeq ($(call need_pkg,"libmpeg2"),)
PKGS_FOUND += libmpeg2
endif

$(TARBALLS)/libmpeg2-$(LIBMPEG2_VERSION).tar.gz:
	$(call download,$(LIBMPEG2_URL))

.sum-libmpeg2: libmpeg2-$(LIBMPEG2_VERSION).tar.gz

libmpeg2: libmpeg2-$(LIBMPEG2_VERSION).tar.gz .sum-libmpeg2
	$(UNPACK)
	$(MOVE)

# Only the libmpeg2 and include subdirectories are built and installed.
# $(MAKE) rather than a literal "make" keeps the parent's flags and
# jobserver connected to the sub-makes.
.libmpeg2: libmpeg2
	cd $< && $(HOSTVARS) ./configure $(HOSTCONF) --without-x --disable-sdl
	cd $</libmpeg2 && $(MAKE) && $(MAKE) install
	cd $</include && $(MAKE) && $(MAKE) install
	touch $@
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment