Commit cce080a5 authored by gpoirier's avatar gpoirier

Support for MacIntel, last part: balign directives

Determines whether .align's arg is power-of-two or not, then defines ASMALIGN appropriately in config.h. Changes all .baligns to ASMALIGNs.
Patch by John Dalgliesh % johnd AH defyne P org %
Original thread:
Date: Aug 11, 2006 8:00 AM
Subject: Re: [Ffmpeg-devel] Mac OS X Intel last part: balign directives


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@5990 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent c62842ea
...@@ -468,6 +468,7 @@ pthreads="no" ...@@ -468,6 +468,7 @@ pthreads="no"
swscaler="no" swscaler="no"
gpl="no" gpl="no"
memalignhack="no" memalignhack="no"
asmalign_pot="unknown"
# OS specific # OS specific
targetos=`uname -s` targetos=`uname -s`
...@@ -1469,6 +1470,12 @@ if test "$gprof" = "yes" ; then ...@@ -1469,6 +1470,12 @@ if test "$gprof" = "yes" ; then
LDFLAGS="$LDFLAGS -p" LDFLAGS="$LDFLAGS -p"
fi fi
# find if .align arg is power-of-two or not
if test $asmalign_pot = "unknown"; then
asmalign_pot="no"
echo 'asm (".align 3");' | check_cc && asmalign_pot="yes"
fi
echo "install prefix $PREFIX" echo "install prefix $PREFIX"
echo "source path $source_path" echo "source path $source_path"
echo "C compiler $cc" echo "C compiler $cc"
...@@ -1535,6 +1542,7 @@ echo "network support $network" ...@@ -1535,6 +1542,7 @@ echo "network support $network"
if test "$network" = "yes" ; then if test "$network" = "yes" ; then
echo "IPv6 support $ipv6" echo "IPv6 support $ipv6"
fi fi
echo ".align is power-of-two" $asmalign_pot
if test "$gpl" = "no" ; then if test "$gpl" = "no" ; then
echo "License: LGPL" echo "License: LGPL"
else else
...@@ -2096,6 +2104,12 @@ if test "$amr_if2" = "yes" ; then ...@@ -2096,6 +2104,12 @@ if test "$amr_if2" = "yes" ; then
echo "AMR_CFLAGS=-DIF2=1" >> config.mak echo "AMR_CFLAGS=-DIF2=1" >> config.mak
fi fi
if test "$asmalign_pot" = "yes" ; then
echo '#define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"' >> $TMPH
else
echo '#define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"' >> $TMPH
fi
for codec in $DECODER_LIST $ENCODER_LIST $PARSER_LIST $DEMUXER_LIST $MUXER_LIST; do for codec in $DECODER_LIST $ENCODER_LIST $PARSER_LIST $DEMUXER_LIST $MUXER_LIST; do
echo "#define CONFIG_`echo $codec | tr a-z A-Z` 1" >> $TMPH echo "#define CONFIG_`echo $codec | tr a-z A-Z` 1" >> $TMPH
......
...@@ -56,7 +56,7 @@ static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x0 ...@@ -56,7 +56,7 @@ static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x0
static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL; static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL;
static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL;
#define JUMPALIGN() __asm __volatile (".balign 8"::) #define JUMPALIGN() __asm __volatile (ASMALIGN(3)::)
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
#define MOVQ_WONE(regd) \ #define MOVQ_WONE(regd) \
...@@ -204,7 +204,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) ...@@ -204,7 +204,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
asm volatile( asm volatile(
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0), %%mm0 \n\t" "movq (%0), %%mm0 \n\t"
"movq (%0, %2), %%mm2 \n\t" "movq (%0, %2), %%mm2 \n\t"
...@@ -232,7 +232,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint ...@@ -232,7 +232,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0), %%mm0 \n\t" "movq (%0), %%mm0 \n\t"
"movq (%1), %%mm2 \n\t" "movq (%1), %%mm2 \n\t"
...@@ -375,7 +375,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size ...@@ -375,7 +375,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
{ {
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movd (%1), %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
"movd (%1, %3), %%mm1 \n\t" "movd (%1, %3), %%mm1 \n\t"
...@@ -401,7 +401,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size ...@@ -401,7 +401,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
{ {
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
...@@ -427,7 +427,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -427,7 +427,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
{ {
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm4 \n\t" "movq 8(%1), %%mm4 \n\t"
......
...@@ -754,7 +754,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -754,7 +754,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
PAVGB" 1(%1), %%mm0 \n\t" PAVGB" 1(%1), %%mm0 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" "movq (%1, %%"REG_a"), %%mm2 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
......
...@@ -28,7 +28,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -28,7 +28,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t" "movq 1(%1), %%mm1 \n\t"
...@@ -69,7 +69,7 @@ static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, u ...@@ -69,7 +69,7 @@ static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, u
"movq %%mm4, (%3) \n\t" "movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t" "add %5, %3 \n\t"
"decl %0 \n\t" "decl %0 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t" "movq (%2), %%mm1 \n\t"
...@@ -110,7 +110,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -110,7 +110,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t" "movq 1(%1), %%mm1 \n\t"
...@@ -168,7 +168,7 @@ static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, ...@@ -168,7 +168,7 @@ static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1,
"movq %%mm5, 8(%3) \n\t" "movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t" "add %5, %3 \n\t"
"decl %0 \n\t" "decl %0 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t" "movq (%2), %%mm1 \n\t"
...@@ -206,7 +206,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -206,7 +206,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm2 \n\t" "movq (%1, %%"REG_a"),%%mm2 \n\t"
...@@ -246,7 +246,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -246,7 +246,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"paddusw %%mm1, %%mm5 \n\t" "paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
...@@ -458,7 +458,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -458,7 +458,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
__asm __volatile( __asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" "movq (%1, %%"REG_a"), %%mm2 \n\t"
...@@ -509,7 +509,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -509,7 +509,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"paddusw %%mm1, %%mm5 \n\t" "paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
".balign 8 \n\t" ASMALIGN(3)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
......
...@@ -34,7 +34,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -34,7 +34,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t" "movq (%2, %%"REG_a"), %%mm2 \n\t"
...@@ -70,7 +70,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -70,7 +70,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t" "movq (%2, %%"REG_a"), %%mm2 \n\t"
...@@ -92,7 +92,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in ...@@ -92,7 +92,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
{ {
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t" "movq (%2, %%"REG_a"), %%mm2 \n\t"
...@@ -118,7 +118,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -118,7 +118,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ //FIXME reuse src { //FIXME reuse src
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"movq "MANGLE(bone)", %%mm5 \n\t" "movq "MANGLE(bone)", %%mm5 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
...@@ -155,7 +155,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int ...@@ -155,7 +155,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
{ {
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t" "movq (%2, %%"REG_a"), %%mm1 \n\t"
...@@ -193,7 +193,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -193,7 +193,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); long len= -(stride*h);
asm volatile( asm volatile(
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t" "movq (%2, %%"REG_a"), %%mm1 \n\t"
......
...@@ -66,7 +66,7 @@ asm volatile( ...@@ -66,7 +66,7 @@ asm volatile(
"packssdw %%mm5, %%mm5 \n\t" "packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t" "psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %3), %%mm0 \n\t" "movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t" "movq 8(%0, %3), %%mm1 \n\t"
...@@ -129,7 +129,7 @@ asm volatile( ...@@ -129,7 +129,7 @@ asm volatile(
"packssdw %%mm5, %%mm5 \n\t" "packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t" "psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %3), %%mm0 \n\t" "movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t" "movq 8(%0, %3), %%mm1 \n\t"
...@@ -222,7 +222,7 @@ asm volatile( ...@@ -222,7 +222,7 @@ asm volatile(
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
...@@ -285,7 +285,7 @@ asm volatile( ...@@ -285,7 +285,7 @@ asm volatile(
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
...@@ -357,7 +357,7 @@ asm volatile( ...@@ -357,7 +357,7 @@ asm volatile(
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
...@@ -418,7 +418,7 @@ asm volatile( ...@@ -418,7 +418,7 @@ asm volatile(
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
......
...@@ -112,7 +112,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -112,7 +112,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
"psubw (%3), %%mm6 \n\t" // -bias[0] "psubw (%3), %%mm6 \n\t" // -bias[0]
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0 "pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
...@@ -156,7 +156,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -156,7 +156,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0 "pxor %%mm4, %%mm4 \n\t" // 0
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t" ASMALIGN(4)
"1: \n\t" "1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0 "pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
......
...@@ -785,7 +785,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -785,7 +785,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"4: \n\t" "4: \n\t"
Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f) Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f) Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
...@@ -860,7 +860,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -860,7 +860,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"6: \n\t" "6: \n\t"
Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f) Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
...@@ -926,7 +926,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -926,7 +926,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"2: \n\t" "2: \n\t"
Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f) Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
...@@ -1003,7 +1003,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -1003,7 +1003,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"3: \n\t" "3: \n\t"
#undef IDCT #undef IDCT
#define IDCT(src0, src4, src1, src5, dst, shift) \ #define IDCT(src0, src4, src1, src5, dst, shift) \
...@@ -1067,7 +1067,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -1067,7 +1067,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"5: \n\t" "5: \n\t"
#undef IDCT #undef IDCT
#define IDCT(src0, src4, src1, src5, dst, shift) \ #define IDCT(src0, src4, src1, src5, dst, shift) \
...@@ -1132,7 +1132,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) ...@@ -1132,7 +1132,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t"\ "#" ASMALIGN(4) \
"1: \n\t" "1: \n\t"
#undef IDCT #undef IDCT
#define IDCT(src0, src4, src1, src5, dst, shift) \ #define IDCT(src0, src4, src1, src5, dst, shift) \
...@@ -1206,7 +1206,7 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) ...@@ -1206,7 +1206,7 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
"jmp 9f \n\t" "jmp 9f \n\t"
"#.balign 16 \n\t" "#" ASMALIGN(4)
"7: \n\t" "7: \n\t"
#undef IDCT #undef IDCT
#define IDCT(src0, src4, src1, src5, dst, shift) \ #define IDCT(src0, src4, src1, src5, dst, shift) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment