Commit 4c4133b8 authored by mru

ARM: change alignment of loops in put_pixels*_arm to 32

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@16820 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 605fc814
...@@ -91,7 +91,7 @@ function ff_prefetch_arm, export=1 ...@@ -91,7 +91,7 @@ function ff_prefetch_arm, export=1
.endm .endm
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 8 .align 5
function put_pixels16_arm, export=1 function put_pixels16_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -111,7 +111,7 @@ function put_pixels16_arm, export=1 ...@@ -111,7 +111,7 @@ function put_pixels16_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 1b bne 1b
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r8} ldmia r1, {r4-r8}
add r1, r1, r2 add r1, r1, r2
...@@ -122,7 +122,7 @@ function put_pixels16_arm, export=1 ...@@ -122,7 +122,7 @@ function put_pixels16_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 2b bne 2b
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r8} ldmia r1, {r4-r8}
add r1, r1, r2 add r1, r1, r2
...@@ -133,7 +133,7 @@ function put_pixels16_arm, export=1 ...@@ -133,7 +133,7 @@ function put_pixels16_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 3b bne 3b
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r8} ldmia r1, {r4-r8}
add r1, r1, r2 add r1, r1, r2
...@@ -144,7 +144,6 @@ function put_pixels16_arm, export=1 ...@@ -144,7 +144,6 @@ function put_pixels16_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8
5: 5:
.word 1b .word 1b
.word 2b .word 2b
...@@ -153,7 +152,7 @@ function put_pixels16_arm, export=1 ...@@ -153,7 +152,7 @@ function put_pixels16_arm, export=1
.endfunc .endfunc
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 8 .align 5
function put_pixels8_arm, export=1 function put_pixels8_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -173,7 +172,7 @@ function put_pixels8_arm, export=1 ...@@ -173,7 +172,7 @@ function put_pixels8_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 1b bne 1b
ldmfd sp!, {r4-r5,pc} ldmfd sp!, {r4-r5,pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r5, r12} ldmia r1, {r4-r5, r12}
add r1, r1, r2 add r1, r1, r2
...@@ -184,7 +183,7 @@ function put_pixels8_arm, export=1 ...@@ -184,7 +183,7 @@ function put_pixels8_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 2b bne 2b
ldmfd sp!, {r4-r5,pc} ldmfd sp!, {r4-r5,pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r5, r12} ldmia r1, {r4-r5, r12}
add r1, r1, r2 add r1, r1, r2
...@@ -195,7 +194,7 @@ function put_pixels8_arm, export=1 ...@@ -195,7 +194,7 @@ function put_pixels8_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 3b bne 3b
ldmfd sp!, {r4-r5,pc} ldmfd sp!, {r4-r5,pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r5, r12} ldmia r1, {r4-r5, r12}
add r1, r1, r2 add r1, r1, r2
...@@ -206,7 +205,6 @@ function put_pixels8_arm, export=1 ...@@ -206,7 +205,6 @@ function put_pixels8_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
ldmfd sp!, {r4-r5,pc} ldmfd sp!, {r4-r5,pc}
.align 8
5: 5:
.word 1b .word 1b
.word 2b .word 2b
...@@ -215,7 +213,7 @@ function put_pixels8_arm, export=1 ...@@ -215,7 +213,7 @@ function put_pixels8_arm, export=1
.endfunc .endfunc
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 8 .align 5
function put_pixels8_x2_arm, export=1 function put_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -238,7 +236,7 @@ function put_pixels8_x2_arm, export=1 ...@@ -238,7 +236,7 @@ function put_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 1b bne 1b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -251,7 +249,7 @@ function put_pixels8_x2_arm, export=1 ...@@ -251,7 +249,7 @@ function put_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 2b bne 2b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -264,7 +262,7 @@ function put_pixels8_x2_arm, export=1 ...@@ -264,7 +262,7 @@ function put_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 3b bne 3b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -276,7 +274,6 @@ function put_pixels8_x2_arm, export=1 ...@@ -276,7 +274,6 @@ function put_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
.align 8
5: 5:
.word 0xFEFEFEFE .word 0xFEFEFEFE
.word 2b .word 2b
...@@ -284,7 +281,7 @@ function put_pixels8_x2_arm, export=1 ...@@ -284,7 +281,7 @@ function put_pixels8_x2_arm, export=1
.word 4b .word 4b
.endfunc .endfunc
.align 8 .align 5
function put_no_rnd_pixels8_x2_arm, export=1 function put_no_rnd_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -307,7 +304,7 @@ function put_no_rnd_pixels8_x2_arm, export=1 ...@@ -307,7 +304,7 @@ function put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 1b bne 1b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -320,7 +317,7 @@ function put_no_rnd_pixels8_x2_arm, export=1 ...@@ -320,7 +317,7 @@ function put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 2b bne 2b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -333,7 +330,7 @@ function put_no_rnd_pixels8_x2_arm, export=1 ...@@ -333,7 +330,7 @@ function put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 3b bne 3b
ldmfd sp!, {r4-r10,pc} ldmfd sp!, {r4-r10,pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r5, r10} ldmia r1, {r4-r5, r10}
add r1, r1, r2 add r1, r1, r2
...@@ -345,7 +342,6 @@ function put_no_rnd_pixels8_x2_arm, export=1 ...@@ -345,7 +342,6 @@ function put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 4b bne 4b
ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
.align 8
5: 5:
.word 0xFEFEFEFE .word 0xFEFEFEFE
.word 2b .word 2b
...@@ -355,7 +351,7 @@ function put_no_rnd_pixels8_x2_arm, export=1 ...@@ -355,7 +351,7 @@ function put_no_rnd_pixels8_x2_arm, export=1
@ ---------------------------------------------------------------- @ ----------------------------------------------------------------
.align 8 .align 5
function put_pixels8_y2_arm, export=1 function put_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -386,7 +382,7 @@ function put_pixels8_y2_arm, export=1 ...@@ -386,7 +382,7 @@ function put_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -409,7 +405,7 @@ function put_pixels8_y2_arm, export=1 ...@@ -409,7 +405,7 @@ function put_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -432,7 +428,7 @@ function put_pixels8_y2_arm, export=1 ...@@ -432,7 +428,7 @@ function put_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -456,7 +452,6 @@ function put_pixels8_y2_arm, export=1 ...@@ -456,7 +452,6 @@ function put_pixels8_y2_arm, export=1
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8
5: 5:
.word 0xFEFEFEFE .word 0xFEFEFEFE
.word 2b .word 2b
...@@ -464,7 +459,7 @@ function put_pixels8_y2_arm, export=1 ...@@ -464,7 +459,7 @@ function put_pixels8_y2_arm, export=1
.word 4b .word 4b
.endfunc .endfunc
.align 8 .align 5
function put_no_rnd_pixels8_y2_arm, export=1 function put_no_rnd_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -495,7 +490,7 @@ function put_no_rnd_pixels8_y2_arm, export=1 ...@@ -495,7 +490,7 @@ function put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
2: 2:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -518,7 +513,7 @@ function put_no_rnd_pixels8_y2_arm, export=1 ...@@ -518,7 +513,7 @@ function put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
3: 3:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -541,7 +536,7 @@ function put_no_rnd_pixels8_y2_arm, export=1 ...@@ -541,7 +536,7 @@ function put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8 .align 5
4: 4:
ldmia r1, {r4-r6} ldmia r1, {r4-r6}
add r1, r1, r2 add r1, r1, r2
...@@ -564,7 +559,6 @@ function put_no_rnd_pixels8_y2_arm, export=1 ...@@ -564,7 +559,6 @@ function put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2 add r0, r0, r2
bne 6b bne 6b
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.align 8
5: 5:
.word 0xFEFEFEFE .word 0xFEFEFEFE
.word 2b .word 2b
...@@ -637,7 +631,7 @@ function put_no_rnd_pixels8_y2_arm, export=1 ...@@ -637,7 +631,7 @@ function put_no_rnd_pixels8_y2_arm, export=1
ldmfd sp!, {r4-r11,pc} ldmfd sp!, {r4-r11,pc}
.endm .endm
.align 8 .align 5
function put_pixels8_xy2_arm, export=1 function put_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -651,15 +645,15 @@ function put_pixels8_xy2_arm, export=1 ...@@ -651,15 +645,15 @@ function put_pixels8_xy2_arm, export=1
1: 1:
RND_XY2_EXPAND 0 RND_XY2_EXPAND 0
.align 8 .align 5
2: 2:
RND_XY2_EXPAND 1 RND_XY2_EXPAND 1
.align 8 .align 5
3: 3:
RND_XY2_EXPAND 2 RND_XY2_EXPAND 2
.align 8 .align 5
4: 4:
RND_XY2_EXPAND 3 RND_XY2_EXPAND 3
...@@ -673,7 +667,7 @@ function put_pixels8_xy2_arm, export=1 ...@@ -673,7 +667,7 @@ function put_pixels8_xy2_arm, export=1
.word 0x0F0F0F0F .word 0x0F0F0F0F
.endfunc .endfunc
.align 8 .align 5
function put_no_rnd_pixels8_xy2_arm, export=1 function put_no_rnd_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned @ block = word aligned, pixels = unaligned
...@@ -687,15 +681,15 @@ function put_no_rnd_pixels8_xy2_arm, export=1 ...@@ -687,15 +681,15 @@ function put_no_rnd_pixels8_xy2_arm, export=1
1: 1:
RND_XY2_EXPAND 0 RND_XY2_EXPAND 0
.align 8 .align 5
2: 2:
RND_XY2_EXPAND 1 RND_XY2_EXPAND 1
.align 8 .align 5
3: 3:
RND_XY2_EXPAND 2 RND_XY2_EXPAND 2
.align 8 .align 5
4: 4:
RND_XY2_EXPAND 3 RND_XY2_EXPAND 3
...@@ -709,6 +703,7 @@ function put_no_rnd_pixels8_xy2_arm, export=1 ...@@ -709,6 +703,7 @@ function put_no_rnd_pixels8_xy2_arm, export=1
.word 0x0F0F0F0F .word 0x0F0F0F0F
.endfunc .endfunc
.align 5
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
function ff_add_pixels_clamped_ARM, export=1 function ff_add_pixels_clamped_ARM, export=1
push {r4-r10} push {r4-r10}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment