Commit 1665af73 authored by diego

cosmetics: Reformat PPC code in libavcodec according to style guidelines.

This includes indentation changes, comment reformatting, consistent brace
placement and some prettyprinting.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@14316 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 18d45891
@@ -96,10 +96,8 @@ void powerpc_display_perf_report(void)
 {
     int i, j;
     av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-    for(i = 0 ; i < powerpc_perf_total ; i++)
-    {
-        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-        {
+    for(i = 0 ; i < powerpc_perf_total ; i++) {
+        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
             if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
                 av_log(NULL, AV_LOG_INFO,
                        " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
@@ -117,28 +115,23 @@ void powerpc_display_perf_report(void)
 /* ***** WARNING ***** WARNING ***** WARNING ***** */
 /*
-  clear_blocks_dcbz32_ppc will not work properly
-  on PowerPC processors with a cache line size
-  not equal to 32 bytes.
-  Fortunately all processor used by Apple up to
-  at least the 7450 (aka second generation G4)
-  use 32 bytes cache line.
-  This is due to the use of the 'dcbz' instruction.
-  It simply clear to zero a single cache line,
-  so you need to know the cache line size to use it !
-  It's absurd, but it's fast...
+  clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
+  cache line size not equal to 32 bytes.
+  Fortunately all processor used by Apple up to at least the 7450 (aka second
+  generation G4) use 32 bytes cache line.
+  This is due to the use of the 'dcbz' instruction. It simply clear to zero a
+  single cache line, so you need to know the cache line size to use it !
+  It's absurd, but it's fast...

-  update 24/06/2003 : Apple released yesterday the G5,
-  with a PPC970. cache line size : 128 bytes. Oups.
-  The semantic of dcbz was changed, it always clear
-  32 bytes. so the function below will work, but will
-  be slow. So I fixed check_dcbz_effect to use dcbzl,
-  which is defined to clear a cache line (as dcbz before).
-  So we still can distinguish, and use dcbz (32 bytes)
-  or dcbzl (one cache line) as required.
+  update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
+  size: 128 bytes. Oups.
+  The semantic of dcbz was changed, it always clear 32 bytes. so the function
+  below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
+  which is defined to clear a cache line (as dcbz before). So we still can
+  distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.

   see <http://developer.apple.com/technotes/tn/tn2087.html>
   and <http://developer.apple.com/technotes/tn/tn2086.html>
 */

 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
 {
@@ -216,8 +209,7 @@ long check_dcbzl_effect(void)
     register long i = 0;
     long count = 0;

-    if (!fakedata)
-    {
+    if (!fakedata) {
         return 0L;
     }

@@ -229,8 +221,7 @@ long check_dcbzl_effect(void)
        in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
     asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));

-    for (i = 0; i < 1024 ; i ++)
-    {
+    for (i = 0; i < 1024 ; i ++) {
         if (fakedata[i] == (char)0)
             count++;
     }
@@ -286,17 +277,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)

 #ifdef CONFIG_ENCODERS
         if (avctx->dct_algo == FF_DCT_AUTO ||
-            avctx->dct_algo == FF_DCT_ALTIVEC)
-        {
+            avctx->dct_algo == FF_DCT_ALTIVEC) {
             c->fdct = fdct_altivec;
         }
 #endif //CONFIG_ENCODERS

-        if (avctx->lowres==0)
-        {
+        if (avctx->lowres==0) {
             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
-                (avctx->idct_algo == FF_IDCT_ALTIVEC))
-            {
+                (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
                 c->idct_put = idct_put_altivec;
                 c->idct_add = idct_add_altivec;
                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -306,10 +294,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 #ifdef CONFIG_POWERPC_PERF
     {
         int i, j;
-        for (i = 0 ; i < powerpc_perf_total ; i++)
-        {
-            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-            {
+        for (i = 0 ; i < powerpc_perf_total ; i++) {
+            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
                 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
                 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
                 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
......
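For context, the runtime detection that the warning comment above describes (and that check_dcbzl_effect performs) boils down to: fill a buffer with non-zero bytes, issue one dcbz/dcbzl on the middle of it, and count how many bytes come back zero. A minimal illustrative sketch, not the file's actual code, assuming a PowerPC target with GNU inline asm (the helper name probe_dcbzl_clear_size is made up):

    #include <string.h>

    static long probe_dcbzl_clear_size(void)
    {
        char buf[1024];
        register char *mid  = buf + 512;   /* somewhere inside the buffer */
        register long  zero = 0, i, count = 0;

        memset(buf, 0xFF, sizeof(buf));
        /* dcbzl zeroes the whole cache line containing (mid + zero) */
        asm volatile("dcbzl %0, %1" : : "b" (mid), "r" (zero) : "memory");

        for (i = 0; i < 1024; i++)
            if (buf[i] == 0)
                count++;
        return count;   /* 32 on a 32-byte-line G4, 128 on a PPC970 (G5) */
    }

With that value in hand, the init code can install a clear_blocks implementation that matches the actual cache line size, as the comment explains.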
@@ -125,14 +125,11 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
         POWERPC_GET_PMC4(pmc_stop[3]);                                  \
         POWERPC_GET_PMC5(pmc_stop[4]);                                  \
         POWERPC_GET_PMC6(pmc_stop[5]);                                  \
-        if (cond)                                                       \
-        {                                                               \
+        if (cond) {                                                     \
            for(pmc_loop_index = 0;                                      \
                pmc_loop_index < POWERPC_NUM_PMC_ENABLED;                \
-               pmc_loop_index++)                                        \
-           {                                                            \
-               if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
-               {                                                        \
+               pmc_loop_index++) {                                      \
+               if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) { \
                    POWERP_PMC_DATATYPE diff =                           \
                        pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
                    if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
......
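The macro above only does bookkeeping: for every profiled function and every enabled PMC it tracks the minimum, maximum, sum and sample count of the counter deltas, which powerpc_display_perf_report (first hunk) then prints as min/max/avg. A minimal sketch of that accumulation, with illustrative names rather than the header's actual layout:

    typedef unsigned long long pmc_t;

    struct perf_stats { pmc_t min, max, sum, num; };

    static void perf_update(struct perf_stats *st, pmc_t diff)
    {
        if (diff < st->min) st->min = diff;   /* min is initialized to ~0ULL        */
        if (diff > st->max) st->max = diff;   /* max is initialized to 0            */
        st->sum += diff;                      /* avg is printed as sum / (double)num */
        st->num++;
    }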
@@ -85,12 +85,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);

     c1 = vcii(p,p,n,n);

-    if (s->inverse)
-    {
+    if (s->inverse) {
         c2 = vcii(p,p,n,p);
-    }
-    else
-    {
+    } else {
         c2 = vcii(p,p,p,n);
     }

......
@@ -74,19 +74,17 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
         src_1 = vec_ld(16, src);
         srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

-        if (src_really_odd != 0x0000000F)
-        { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+        if (src_really_odd != 0x0000000F) {
+            // if src & 0xF == 0xF, then (src+1) is properly aligned
+            // on the second vector.
             srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
-        }
-        else
-        {
+        } else {
             srcvB = src_1;
         }
         srcvA = vec_mergeh(vczero, srcvA);
         srcvB = vec_mergeh(vczero, srcvB);

-        for(i=0; i<h; i++)
-        {
+        for(i=0; i<h; i++) {
             dst_odd = (unsigned long)dst & 0x0000000F;
             src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

@@ -100,12 +98,11 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
             src_1 = vec_ld(stride + 16, src);
             srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

-            if (src_really_odd != 0x0000000F)
-            { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+            if (src_really_odd != 0x0000000F) {
+                // if src & 0xF == 0xF, then (src+1) is properly aligned
+                // on the second vector.
                 srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
-            }
-            else
-            {
+            } else {
                 srcvD = src_1;
             }

@@ -128,12 +125,9 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

             dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

-            if (dst_odd)
-            {
+            if (dst_odd) {
                 dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
-            }
-            else
-            {
+            } else {
                 dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
             }

......
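The gmc1 routine above leans on the classic AltiVec pattern for loading 16 bytes from a possibly unaligned address: two aligned vec_ld loads bracketing the data, combined with a vec_perm whose control vector comes from vec_lvsl. A minimal sketch of the idiom (assuming <altivec.h>; the helper name is illustrative):

    #include <altivec.h>

    static vector unsigned char load_unaligned(const unsigned char *p)
    {
        vector unsigned char lo   = vec_ld(0,  p);   /* aligned block containing p  */
        vector unsigned char hi   = vec_ld(16, p);   /* next aligned 16-byte block  */
        vector unsigned char mask = vec_lvsl(0, p);  /* permute built from p & 15   */
        return vec_perm(lo, hi, mask);               /* the 16 bytes starting at p  */
    }

The src_really_odd != 0x0000000F test in the hunk is a small optimization of this: when src & 0xF == 0xF, src + 1 is already 16-byte aligned, so the second permute can be skipped and the next aligned load used directly.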
@@ -392,8 +392,8 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,

 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */

 H264_MC(put_, 16, altivec)
 H264_MC(avg_, 16, altivec)

 /****************************************************************************
......
@@ -344,7 +344,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
         src += srcStride;
         dst += dstStride;
     }
     POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 }

 /* this code assume stride % 16 == 0 */
@@ -365,23 +365,23 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
     const vec_u8_t srcM2a = vec_ld(0, srcbis);
     const vec_u8_t srcM2b = vec_ld(16, srcbis);
     const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcM1b = vec_ld(16, srcbis);
     const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP0b = vec_ld(16, srcbis);
     const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP1b = vec_ld(16, srcbis);
     const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP2b = vec_ld(16, srcbis);
     const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;

     vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
     vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
@@ -409,7 +409,7 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
         srcP3 = vec_perm(srcP3a, srcP3b, perm);
         srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
         srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-        // srcbis += srcStride;
+        //srcbis += srcStride;

         sum1A = vec_adds(srcP0ssA, srcP1ssA);
         sum1B = vec_adds(srcP0ssB, srcP1ssB);
......
@@ -22,7 +22,6 @@
  * NOTE: This code is based on GPL code from the libmpeg2 project. The
  * author, Michel Lespinasses, has given explicit permission to release
  * under LGPL as part of ffmpeg.
- *
  */

 /*
......
@@ -46,8 +46,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
     vector signed short zeros, sumhv, sumlv;

     s = src;
-    for(i=0;i<4;i++)
-    {
+    for(i=0;i<4;i++) {
         /*
            The vec_madds later on does an implicit >>15 on the result.
            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
@@ -86,13 +85,11 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
     /* Do our altivec resampling on 16 pixels at once. */
     while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
+        /* Read 16 (potentially unaligned) bytes from each of
            4 lines into 4 vectors, and split them into shorts.
            Interleave the multipy/accumulate for the resample
            filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
+           the vec_madds have. */

         tv = (vector unsigned char *) &s[0 * wrap];
         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
@@ -121,10 +118,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
+        /* Pack the results into our destination vector,
+           and do an aligned write of that back to memory. */

         dstv = vec_packsu(sumhv, sumlv) ;
         vec_st(dstv, 0, (vector unsigned char *) dst);
@@ -133,10 +128,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         dst_width-=16;
     }

-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
+    /* If there are any leftover pixels, resample them
+       with the slow scalar method. */
     while(dst_width>0) {
         sum = s[0 * wrap] * filter[0] +
               s[1 * wrap] * filter[1] +
......
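As the rewrapped comments note, vec_madds does an implicit >>15 on its result while FILTER_BITS is 8, so the setup loop has to account for the difference. Per output pixel the vector loop then computes the same thing as the scalar leftover path at the end of the hunk: a 4-tap vertical filter followed by a shift and clamp. A hedged scalar reference of that per-pixel computation (the helper name is illustrative):

    static unsigned char resample_one(const unsigned char *s, int wrap,
                                      const short *filter)
    {
        int sum = s[0 * wrap] * filter[0] +
                  s[1 * wrap] * filter[1] +
                  s[2 * wrap] * filter[2] +
                  s[3 * wrap] * filter[3];

        sum >>= 8;                     /* FILTER_BITS */
        if (sum < 0)   sum = 0;        /* clamp to the 8-bit pixel range */
        if (sum > 255) sum = 255;
        return (unsigned char)sum;
    }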
@@ -25,11 +25,11 @@

 #if defined(ARCH_POWERPC_405)
 /* signed 16x16 -> 32 multiply add accumulate */
-# define MAC16(rt, ra, rb) \
+#define MAC16(rt, ra, rb) \
     asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

 /* signed 16x16 -> 32 multiply */
-# define MUL16(ra, rb) \
+#define MUL16(ra, rb) \
     ({ int __rt; \
      asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
      __rt; })
......
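The two PowerPC 405 macros above wrap the maclhw (multiply-accumulate low halfword) and mullhw (multiply low halfword) instructions behind the generic MAC16/MUL16 names. A hedged usage sketch, assuming the macros above are in scope on an ARCH_POWERPC_405 build (the dot16 helper itself is made up, not part of the file):

    /* 16-bit dot product accumulated in 32 bits using the macros above */
    static int dot16(const short *a, const short *b, int n)
    {
        int i, acc = MUL16(a[0], b[0]);
        for (i = 1; i < n; i++)
            MAC16(acc, a[i], b[i]);
        return acc;
    }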
@@ -137,10 +137,8 @@ int dct_quantize_altivec(MpegEncContext* s,

     int whichPass, whichHalf;

-    for(whichPass = 1; whichPass<=2; whichPass++)
-    {
-        for(whichHalf = 1; whichHalf<=2; whichHalf++)
-        {
+    for(whichPass = 1; whichPass<=2; whichPass++) {
+        for(whichHalf = 1; whichHalf<=2; whichHalf++) {
             vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
             vector float tmp10, tmp11, tmp12, tmp13;
             vector float z1, z2, z3, z4, z5;
@@ -235,8 +233,7 @@ int dct_quantize_altivec(MpegEncContext* s,
             SWAP(row7, alt7);
         }

-        if (whichPass == 1)
-        {
+        if (whichPass == 1) {
             // transpose the data for the second pass

             // First, block transpose the upper right with lower left.
@@ -261,8 +258,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         const vector signed int* qmat;
         vector float bias, negBias;

-        if (s->mb_intra)
-        {
+        if (s->mb_intra) {
             vector signed int baseVector;

             // We must cache element 0 in the intra case
@@ -272,9 +268,7 @@ int dct_quantize_altivec(MpegEncContext* s,
             qmat = (vector signed int*)s->q_intra_matrix[qscale];
             biasAddr = &(s->intra_quant_bias);
-        }
-        else
-        {
+        } else {
             qmat = (vector signed int*)s->q_inter_matrix[qscale];
             biasAddr = &(s->inter_quant_bias);
         }

@@ -439,8 +433,7 @@ int dct_quantize_altivec(MpegEncContext* s,

     // and handle it using the vector unit if we can. This is the permute used
     // by the altivec idct, so it is common when using the altivec dct.
-    if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
-    {
+    if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {
         TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
     }

@@ -456,10 +449,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     }

     // special handling of block[0]
-    if (s->mb_intra)
-    {
-        if (!s->h263_aic)
-        {
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
             if (n < 4)
                 oldBaseValue /= s->y_dc_scale;
             else
@@ -474,8 +465,7 @@ int dct_quantize_altivec(MpegEncContext* s,
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
         (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
-        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
-    {
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {
         ff_block_permute(data, s->dsp.idct_permutation,
                          s->intra_scantable.scantable, lastNonZero);
     }
@@ -483,10 +473,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     return lastNonZero;
 }

-/*
-  AltiVec version of dct_unquantize_h263
-  this code assumes `block' is 16 bytes-aligned
-*/
+/* AltiVec version of dct_unquantize_h263
+   this code assumes `block' is 16 bytes-aligned */
 void dct_unquantize_h263_altivec(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
 {
@@ -559,8 +547,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);

         // vectorize all the 16 bytes-aligned blocks
         // of 8 elements
-        for(; (j + 7) <= nCoeffs ; j+=8)
-        {
+        for(; (j + 7) <= nCoeffs ; j+=8) {
             blockv = vec_ld(j << 1, block);
             blockv_neg = vec_cmplt(blockv, vczero);
             blockv_null = vec_cmpeq(blockv, vczero);
@@ -589,8 +576,8 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
             }
         }

-        if (i == 1)
-        { // cheat. this avoid special-casing the first iteration
+        if (i == 1) {
+            // cheat. this avoid special-casing the first iteration
             block[0] = backup_0;
         }
     }
@@ -605,11 +592,9 @@ void MPV_common_init_altivec(MpegEncContext *s)
 {
     if ((mm_flags & MM_ALTIVEC) == 0) return;

-    if (s->avctx->lowres==0)
-    {
+    if (s->avctx->lowres==0) {
         if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
-            (s->avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
+            (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) {
             s->dsp.idct_put = idct_put_altivec;
             s->dsp.idct_add = idct_add_altivec;
             s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -618,15 +603,13 @@ void MPV_common_init_altivec(MpegEncContext *s)

         // Test to make sure that the dct required alignments are met.
         if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
-            (((long)(s->q_inter_matrix) & 0x0f) != 0))
-        {
+            (((long)(s->q_inter_matrix) & 0x0f) != 0)) {
             av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
                    "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
             return;
         }

-        if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
-        {
+        if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) {
             av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
                    "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
             return;
@@ -634,8 +617,7 @@ void MPV_common_init_altivec(MpegEncContext *s)

         if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
-            (s->avctx->dct_algo == FF_DCT_ALTIVEC))
-        {
+            (s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
 #if 0 /* seems to cause trouble under some circumstances */
             s->dct_quantize = dct_quantize_altivec;
 #endif

......
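MPV_common_init_altivec above refuses to install the AltiVec DCT when the quantization matrices or the inverse scan table are not 16-byte aligned, because vec_ld/vec_st only operate on 16-byte-aligned addresses (the low four address bits are ignored). The test it uses is simply those low four bits; a minimal equivalent helper (illustrative name, not part of the file):

    static int is_16byte_aligned(const void *p)
    {
        return (((long)p) & 0x0f) == 0;   /* same check as in the hunks above */
    }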
@@ -379,8 +379,7 @@ void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,

     v4=(vector signed int *)b4;
     v5=(vector signed int *)b5;

-    for (i=0; i< w4;i++)
-    {
+    for (i=0; i< w4;i++) {
 #if 0
         b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
......